From 518d18848163a5f69994da2e9fbbb9f1259ec53f Mon Sep 17 00:00:00 2001 From: Jaime Adroher Date: Fri, 26 Mar 2021 19:08:38 +0100 Subject: [PATCH] Fix issues on import joblib and pd.concat --- ... Analysis - XGBoost (Batch Transform) - Solution.ipynb | 6 +++--- ...B Sentiment Analysis - XGBoost (Batch Transform).ipynb | 2 +- ...sis - XGBoost (Hyperparameter Tuning) - Solution.ipynb | 8 ++++---- ...iment Analysis - XGBoost (Hyperparameter Tuning).ipynb | 8 ++++---- ...Analysis - XGBoost (Updating a Model) - Solution.ipynb | 6 +++--- ... Sentiment Analysis - XGBoost (Updating a Model).ipynb | 6 +++--- .../IMDB Sentiment Analysis - XGBoost - Web App.ipynb | 6 +++--- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Batch Transform) - Solution.ipynb b/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Batch Transform) - Solution.ipynb index bfe49c599..bca97f852 100644 --- a/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Batch Transform) - Solution.ipynb +++ b/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Batch Transform) - Solution.ipynb @@ -274,7 +274,7 @@ "source": [ "import numpy as np\n", "from sklearn.feature_extraction.text import CountVectorizer\n", - "from sklearn.externals import joblib\n", + "import joblib\n", "# joblib is an enhanced version of pickle that is more efficient for storing NumPy arrays\n", "\n", "def extract_BoW_features(words_train, words_test, vocabulary_size=5000,\n", @@ -407,8 +407,8 @@ "# Make sure that the files you create are in the correct format.\n", "\n", "# Solution:\n", - "pd.concat([val_y, val_X], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n", - "pd.concat([train_y, train_X], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)" + "pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n", + "pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X)], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)" ] }, { diff --git a/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Batch Transform).ipynb b/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Batch Transform).ipynb index af013db4f..929ab819c 100644 --- a/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Batch Transform).ipynb +++ b/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Batch Transform).ipynb @@ -274,7 +274,7 @@ "source": [ "import numpy as np\n", "from sklearn.feature_extraction.text import CountVectorizer\n", - "from sklearn.externals import joblib\n", + "import joblib\n", "# joblib is an enhanced version of pickle that is more efficient for storing NumPy arrays\n", "\n", "def extract_BoW_features(words_train, words_test, vocabulary_size=5000,\n", diff --git a/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Hyperparameter Tuning) - Solution.ipynb b/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Hyperparameter Tuning) - Solution.ipynb index 0367943a2..067caa05c 100644 --- a/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Hyperparameter Tuning) - Solution.ipynb +++ b/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Hyperparameter Tuning) - Solution.ipynb @@ -274,7 +274,7 @@ "source": [ "import numpy as np\n", "from sklearn.feature_extraction.text import CountVectorizer\n", - "from sklearn.externals import joblib\n", + "import joblib\n", "# joblib is an enhanced version of pickle that is more efficient for storing NumPy arrays\n", "\n", "def extract_BoW_features(words_train, words_test, vocabulary_size=5000,\n", @@ -396,11 +396,11 @@ "# Solution:\n", "# The test data shouldn't contain the ground truth labels as they are what the model is\n", "# trying to predict. We will end up using them afterward to compare the predictions to.\n", - "# pd.concat([test_y, test_X], axis=1).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n", + "# pd.concat([pd.DataFrame(test_y), pd.DataFrame(test_X)], axis=1).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n", "pd.DataFrame(test_X).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n", "\n", - "pd.concat([val_y, val_X], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n", - "pd.concat([train_y, train_X], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)" + "pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n", + "pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X)], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)" ] }, { diff --git a/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Hyperparameter Tuning).ipynb b/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Hyperparameter Tuning).ipynb index 71f5306e5..c48b9f172 100644 --- a/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Hyperparameter Tuning).ipynb +++ b/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Hyperparameter Tuning).ipynb @@ -274,7 +274,7 @@ "source": [ "import numpy as np\n", "from sklearn.feature_extraction.text import CountVectorizer\n", - "from sklearn.externals import joblib\n", + "import joblib\n", "# joblib is an enhanced version of pickle that is more efficient for storing NumPy arrays\n", "\n", "def extract_BoW_features(words_train, words_test, vocabulary_size=5000,\n", @@ -393,9 +393,9 @@ "# First, save the test data to test.csv in the data_dir directory. Note that we do not save the associated ground truth\n", "# labels, instead we will use them later to compare with our model output.\n", "\n", - "pd.concat([test_y, test_X], axis=1).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n", - "pd.concat([val_y, val_X], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n", - "pd.concat([train_y, train_X], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)" + "pd.concat([pd.DataFrame(test_y), pd.DataFrame(test_X)], axis=1).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n", + "pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n", + "pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X)], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)" ] }, { diff --git a/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Updating a Model) - Solution.ipynb b/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Updating a Model) - Solution.ipynb index b3afc282b..27d23543a 100644 --- a/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Updating a Model) - Solution.ipynb +++ b/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Updating a Model) - Solution.ipynb @@ -285,7 +285,7 @@ "source": [ "import numpy as np\n", "from sklearn.feature_extraction.text import CountVectorizer\n", - "from sklearn.externals import joblib\n", + "import joblib\n", "# joblib is an enhanced version of pickle that is more efficient for storing NumPy arrays\n", "\n", "def extract_BoW_features(words_train, words_test, vocabulary_size=5000,\n", @@ -411,8 +411,8 @@ "source": [ "pd.DataFrame(test_X).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n", "\n", - "pd.concat([val_y, val_X], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n", - "pd.concat([train_y, train_X], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)" + "pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n", + "pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X)], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)" ] }, { diff --git a/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Updating a Model).ipynb b/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Updating a Model).ipynb index 9d37a6970..6f3965af6 100644 --- a/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Updating a Model).ipynb +++ b/Mini-Projects/IMDB Sentiment Analysis - XGBoost (Updating a Model).ipynb @@ -285,7 +285,7 @@ "source": [ "import numpy as np\n", "from sklearn.feature_extraction.text import CountVectorizer\n", - "from sklearn.externals import joblib\n", + "import joblib\n", "# joblib is an enhanced version of pickle that is more efficient for storing NumPy arrays\n", "\n", "def extract_BoW_features(words_train, words_test, vocabulary_size=5000,\n", @@ -411,8 +411,8 @@ "source": [ "pd.DataFrame(test_X).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n", "\n", - "pd.concat([val_y, val_X], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n", - "pd.concat([train_y, train_X], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)" + "pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n", + "pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X)], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)" ] }, { diff --git a/Tutorials/IMDB Sentiment Analysis - XGBoost - Web App.ipynb b/Tutorials/IMDB Sentiment Analysis - XGBoost - Web App.ipynb index c34b371d6..bcd3fe36a 100644 --- a/Tutorials/IMDB Sentiment Analysis - XGBoost - Web App.ipynb +++ b/Tutorials/IMDB Sentiment Analysis - XGBoost - Web App.ipynb @@ -274,7 +274,7 @@ "from sklearn.feature_extraction.text import CountVectorizer\n", "\n", "# sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. \n", - "# from sklearn.externals import joblib\n", + "# import joblib\n", "\n", "# Import joblib package directly\n", "import joblib\n", @@ -402,8 +402,8 @@ "source": [ "pd.DataFrame(test_X).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n", "\n", - "pd.concat([val_y, val_X], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n", - "pd.concat([train_y, train_X], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)" + "pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n", + "pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X)], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)" ] }, {