From eed301a13189c7153021fc87f8cb24012f52982c Mon Sep 17 00:00:00 2001
From: southjohn64 <61773763+southjohn64@users.noreply.github.com>
Date: Tue, 9 Nov 2021 08:34:32 +0200
Subject: [PATCH] Created using Colaboratory
---
Copy_of_HW_3.ipynb | 478 +++++++++++++++++++++++++++++++++++++++------
1 file changed, 415 insertions(+), 63 deletions(-)
diff --git a/Copy_of_HW_3.ipynb b/Copy_of_HW_3.ipynb
index d6427db..aacaee3 100644
--- a/Copy_of_HW_3.ipynb
+++ b/Copy_of_HW_3.ipynb
@@ -53,19 +53,19 @@
"base_uri": "https://localhost:8080/"
},
"id": "0UjnuJREuaeD",
- "outputId": "8810ccac-63d3-4307-866c-770bf1111221"
+ "outputId": "23d87730-0bed-479a-b356-e9c04f1732e9"
},
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
- "execution_count": 3,
+ "execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
+ "Mounted at /content/drive\n"
]
}
]
@@ -77,7 +77,7 @@
"base_uri": "https://localhost:8080/"
},
"id": "i-H32MHYucGO",
- "outputId": "1016c944-52a3-4afb-ec2c-5a31a0a0ba82"
+ "outputId": "32a6d625-bff8-4fff-af2b-a5bf3efa73c4"
},
"source": [
"\n",
@@ -100,24 +100,23 @@
"!cp /content/drive/MyDrive/kaggle.json /root/.kaggle/kaggle.json\n",
"!chmod 600 /root/.kaggle/kaggle.json"
],
- "execution_count": 4,
+ "execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: kaggle in /usr/local/lib/python3.7/dist-packages (1.5.12)\n",
- "Requirement already satisfied: urllib3 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.24.3)\n",
- "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.23.0)\n",
"Requirement already satisfied: python-slugify in /usr/local/lib/python3.7/dist-packages (from kaggle) (5.0.2)\n",
- "Requirement already satisfied: certifi in /usr/local/lib/python3.7/dist-packages (from kaggle) (2021.5.30)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.23.0)\n",
+ "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.15.0)\n",
+ "Requirement already satisfied: urllib3 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.24.3)\n",
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.8.2)\n",
+ "Requirement already satisfied: certifi in /usr/local/lib/python3.7/dist-packages (from kaggle) (2021.5.30)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from kaggle) (4.62.3)\n",
- "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.15.0)\n",
"Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.7/dist-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (3.0.4)\n",
- "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (2.10)\n",
- "mkdir: cannot create directory ‘/root/.kaggle/’: File exists\n"
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (2.10)\n"
]
}
]
@@ -130,82 +129,100 @@
"height": 1000
},
"id": "meleE9A3udoQ",
- "outputId": "a5daa86c-47e9-4484-a73e-cbd7a57ca52f"
+ "outputId": "9c6bb5b6-9939-4307-8042-b62d07c7b84e"
},
"source": [
"!pip install turicreate"
],
- "execution_count": 4,
+ "execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- "\u001b[K |████████████████████████████████| 86.4 MB 19 kB/s \n",
- "\u001b[?25hCollecting prettytable==0.7.2\n",
- " Downloading prettytable-0.7.2.zip (28 kB)\n",
- "Requirement already satisfied: requests>=2.9.1 in /usr/local/lib/python3.7/dist-packages (from turicreate) (2.23.0)\n",
+ "Collecting turicreate\n",
+ " Downloading turicreate-6.4.1-cp37-cp37m-manylinux1_x86_64.whl (92.0 MB)\n",
+ "\u001b[K |████████████████████████████████| 92.0 MB 13 kB/s \n",
+ "\u001b[?25hCollecting numba<0.51.0\n",
+ " Downloading numba-0.50.1-cp37-cp37m-manylinux2014_x86_64.whl (3.6 MB)\n",
+ "\u001b[K |████████████████████████████████| 3.6 MB 27.2 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: pandas>=0.23.2 in /usr/local/lib/python3.7/dist-packages (from turicreate) (1.1.5)\n",
+ "Collecting resampy==0.2.1\n",
+ " Downloading resampy-0.2.1.tar.gz (322 kB)\n",
+ "\u001b[K |████████████████████████████████| 322 kB 46.8 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from turicreate) (1.19.5)\n",
+ "Requirement already satisfied: decorator>=4.0.9 in /usr/local/lib/python3.7/dist-packages (from turicreate) (4.4.2)\n",
"Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from turicreate) (1.15.0)\n",
+ "Collecting prettytable==0.7.2\n",
+ " Downloading prettytable-0.7.2.zip (28 kB)\n",
+ "Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from turicreate) (1.4.1)\n",
+ "Collecting coremltools==3.3\n",
+ " Downloading coremltools-3.3-cp37-none-manylinux1_x86_64.whl (3.5 MB)\n",
+ "\u001b[K |████████████████████████████████| 3.5 MB 30.9 MB/s \n",
+ "\u001b[?25hCollecting tensorflow<2.1.0,>=2.0.0\n",
+ " Downloading tensorflow-2.0.4-cp37-cp37m-manylinux2010_x86_64.whl (86.4 MB)\n",
+ "\u001b[K |████████████████████████████████| 86.4 MB 58 kB/s \n",
+ "\u001b[?25hRequirement already satisfied: requests>=2.9.1 in /usr/local/lib/python3.7/dist-packages (from turicreate) (2.23.0)\n",
"Requirement already satisfied: pillow>=5.2.0 in /usr/local/lib/python3.7/dist-packages (from turicreate) (7.1.2)\n",
"Requirement already satisfied: protobuf>=3.1.0 in /usr/local/lib/python3.7/dist-packages (from coremltools==3.3->turicreate) (3.17.3)\n",
"Collecting llvmlite<0.34,>=0.33.0.dev0\n",
" Downloading llvmlite-0.33.0-cp37-cp37m-manylinux1_x86_64.whl (18.3 MB)\n",
- "\u001b[K |████████████████████████████████| 18.3 MB 80 kB/s \n",
+ "\u001b[K |████████████████████████████████| 18.3 MB 85 kB/s \n",
"\u001b[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from numba<0.51.0->turicreate) (57.4.0)\n",
- "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.23.2->turicreate) (2.8.2)\n",
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.23.2->turicreate) (2018.9)\n",
+ "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.23.2->turicreate) (2.8.2)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.9.1->turicreate) (2021.5.30)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.9.1->turicreate) (2.10)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.9.1->turicreate) (1.24.3)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.9.1->turicreate) (3.0.4)\n",
- "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.9.1->turicreate) (2.10)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.9.1->turicreate) (2021.5.30)\n",
- "Collecting tensorflow-estimator<2.1.0,>=2.0.0\n",
- " Downloading tensorflow_estimator-2.0.1-py2.py3-none-any.whl (449 kB)\n",
- "\u001b[K |████████████████████████████████| 449 kB 60.2 MB/s \n",
- "\u001b[?25hRequirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (0.12.0)\n",
- "Collecting numpy\n",
- " Downloading numpy-1.18.5-cp37-cp37m-manylinux1_x86_64.whl (20.1 MB)\n",
- "\u001b[K |████████████████████████████████| 20.1 MB 12.5 MB/s \n",
- "\u001b[?25hCollecting gast==0.2.2\n",
+ "Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (0.8.1)\n",
+ "Collecting h5py<=2.10.0\n",
+ " Downloading h5py-2.10.0-cp37-cp37m-manylinux1_x86_64.whl (2.9 MB)\n",
+ "\u001b[K |████████████████████████████████| 2.9 MB 21.1 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (1.1.0)\n",
+ "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (0.37.0)\n",
+ "Collecting gast==0.2.2\n",
" Downloading gast-0.2.2.tar.gz (10 kB)\n",
"Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (0.2.0)\n",
- "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (0.37.0)\n",
- "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (1.1.0)\n",
"Collecting tensorboard<2.1.0,>=2.0.0\n",
" Downloading tensorboard-2.0.2-py3-none-any.whl (3.8 MB)\n",
- "\u001b[K |████████████████████████████████| 3.8 MB 48.4 MB/s \n",
- "\u001b[?25hCollecting keras-applications>=1.0.8\n",
+ "\u001b[K |████████████████████████████████| 3.8 MB 53.2 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (1.12.1)\n",
+ "Collecting keras-applications>=1.0.8\n",
" Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)\n",
- "\u001b[K |████████████████████████████████| 50 kB 3.1 MB/s \n",
- "\u001b[?25hRequirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (3.3.0)\n",
- "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (1.12.1)\n",
+ "\u001b[K |████████████████████████████████| 50 kB 2.2 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (0.12.0)\n",
"Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (1.41.0)\n",
+ "Collecting numpy\n",
+ " Downloading numpy-1.18.5-cp37-cp37m-manylinux1_x86_64.whl (20.1 MB)\n",
+ "\u001b[K |████████████████████████████████| 20.1 MB 79.7 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (3.3.0)\n",
"Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (1.1.2)\n",
- "Collecting h5py<=2.10.0\n",
- " Downloading h5py-2.10.0-cp37-cp37m-manylinux1_x86_64.whl (2.9 MB)\n",
- "\u001b[K |████████████████████████████████| 2.9 MB 36.7 MB/s \n",
- "\u001b[?25hRequirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.1.0,>=2.0.0->turicreate) (0.8.1)\n",
+ "Collecting tensorflow-estimator<2.1.0,>=2.0.0\n",
+ " Downloading tensorflow_estimator-2.0.1-py2.py3-none-any.whl (449 kB)\n",
+ "\u001b[K |████████████████████████████████| 449 kB 56.1 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.7/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (3.3.4)\n",
"Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.7/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (1.0.1)\n",
- "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (1.35.0)\n",
- "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.7/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (3.3.4)\n",
"Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (0.4.6)\n",
+ "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (1.35.0)\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (0.2.8)\n",
- "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (4.7.2)\n",
"Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (4.2.4)\n",
+ "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (4.7.2)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (1.3.0)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from markdown>=2.6.8->tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (4.8.1)\n",
"Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (0.4.8)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (3.1.1)\n",
- "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->markdown>=2.6.8->tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (3.7.4.3)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->markdown>=2.6.8->tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (3.6.0)\n",
+ "Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->markdown>=2.6.8->tensorboard<2.1.0,>=2.0.0->tensorflow<2.1.0,>=2.0.0->turicreate) (3.7.4.3)\n",
"Building wheels for collected packages: prettytable, resampy, gast\n",
" Building wheel for prettytable (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- " Created wheel for prettytable: filename=prettytable-0.7.2-py3-none-any.whl size=13714 sha256=50cffc338408dc0757faeb2b677f8d7631529cc7ce86ae046d70795b3aa9532b\n",
+ " Created wheel for prettytable: filename=prettytable-0.7.2-py3-none-any.whl size=13714 sha256=8294916be8356ddd111627ebdc9857f727adb0d600c32f2a3ec7de280b7e27cc\n",
" Stored in directory: /root/.cache/pip/wheels/b2/7f/f6/f180315b584f00445045ff1699b550fa895d09471337ce21c6\n",
" Building wheel for resampy (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- " Created wheel for resampy: filename=resampy-0.2.1-py3-none-any.whl size=320860 sha256=ed858e4dcabf713e78740b01b2bb508d960c8856a66b88064a5282ee36296243\n",
+ " Created wheel for resampy: filename=resampy-0.2.1-py3-none-any.whl size=320860 sha256=cbc914dd62f5a9bec6285300428fc6c9fb8bd082a743c10d374ca6ea93e4a3d4\n",
" Stored in directory: /root/.cache/pip/wheels/71/74/53/d5ceb7c5ee7a168c7d106041863e71ac3273f4a4677743a284\n",
" Building wheel for gast (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- " Created wheel for gast: filename=gast-0.2.2-py3-none-any.whl size=7554 sha256=382c9ccab85c52f9f986689a81221133d60c27c126c5f35701c13830e03f87c4\n",
+ " Created wheel for gast: filename=gast-0.2.2-py3-none-any.whl size=7554 sha256=732d58765b60a5a85f6777d3a788a777b727964c31d324ace0c1b7fb8ba4af3e\n",
" Stored in directory: /root/.cache/pip/wheels/21/7f/02/420f32a803f7d0967b48dd823da3f558c5166991bfd204eef3\n",
"Successfully built prettytable resampy gast\n",
"Installing collected packages: numpy, llvmlite, h5py, tensorflow-estimator, tensorboard, numba, keras-applications, gast, tensorflow, resampy, prettytable, coremltools, turicreate\n",
@@ -311,7 +328,7 @@
"!unzip ./datasets/library-collection/*.zip -d ./datasets/library-collection\n",
"!ls ./datasets/library-collection"
],
- "execution_count": 5,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -345,7 +362,7 @@
"sf = tc.SFrame.read_csv(\"/content/datasets/library-collection/library-collection-inventory.csv\")\n",
"sf"
],
- "execution_count": 6,
+ "execution_count": null,
"outputs": [
{
"output_type": "display_data",
@@ -2007,7 +2024,7 @@
"sf['year'] = sf['PublicationYear'].apply(lambda s: get_year(s))\n",
"sf['year']"
],
- "execution_count": 7,
+ "execution_count": null,
"outputs": [
{
"output_type": "execute_result",
@@ -2033,7 +2050,7 @@
"?sf.materialize\n",
"sf.materialize()"
],
- "execution_count": 8,
+ "execution_count": null,
"outputs": []
},
{
@@ -2044,7 +2061,7 @@
"source": [
"sf_gt_2017 = sf[sf['year'] >= 2017]\n"
],
- "execution_count": 9,
+ "execution_count": null,
"outputs": []
},
{
@@ -2067,7 +2084,7 @@
"#sf2 = sf2.unique() \n",
"sf2"
],
- "execution_count": 10,
+ "execution_count": null,
"outputs": [
{
"output_type": "execute_result",
@@ -2229,7 +2246,7 @@
"sf2 = sf2.stack(\"subject_list\", new_column_name=\"subject\") \n",
"sf2['subject']"
],
- "execution_count": 11,
+ "execution_count": null,
"outputs": [
{
"output_type": "execute_result",
@@ -2265,7 +2282,7 @@
" return sf_by_subject_most_Common\n",
"most_popular_book(sf2,'Mystery Fiction')"
],
- "execution_count": 12,
+ "execution_count": null,
"outputs": [
{
"output_type": "execute_result",
@@ -2370,7 +2387,7 @@
"sf2_fiction = sf2_subject[sf2_subject.apply(lambda row: 'Fiction'.lower() in row['subject'].lower())]\n",
"#sf2_fiction.num_rows()"
],
- "execution_count": 13,
+ "execution_count": null,
"outputs": []
},
{
@@ -2387,7 +2404,7 @@
"sf2_fiction_sorted = g.sort('ItemCount', ascending=False )\n",
"sf2_fiction_sorted.print_rows(10)"
],
- "execution_count": 14,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -2421,7 +2438,7 @@
"source": [
"top10_subject = sf2_fiction_sorted[:10]['subject'] # list of the top 10 subjects\n"
],
- "execution_count": 15,
+ "execution_count": null,
"outputs": []
},
{
@@ -2434,7 +2451,7 @@
"import pandas as pd\n",
"import matplotlib.pyplot as plt"
],
- "execution_count": 16,
+ "execution_count": null,
"outputs": []
},
{
@@ -2445,7 +2462,7 @@
"source": [
"sf2_fiction_top10 = sf2_subject[sf2_subject.apply(lambda row: row['subject'] in top10_subject)]"
],
- "execution_count": 17,
+ "execution_count": null,
"outputs": []
},
{
@@ -2464,13 +2481,114 @@
{
"cell_type": "code",
"metadata": {
- "id": "uNPGJ9CEm6p0"
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 364
+ },
+ "id": "uNPGJ9CEm6p0",
+ "outputId": "7ce4f21d-1dad-415c-80fe-422a57ad2c0e"
},
"source": [
"sf2_fiction_top10"
],
"execution_count": null,
- "outputs": []
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "
\n",
+ " \n",
+ " subject \n",
+ " ItemCount \n",
+ " year \n",
+ " \n",
+ " \n",
+ " Romance fiction \n",
+ " 1 \n",
+ " 2017 \n",
+ " \n",
+ " \n",
+ " Historical fiction \n",
+ " 1 \n",
+ " 2017 \n",
+ " \n",
+ " \n",
+ " Fantasy fiction \n",
+ " 1 \n",
+ " 2017 \n",
+ " \n",
+ " \n",
+ " Fiction television programs ... \n",
+ " 2 \n",
+ " 2017 \n",
+ " \n",
+ " \n",
+ " Thrillers Fiction \n",
+ " 1 \n",
+ " 2017 \n",
+ " \n",
+ " \n",
+ " Friendship Juvenile fiction ... \n",
+ " 1 \n",
+ " 2017 \n",
+ " \n",
+ " \n",
+ " Detective and mystery fiction ... \n",
+ " 1 \n",
+ " 2017 \n",
+ " \n",
+ " \n",
+ " Thrillers Fiction \n",
+ " 1 \n",
+ " 2017 \n",
+ " \n",
+ " \n",
+ " Fiction television programs ... \n",
+ " 1 \n",
+ " 2017 \n",
+ " \n",
+ " \n",
+ " Friendship Juvenile fiction ... \n",
+ " 1 \n",
+ " 2017 \n",
+ " \n",
+ "
\n",
+ "[2345831 rows x 3 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n",
+ "
"
+ ],
+ "text/plain": [
+ "Columns:\n",
+ "\tsubject\tstr\n",
+ "\tItemCount\tint\n",
+ "\tyear\tint\n",
+ "\n",
+ "Rows: 2345831\n",
+ "\n",
+ "Data:\n",
+ "+-------------------------------+-----------+------+\n",
+ "| subject | ItemCount | year |\n",
+ "+-------------------------------+-----------+------+\n",
+ "| Romance fiction | 1 | 2017 |\n",
+ "| Historical fiction | 1 | 2017 |\n",
+ "| Fantasy fiction | 1 | 2017 |\n",
+ "| Fiction television programs | 2 | 2017 |\n",
+ "| Thrillers Fiction | 1 | 2017 |\n",
+ "| Friendship Juvenile fiction | 1 | 2017 |\n",
+ "| Detective and mystery fiction | 1 | 2017 |\n",
+ "| Thrillers Fiction | 1 | 2017 |\n",
+ "| Fiction television programs | 1 | 2017 |\n",
+ "| Friendship Juvenile fiction | 1 | 2017 |\n",
+ "+-------------------------------+-----------+------+\n",
+ "[2345831 rows x 3 columns]\n",
+ "Note: Only the head of the SFrame is printed.\n",
+ "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns."
+ ]
+ },
+ "metadata": {},
+ "execution_count": 19
+ }
+ ]
},
{
"cell_type": "code",
@@ -2577,10 +2695,244 @@
"id": "HNtgDQZE0P2y"
},
"source": [
- ""
+ "!mkdir -p ./datasets\n",
+ "!mkdir -p ./datasets/sjr/\n",
+ "!wget -O ./datasets/sjr/sjr2018.csv 'https://www.scimagojr.com/journalrank.php?out=xls'"
],
"execution_count": null,
"outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "EXUyH7-bSZEu"
+ },
+ "source": [
+ "import turicreate as tc\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "%matplotlib inline"
+ ],
+ "execution_count": 10,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 204
+ },
+ "id": "uJOHuNfwSKJk",
+ "outputId": "8f0cc685-67a7-4856-aac0-b684eb318930"
+ },
+ "source": [
+ "sf = tc.SFrame.read_csv(\"./datasets/sjr/sjr2018.csv\", delimiter=\";\")\n"
+ ],
+ "execution_count": 49,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "Finished parsing file /content/datasets/sjr/sjr2018.csv "
+ ],
+ "text/plain": [
+ "Finished parsing file /content/datasets/sjr/sjr2018.csv"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "Parsing completed. Parsed 100 lines in 0.449465 secs. "
+ ],
+ "text/plain": [
+ "Parsing completed. Parsed 100 lines in 0.449465 secs."
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "------------------------------------------------------\n",
+ "Inferred types from first 100 line(s) of file as \n",
+ "column_type_hints=[int,int,str,str,str,str,str,int,int,int,int,int,int,str,str,str,str,str,str,str]\n",
+ "If parsing fails due to incorrect types, you can correct\n",
+ "the inferred type list above and pass it to read_csv in\n",
+ "the column_type_hints argument\n",
+ "------------------------------------------------------\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "Finished parsing file /content/datasets/sjr/sjr2018.csv "
+ ],
+ "text/plain": [
+ "Finished parsing file /content/datasets/sjr/sjr2018.csv"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "Parsing completed. Parsed 32952 lines in 0.26651 secs. "
+ ],
+ "text/plain": [
+ "Parsing completed. Parsed 32952 lines in 0.26651 secs."
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "iAwb1livSM_z"
+ },
+ "source": [
+ "df_sjr =sf.to_dataframe()\n",
+ "df_sjr.drop(df_sjr[df_sjr['SJR Best Quartile'] =='-'].index, inplace=True)\n"
+ ],
+ "execution_count": 50,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 386
+ },
+ "id": "DDiqnPltSO7B",
+ "outputId": "01d31872-3276-4179-aba8-f8b1b761c620"
+ },
+ "source": [
+ "sns.displot(df_sjr, x=\"SJR Best Quartile\", y='H index')\n"
+ ],
+ "execution_count": 51,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 51
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "R2xo7M4LUsl1",
+ "outputId": "f83f841c-6e94-42f6-9879-e947a6c38319"
+ },
+ "source": [
+ "df_sjr['H index'].max()"
+ ],
+ "execution_count": 18,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "1226"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 18
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "kSdk2SMiSPK2"
+ },
+ "source": [
+ "g = sns.FacetGrid(df_sjr, col=\"SJR Best Quartile\", margin_titles=True, sharex=True) # one facet (column) per SJR quartile\n",
+ "g.map(sns.histplot, \"H index\", color=\"steelblue\", kde=True, stat=\"density\") # histplot replaces the deprecated sns.distplot (density histogram + KDE)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "zfBX4JuwVEMQ"
+ },
+ "source": [
+ "sns.displot(df_sjr, y='H index', kde=False, hue='SJR Best Quartile') # pass the DataFrame so hue can be resolved by column name; y= draws the histogram sideways (displot has no 'vertical' kwarg); kde=True would add a gaussian kernel density estimate"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 323
+ },
+ "id": "tQvcm171YuHn",
+ "outputId": "48ac5413-b4ca-49a0-ffd8-d8b964c7b013"
+ },
+ "source": [
+ "sns.displot(df_sjr, x='H index', hue=\"SJR Best Quartile\", stat=\"density\",bins=20,col=\"SJR Best Quartile\")\n"
+ ],
+ "execution_count": 53,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 53
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ }
+ }
+ ]
}
]
}
\ No newline at end of file