diff --git a/HW_2.ipynb b/HW_2.ipynb
new file mode 100644
index 0000000..6b85c7c
--- /dev/null
+++ b/HW_2.ipynb
@@ -0,0 +1,512 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.7"
+ },
+ "colab": {
+ "name": "HW 2.ipynb",
+ "provenance": [],
+ "include_colab_link": true
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DfUHKA19ifXX"
+ },
+ "source": [
+ "# Homework Assignment 2\n",
+ "### [The Art of Analyzing Big Data - The Data Scientist’s Toolbox](https://www.ise.bgu.ac.il/labs/fire/lectures.html)\n",
+ "#### By Dr. Michael Fire "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "D9hGo7nXifXa"
+ },
+ "source": [
+ "## Dataset Collecting"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "D15-Sk2lifXc"
+ },
+ "source": [
+ "**Question 1**: Write a function that collects all titles and the number of votes for each title on a given [Hacker News page](http://news.ycombinator.com) (15pt)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "dR0z-yztifXe"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "fpKRAkjtifXk"
+ },
+ "source": [
+ "**Question 2**: Write a function that collects data on four James Bond movies from [The Movie Database](https://www.themoviedb.org). You can use [tmdbv3api](https://pypi.org/project/tmdbv3api/) (15pt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "32w2fs6JifXl"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "AUdAUefVifXo"
+ },
+ "source": [
+ "## Kickstarter Projects Dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zx58iFpDifXo"
+ },
+ "source": [
+ "Using the [Kickstarter Projects Dataset](https://www.kaggle.com/kemical/kickstarter-projects#ks-projects-201801.csv) and Pandas, please answer one of the following questions:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "9C5Pn9SkifXp"
+ },
+ "source": [
+ "### Please answer only **one** of the following questions, chosen according to your ID number (use the formula **(ID number + 1) mod 3 + 1**)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Nttgf82QifXq"
+ },
+ "source": [
+ "# Which question to answer: put your ID number below and run the cell.\n",
+ "your_id = \"\"\n",
+ "\n",
+ "# An empty or non-numeric ID makes int() fail with an opaque \"invalid literal\"\n",
+ "# message, so re-raise the same exception type with an actionable message.\n",
+ "try:\n",
+ "    id_number = int(your_id)\n",
+ "except ValueError:\n",
+ "    raise ValueError(\"Set your_id to your numeric ID number before running this cell\") from None\n",
+ "\n",
+ "# Selection formula from the instructions: (ID number + 1) mod 3 + 1 -> 1, 2 or 3.\n",
+ "q = (id_number + 1) % 3 + 1\n",
+ "print(\"You need to answer questions %s and 4\" % q)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "po4uxd2tifXu"
+ },
+ "source": [
+ "**Question 1:** On average which project category received the lowest number of backers? (15 pt) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "7tWX81YeifXv"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-5TYWzlxifXx"
+ },
+ "source": [
+ "**Question 2:** On average which project category received the lowest pledged USD? (15 pt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "PMYZplK9ifXy"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "O0sULSUlifX0"
+ },
+ "source": [
+ "**Question 3:** In which month is there the lowest number of projects? (15 pt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "EflUthvqifX1"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "jKZ19m4bifX3"
+ },
+ "source": [
+ "**Question 4 (for all):** Visualize your answer using matplotlib or seaborn (15pt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "01gCsLrqifX3"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "v5NWpk8sx9BT",
+ "outputId": "90f440eb-096f-429d-d61e-333e1eb833e9",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "source": [
+ "# Mount Google Drive into the Colab VM; a later cell copies kaggle.json from it.\n",
+ "from google.colab import drive\n",
+ "\n",
+ "DRIVE_MOUNT_POINT = '/content/drive'\n",
+ "drive.mount(DRIVE_MOUNT_POINT)"
+ ],
+ "execution_count": 1,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Mounted at /content/drive\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "VjmQ5hWOx_QO",
+ "outputId": "24f4f47e-d1a7-445a-d77f-8bda7ba5dd22",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "source": [
+ "import json\n",
+ "import os\n",
+ "\n",
+ "# Install the Kaggle package\n",
+ "!pip install kaggle\n",
+ "\n",
+ "# Option A (manual, disabled): fill in your own key, run once to create kaggle.json,\n",
+ "# then remember to **remove** your API key from the notebook before sharing it.\n",
+ "#api_token = {\"username\":\"\",\"key\":\"\"}\n",
+ "#with open('/root/.kaggle/kaggle.json', 'w') as file:\n",
+ "#    json.dump(api_token, file)\n",
+ "\n",
+ "# Option B (used here): copy kaggle.json from the mounted Google Drive.\n",
+ "# -p keeps the cell re-runnable: plain mkdir errors once the directory exists.\n",
+ "!mkdir -p /root/.kaggle/\n",
+ "!cp /content/drive/MyDrive/kaggle.json /root/.kaggle/kaggle.json\n",
+ "# Restrict the credentials file to owner read/write only.\n",
+ "!chmod 600 /root/.kaggle/kaggle.json"
+ ],
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: kaggle in /usr/local/lib/python3.7/dist-packages (1.5.12)\n",
+ "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.15.0)\n",
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from kaggle) (4.62.3)\n",
+ "Requirement already satisfied: certifi in /usr/local/lib/python3.7/dist-packages (from kaggle) (2021.5.30)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.23.0)\n",
+ "Requirement already satisfied: python-slugify in /usr/local/lib/python3.7/dist-packages (from kaggle) (5.0.2)\n",
+ "Requirement already satisfied: urllib3 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.24.3)\n",
+ "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.8.2)\n",
+ "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.7/dist-packages (from python-slugify->kaggle) (1.3)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (2.10)\n",
+ "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (3.0.4)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "PpjMJgB1ifX6"
+ },
+ "source": [
+ "## The Marvel Universe Social Network"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "oVRgZuN5ifX6"
+ },
+ "source": [
+ "Using the [The Marvel Universe Social Network](https://www.kaggle.com/csanhueza/the-marvel-universe-social-network) and Pandas, please answer the following questions:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "XpdIMnqxyCa7",
+ "outputId": "55e1b0fa-e7c2-4151-b715-2f86cfbc12c1",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "source": [
+ "# Create the dataset directory (-p: creates parents, no error if it already exists,\n",
+ "# so the cell can be re-run safely).\n",
+ "!mkdir -p ./datasets/marvel\n",
+ "\n",
+ "# Download the dataset file from Kaggle into the dataset directory.\n",
+ "!kaggle datasets download csanhueza/the-marvel-universe-social-network -f hero-network.csv -p ./datasets/marvel/\n",
+ "\n",
+ "# Each `!` line runs in its own subshell, so a directory change would not persist;\n",
+ "# all paths are therefore given relative to the notebook's working directory.\n",
+ "# -o overwrites existing files without prompting, so a re-run does not stall.\n",
+ "!unzip -o ./datasets/marvel/hero-network.csv.zip -d ./datasets/marvel"
+ ],
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Archive: ./datasets/marvel/hero-network.csv.zip\n",
+ " inflating: ./datasets/marvel/hero-network.csv \n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Ifgyk4SVy0gg",
+ "outputId": "24e480aa-e923-487a-8847-4edcf56df3fc",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 204
+ }
+ },
+ "source": [
+ "# Load the hero-network CSV that the previous cell downloaded and unzipped.\n",
+ "import pandas as pd  # we will talk more about pandas in our next lecture\n",
+ "\n",
+ "HERO_NETWORK_CSV = '/content/datasets/marvel/hero-network.csv'\n",
+ "df = pd.read_csv(HERO_NETWORK_CSV, engine='python')\n",
+ "\n",
+ "# Preview the first rows as the cell's output.\n",
+ "df.head()"
+ ],
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " hero1 | \n",
+ " hero2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " LITTLE, ABNER | \n",
+ " PRINCESS ZANDA | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " LITTLE, ABNER | \n",
+ " BLACK PANTHER/T'CHAL | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " BLACK PANTHER/T'CHAL | \n",
+ " PRINCESS ZANDA | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " LITTLE, ABNER | \n",
+ " PRINCESS ZANDA | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " LITTLE, ABNER | \n",
+ " BLACK PANTHER/T'CHAL | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " hero1 hero2\n",
+ "0 LITTLE, ABNER PRINCESS ZANDA\n",
+ "1 LITTLE, ABNER BLACK PANTHER/T'CHAL\n",
+ "2 BLACK PANTHER/T'CHAL PRINCESS ZANDA\n",
+ "3 LITTLE, ABNER PRINCESS ZANDA\n",
+ "4 LITTLE, ABNER BLACK PANTHER/T'CHAL"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 9
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "E91-E4ufifX7"
+ },
+ "source": [
+ "**Question 1:** Write code that calculates the top-10 most friendly characters, i.e., the characters with the \n",
+ " highest number of friends. Please use the _hero-network.csv_ file (15pt). \n",
+ " \n",
+ "**Note:** Not all the links in this dataset are symmetric."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "gYIoTJNIifX8"
+ },
+ "source": [
+ "# Map every hero in the `hero1` column to the list of heroes it is linked to in `hero2`.\n",
+ "hero_dic = {}\n",
+ "new_df = {'hero1':[] , 'hero2':[]}  # not populated in this cell\n",
+ "\n",
+ "# Iterate over the two columns directly: same (hero1, hero2) pairs as df.iterrows(),\n",
+ "# without building a Series object for every row.\n",
+ "for hero1, hero2 in zip(df['hero1'], df['hero2']):\n",
+ "    # Key on the hero's name (the variable, not the string \"hero1\") and create the\n",
+ "    # list only the first time a hero is seen, so links from earlier rows are kept.\n",
+ "    if hero1 not in hero_dic:\n",
+ "        hero_dic[hero1] = []\n",
+ "    hero_dic[hero1].append(hero2)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "EqcKzoUsifX-"
+ },
+ "source": [
+ "**Question 2:** Using Pandas and Networkx create a graph object of The Marvel Universe Social Network with the 150 most \"friendly\" characters (10pt).\n",
+ " The size of each vertex in that graph needs to be proportional to the character's number of links (also referred to as the vertex degree) (10pt).\n",
+ "Please color each node in the graph according to its character type, as given in the *nodes.csv* file (5pt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "_4650rgeifX_"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ELLg4egiifYA"
+ },
+ "source": [
+ "**Bonus:** Visualize the above network using [Cytoscape](https://cytoscape.org) or [Gephi](https://gephi.org) (10pt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "TQvUWrqWifYB"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file