Commit

update notebook and plots
Martín Mosteiro Romero committed Dec 13, 2022
1 parent a28feac commit dd50302
Showing 5 changed files with 441 additions and 16 deletions.
252 changes: 248 additions & 4 deletions .ipynb_checkpoints/full_paper_workflow-Copy2-checkpoint.ipynb

Large diffs are not rendered by default.

205 changes: 193 additions & 12 deletions full_paper_workflow-Copy2.ipynb
@@ -32,7 +32,9 @@
"# Import packages\n",
"import datetime\n",
"import geopandas as gpd\n",
"import holidays\n",
"import itertools\n",
"import july\n",
"import numpy as np\n",
"import os\n",
"import pandas as pd\n",
@@ -141,12 +143,20 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot sample buildings"
"## Measured data cleanup\n",
"Outliers filtered out using Z-scores"
]
},
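The cleanup cells themselves fall outside this diff hunk; for reference, a minimal sketch of Z-score outlier filtering of the kind described above, assuming one building's measured readings sit in a pandas Series (the function name and the threshold of 3 standard deviations are assumptions, not taken from the notebook):

    import pandas as pd

    def filter_outliers_zscore(series: pd.Series, threshold: float = 3.0) -> pd.Series:
        """Mask values whose Z-score magnitude exceeds the threshold with NaN."""
        z_scores = (series - series.mean()) / series.std()
        return series.where(z_scores.abs() <= threshold)

    # e.g. cleaned = filter_outliers_zscore(measured_data[building])  # 'measured_data' is a placeholder name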
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plot sample buildings"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -200,14 +210,6 @@
"fig.savefig(os.path.join(os.getcwd(), 'plots', 'Electricity_and_cooling_outliers.pdf'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Measured data cleanup\n",
"Outliers filtered out using Z-scores"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -481,7 +483,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -493,7 +495,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -118210,6 +118212,185 @@
" '_'.join(['comparison', metric, demand]) + '.csv'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 4. Clustering WiFi profiles to create occupancy schedules"
]
},
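The clustering cells below call kmeans and vq directly; the notebook's import cell is truncated in this diff, but the call signatures match SciPy's scipy.cluster.vq.kmeans and vq. A minimal sketch of the assumed import and the core clustering step, with daily_profiles standing in for the pivoted days-by-hours matrix built further down:

    import numpy as np
    from scipy.cluster.vq import kmeans, vq

    # daily_profiles: one normalized 24-hour profile per row, shape (n_days, 24)
    daily_profiles = np.random.rand(100, 24)  # placeholder data for illustration
    centers, _ = kmeans(daily_profiles, 4, iter=10000)  # cluster centroids
    labels, _ = vq(daily_profiles, centers)              # nearest centroid per day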
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Number of clusters for k-means clustering\n",
"n_clusters = 4"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Create dict to contain WiFi data\n",
"wifi_data_dict = {}\n",
"for building in list_buildings:\n",
" building_index = wifi_data[building].dropna().index\n",
" wifi_data_dict[building] = wifi_data.loc[building_index, building]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Define colors for plots based on number of clusters\n",
"colors = {1: ['tab:brown'], 2: ['tab:blue', 'tab:cyan'], 3: ['tab:blue', 'tab:brown', 'tab:cyan'], \n",
" 4: ['tab:blue', 'tab:red', 'tab:pink', 'tab:cyan'], \n",
" 5: ['tab:blue', 'tab:purple', 'tab:pink', 'tab:olive', 'tab:cyan'],\n",
" 6: ['tab:blue', 'tab:green', 'tab:purple', 'tab:pink', 'tab:olive', 'tab:cyan']}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Create directories to contain results\n",
"if not os.path.isdir(os.path.join(os.getcwd(), 'occupant_schedules')):\n",
" os.mkdir(os.path.join(os.getcwd(), 'occupant_schedules'))\n",
"if not os.path.isdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters')):\n",
" os.mkdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters'))\n",
"if not os.path.isdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',\n",
" 'k_' + str(n_clusters) + '_pre_and_post')):\n",
" os.mkdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',\n",
" 'k_' + str(n_clusters) + '_pre_and_post'))\n",
"if not os.path.isdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',\n",
" 'k_' + str(n_clusters) + '_pre_and_post', 'schedule_assignments')):\n",
" os.mkdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',\n",
" 'k_' + str(n_clusters) + '_pre_and_post', 'schedule_assignments'))\n",
"if not os.path.isdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',\n",
" 'k_' + str(n_clusters) + '_pre_and_post', 'schedules')):\n",
" os.mkdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',\n",
" 'k_' + str(n_clusters) + '_pre_and_post', 'schedules'))\n",
"if not os.path.isdir(os.path.join(os.getcwd(), 'occupant_schedules', 'plots')):\n",
" os.mkdir(os.path.join(os.getcwd(), 'occupant_schedules', 'plots'))\n",
"if not os.path.isdir(os.path.join(\n",
" os.getcwd(), 'occupant_schedules', 'plots', 'k_' + str(n_clusters) + '_pre_and_post')):\n",
" os.mkdir(os.path.join(\n",
" os.getcwd(), 'occupant_schedules', 'plots', 'k_' + str(n_clusters) + '_pre_and_post'))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Get public holidays in Singapore\n",
"public_holidays = [i[0] for i in sorted(holidays.SG(years=range(2018,2021)).items())]"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"for building in [b for b in wifi_data_dict.keys() if (len(wifi_data_dict[b]) > 0)]:\n",
" # create wifi data dataframe\n",
" df = wifi_data_dict[building].to_frame().rename(columns={building: 'wifi'})\n",
" df.index.name = 'DateTime'\n",
" df['Date'] = df.index.date\n",
" df['Time'] = df.index.time\n",
" df['Weekday'] = df.index.weekday\n",
" df['Holiday'] = df.Date.isin(public_holidays)\n",
" # subtract daily minimum\n",
" for d in df.Date.unique():\n",
" df.loc[df.Date==d, 'wifi'] -= df.loc[df.Date==d, 'wifi'].min()\n",
" # create dataframe for cluster assignments\n",
" cluster_assignment = pd.DataFrame(index=pd.date_range('2018-01-01 00:00', '2020-12-31 23:59', freq='D'),\n",
" columns=['cluster'], data=None)\n",
" cluster_assignment['Weekday'] = [d.weekday() for d in cluster_assignment.index]\n",
" cluster_assignment['Holiday'] = cluster_assignment.index.isin(public_holidays)\n",
" # create dataframe to export schedules\n",
" export_clusters = pd.DataFrame(\n",
" index=[datetime.time(i, 0) for i in range(24)],\n",
" columns=['_'.join([str(i), j, str(k[0])]) for i in range(n_clusters) \n",
" for j in ['avg', 'std'] for k in [[2018, 2019], [2020]]], data=None)\n",
" for years in [[2018, 2019], [2020]]:\n",
" # normalize pre-2020 and post-2020 separately\n",
" df.loc[df.index.year.isin(years), 'wifi'] /= df.loc[df.index.year.isin(years), 'wifi'].max()\n",
" df_pivot = pd.pivot_table(df.loc[df.index.year.isin(years)], values='wifi', index='Time',\n",
" columns='Date').dropna(axis=1)\n",
" if len(df_pivot) > 0:\n",
" # run k-means clustering\n",
" matrix_norm = np.matrix(df_pivot).transpose()\n",
" centers, _ = kmeans(matrix_norm, n_clusters, iter=10000)\n",
" cluster, _ = vq(matrix_norm, centers)\n",
"\n",
" cluster_assignment.loc[df_pivot.columns, 'cluster'] = cluster\n",
" for i in range(n_clusters):\n",
" export_clusters['_'.join([str(i), 'avg', str(years[0])])] = df_pivot[\n",
" df_pivot.columns[np.where(cluster==i)]].mean(axis=1)\n",
" export_clusters['_'.join([str(i), 'std', str(years[0])])] = df_pivot[\n",
" df_pivot.columns[np.where(cluster==i)]].std(axis=1)\n",
"\n",
" # save cluster assignments\n",
" cluster_assignment.to_csv(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters', 'k_' + str(\n",
" n_clusters) + '_pre_and_post', 'schedule_assignments', building + '.csv'))\n",
" export_clusters.to_csv(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters', 'k_' + str(\n",
" n_clusters) + '_pre_and_post', 'schedules', building + '.csv'))\n",
"\n",
" # Create plots of cluster assignments\n",
" for years in [[2018, 2019], [2020]]:\n",
" if len(cluster_assignment.loc[cluster_assignment.index.year.isin(years)].dropna()) > 0:\n",
" n_rows = len(years) + 1\n",
" fig, ax = plt.subplots(n_rows, 1, figsize=(15, 5 * n_rows))\n",
" for i, year in enumerate(years):\n",
" # get relevant clusters\n",
" relevant_clusters = [cluster for cluster in cluster_assignment.loc[\n",
" cluster_assignment.index.year == year, 'cluster'].unique() if not np.isnan(cluster)]\n",
" relevant_clusters.sort()\n",
" # plot daily profiles\n",
" for j, n in enumerate(relevant_clusters):\n",
" if f'{str(n)}_avg_{year}' in export_clusters.columns:\n",
" ax[0].plot(range(24), export_clusters[f'{str(n)}_avg_{year}'],\n",
" color=colors[len(relevant_clusters)][j], label=n)\n",
" ax[0].fill_between(range(24), (export_clusters[f'{str(n)}_avg_{year}'] + \n",
" export_clusters[f'{str(n)}_std_{year}']),\n",
" (export_clusters[f'{str(n)}_avg_{year}'] - \n",
" export_clusters[f'{str(n)}_std_{year}']),\n",
" color=colors[len(relevant_clusters)][j], alpha=0.1)\n",
" ax[0].set_title('/'.join(map(str, years)))\n",
" ax[0].set_ylabel('Estimated building occupancy profile')\n",
" ax[0].legend(title='Cluster')\n",
" # plot calendar view\n",
" j = year % 2 + 1\n",
" july.heatmap(cluster_assignment.loc[cluster_assignment.index.year == year].index,\n",
" cluster_assignment.loc[cluster_assignment.index.year == year, 'cluster'],\n",
" cmap=\"tab10\", colorbar=True, ax=ax[j])\n",
" for holiday in [i[0] for i in sorted(holidays.SG(years=year).items())]:\n",
" ax[j].annotate('x', (holiday.isocalendar()[1] - 0.75, holiday.weekday() + 0.65))\n",
" # save plots if and only if there is something to plot\n",
" fig.savefig(os.path.join(\n",
" os.getcwd(), 'occupant_schedules', 'plots',\n",
" f'k_{str(n_clusters)}_pre_and_post', '_'.join([building, '_'.join(map(str, years))]) + '.pdf'))\n",
" plt.close()"
]
},
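For downstream use, the per-building schedule file written above can be read back directly; a brief sketch assuming the same working directory, with the building name as a placeholder:

    import os
    import pandas as pd

    building = 'example_building'  # placeholder name, not taken from the notebook
    n_clusters = 4
    schedule_path = os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',
                                 'k_' + str(n_clusters) + '_pre_and_post', 'schedules', building + '.csv')
    # 24 hourly rows; columns follow the '<cluster>_<avg|std>_<2018|2020>' naming used above
    schedules = pd.read_csv(schedule_path, index_col=0)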
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
Binary file modified plots/Electricity_and_cooling_outliers.pdf
Binary file not shown.
Binary file added plots/Sample_wifi_clusters_2018_2019.pdf
Binary file not shown.
Binary file added plots/Sample_wifi_clusters_2020.pdf
Binary file not shown.
