Commit

update notebook and plots
Martín Mosteiro Romero committed Dec 13, 2022
1 parent a28feac commit dd50302
Showing 5 changed files with 441 additions and 16 deletions.
252 changes: 248 additions & 4 deletions .ipynb_checkpoints/full_paper_workflow-Copy2-checkpoint.ipynb

Large diffs are not rendered by default.

205 changes: 193 additions & 12 deletions full_paper_workflow-Copy2.ipynb
@@ -32,7 +32,9 @@
"# Import packages\n",
"import datetime\n",
"import geopandas as gpd\n",
"import holidays\n",
"import itertools\n",
"import july\n",
"import numpy as np\n",
"import os\n",
"import pandas as pd\n",
@@ -141,12 +143,20 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot sample buildings"
"## Measured data cleanup\n",
"Outliers filtered out using Z-scores"
]
},
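The cleanup cells themselves fall outside this diff hunk; for reference, a minimal sketch of Z-score outlier filtering of the kind described above, assuming one building's measured readings sit in a pandas Series (the function name and the threshold of 3 standard deviations are assumptions, not taken from the notebook):

    import pandas as pd

    def filter_outliers_zscore(series: pd.Series, threshold: float = 3.0) -> pd.Series:
        """Mask values whose Z-score magnitude exceeds the threshold with NaN."""
        z_scores = (series - series.mean()) / series.std()
        return series.where(z_scores.abs() <= threshold)

    # e.g. cleaned = filter_outliers_zscore(measured_data[building])  # 'measured_data' is a placeholder name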
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plot sample buildings"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -200,14 +210,6 @@
"fig.savefig(os.path.join(os.getcwd(), 'plots', 'Electricity_and_cooling_outliers.pdf'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Measured data cleanup\n",
"Outliers filtered out using Z-scores"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -481,7 +483,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -493,7 +495,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -118210,6 +118212,185 @@
" '_'.join(['comparison', metric, demand]) + '.csv'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 4. Clustering WiFi profiles to create occupancy schedules"
]
},
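The clustering cells below call kmeans and vq directly; the notebook's import cell is truncated in this diff, but the call signatures match SciPy's scipy.cluster.vq.kmeans and vq. A minimal sketch of the assumed import and the core clustering step, with daily_profiles standing in for the pivoted days-by-hours matrix built further down:

    import numpy as np
    from scipy.cluster.vq import kmeans, vq

    # daily_profiles: one normalized 24-hour profile per row, shape (n_days, 24)
    daily_profiles = np.random.rand(100, 24)  # placeholder data for illustration
    centers, _ = kmeans(daily_profiles, 4, iter=10000)  # cluster centroids
    labels, _ = vq(daily_profiles, centers)              # nearest centroid per day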
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Number of clusters for k-means clustering\n",
"n_clusters = 4"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Create dict to contain WiFi data\n",
"wifi_data_dict = {}\n",
"for building in list_buildings:\n",
" building_index = wifi_data[building].dropna().index\n",
" wifi_data_dict[building] = wifi_data.loc[building_index, building]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Define colors for plots based on number of clusters\n",
"colors = {1: ['tab:brown'], 2: ['tab:blue', 'tab:cyan'], 3: ['tab:blue', 'tab:brown', 'tab:cyan'], \n",
" 4: ['tab:blue', 'tab:red', 'tab:pink', 'tab:cyan'], \n",
" 5: ['tab:blue', 'tab:purple', 'tab:pink', 'tab:olive', 'tab:cyan'],\n",
" 6: ['tab:blue', 'tab:green', 'tab:purple', 'tab:pink', 'tab:olive', 'tab:cyan']}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Create directories to contain results\n",
"if not os.path.isdir(os.path.join(os.getcwd(), 'occupant_schedules')):\n",
" os.mkdir(os.path.join(os.getcwd(), 'occupant_schedules'))\n",
"if not os.path.isdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters')):\n",
" os.mkdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters'))\n",
"if not os.path.isdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',\n",
" 'k_' + str(n_clusters) + '_pre_and_post')):\n",
" os.mkdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',\n",
" 'k_' + str(n_clusters) + '_pre_and_post'))\n",
"if not os.path.isdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',\n",
" 'k_' + str(n_clusters) + '_pre_and_post', 'schedule_assignments')):\n",
" os.mkdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',\n",
" 'k_' + str(n_clusters) + '_pre_and_post', 'schedule_assignments'))\n",
"if not os.path.isdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',\n",
" 'k_' + str(n_clusters) + '_pre_and_post', 'schedules')):\n",
" os.mkdir(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',\n",
" 'k_' + str(n_clusters) + '_pre_and_post', 'schedules'))\n",
"if not os.path.isdir(os.path.join(os.getcwd(), 'occupant_schedules', 'plots')):\n",
" os.mkdir(os.path.join(os.getcwd(), 'occupant_schedules', 'plots'))\n",
"if not os.path.isdir(os.path.join(\n",
" os.getcwd(), 'occupant_schedules', 'plots', 'k_' + str(n_clusters) + '_pre_and_post')):\n",
" os.mkdir(os.path.join(\n",
" os.getcwd(), 'occupant_schedules', 'plots', 'k_' + str(n_clusters) + '_pre_and_post'))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Get public holidays in Singapore\n",
"public_holidays = [i[0] for i in sorted(holidays.SG(years=range(2018,2021)).items())]"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"for building in [b for b in wifi_data_dict.keys() if (len(wifi_data_dict[b]) > 0)]:\n",
" # create wifi data dataframe\n",
" df = wifi_data_dict[building].to_frame().rename(columns={building: 'wifi'})\n",
" df.index.name = 'DateTime'\n",
" df['Date'] = df.index.date\n",
" df['Time'] = df.index.time\n",
" df['Weekday'] = df.index.weekday\n",
" df['Holiday'] = df.Date.isin(public_holidays)\n",
" # subtract daily minimum\n",
" for d in df.Date.unique():\n",
" df.loc[df.Date==d, 'wifi'] -= df.loc[df.Date==d, 'wifi'].min()\n",
" # create dataframe for cluster assignments\n",
" cluster_assignment = pd.DataFrame(index=pd.date_range('2018-01-01 00:00', '2020-12-31 23:59', freq='D'),\n",
" columns=['cluster'], data=None)\n",
" cluster_assignment['Weekday'] = [d.weekday() for d in cluster_assignment.index]\n",
" cluster_assignment['Holiday'] = cluster_assignment.index.isin(public_holidays)\n",
" # create dataframe to export schedules\n",
" export_clusters = pd.DataFrame(\n",
" index=[datetime.time(i, 0) for i in range(24)],\n",
" columns=['_'.join([str(i), j, str(k[0])]) for i in range(n_clusters) \n",
" for j in ['avg', 'std'] for k in [[2018, 2019], [2020]]], data=None)\n",
" for years in [[2018, 2019], [2020]]:\n",
" # normalize pre-2020 and post-2020 separately\n",
" df.loc[df.index.year.isin(years), 'wifi'] /= df.loc[df.index.year.isin(years), 'wifi'].max()\n",
" df_pivot = pd.pivot_table(df.loc[df.index.year.isin(years)], values='wifi', index='Time',\n",
" columns='Date').dropna(axis=1)\n",
" if len(df_pivot) > 0:\n",
" # run k-means clustering\n",
" matrix_norm = np.matrix(df_pivot).transpose()\n",
" centers, _ = kmeans(matrix_norm, n_clusters, iter=10000)\n",
" cluster, _ = vq(matrix_norm, centers)\n",
"\n",
" cluster_assignment.loc[df_pivot.columns, 'cluster'] = cluster\n",
" for i in range(n_clusters):\n",
" export_clusters['_'.join([str(i), 'avg', str(years[0])])] = df_pivot[\n",
" df_pivot.columns[np.where(cluster==i)]].mean(axis=1)\n",
" export_clusters['_'.join([str(i), 'std', str(years[0])])] = df_pivot[\n",
" df_pivot.columns[np.where(cluster==i)]].std(axis=1)\n",
"\n",
" # save cluster assignments\n",
" cluster_assignment.to_csv(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters', 'k_' + str(\n",
" n_clusters) + '_pre_and_post', 'schedule_assignments', building + '.csv'))\n",
" export_clusters.to_csv(os.path.join(os.getcwd(), 'occupant_schedules', 'clusters', 'k_' + str(\n",
" n_clusters) + '_pre_and_post', 'schedules', building + '.csv'))\n",
"\n",
" # Create plots of cluster assignments\n",
" for years in [[2018, 2019], [2020]]:\n",
" if len(cluster_assignment.loc[cluster_assignment.index.year.isin(years)].dropna()) > 0:\n",
" n_rows = len(years) + 1\n",
" fig, ax = plt.subplots(n_rows, 1, figsize=(15, 5 * n_rows))\n",
" for i, year in enumerate(years):\n",
" # get relevant clusters\n",
" relevant_clusters = [cluster for cluster in cluster_assignment.loc[\n",
" cluster_assignment.index.year == year, 'cluster'].unique() if not np.isnan(cluster)]\n",
" relevant_clusters.sort()\n",
" # plot daily profiles\n",
" for j, n in enumerate(relevant_clusters):\n",
" if f'{str(n)}_avg_{year}' in export_clusters.columns:\n",
" ax[0].plot(range(24), export_clusters[f'{str(n)}_avg_{year}'],\n",
" color=colors[len(relevant_clusters)][j], label=n)\n",
" ax[0].fill_between(range(24), (export_clusters[f'{str(n)}_avg_{year}'] + \n",
" export_clusters[f'{str(n)}_std_{year}']),\n",
" (export_clusters[f'{str(n)}_avg_{year}'] - \n",
" export_clusters[f'{str(n)}_std_{year}']),\n",
" color=colors[len(relevant_clusters)][j], alpha=0.1)\n",
" ax[0].set_title('/'.join(map(str, years)))\n",
" ax[0].set_ylabel('Estimated building occupancy profile')\n",
" ax[0].legend(title='Cluster')\n",
" # plot calendar view\n",
" j = year % 2 + 1\n",
" july.heatmap(cluster_assignment.loc[cluster_assignment.index.year == year].index,\n",
" cluster_assignment.loc[cluster_assignment.index.year == year, 'cluster'],\n",
" cmap=\"tab10\", colorbar=True, ax=ax[j])\n",
" for holiday in [i[0] for i in sorted(holidays.SG(years=year).items())]:\n",
" ax[j].annotate('x', (holiday.isocalendar()[1] - 0.75, holiday.weekday() + 0.65))\n",
" # save plots if and only if there is something to plot\n",
" fig.savefig(os.path.join(\n",
" os.getcwd(), 'occupant_schedules', 'plots',\n",
" f'k_{str(n_clusters)}_pre_and_post', '_'.join([building, '_'.join(map(str, years))]) + '.pdf'))\n",
" plt.close()"
]
},
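For downstream use, the per-building schedule file written above can be read back directly; a brief sketch assuming the same working directory, with the building name as a placeholder:

    import os
    import pandas as pd

    building = 'example_building'  # placeholder name, not taken from the notebook
    n_clusters = 4
    schedule_path = os.path.join(os.getcwd(), 'occupant_schedules', 'clusters',
                                 'k_' + str(n_clusters) + '_pre_and_post', 'schedules', building + '.csv')
    # 24 hourly rows; columns follow the '<cluster>_<avg|std>_<2018|2020>' naming used above
    schedules = pd.read_csv(schedule_path, index_col=0)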
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
Binary file modified plots/Electricity_and_cooling_outliers.pdf
Binary file not shown.
Binary file added plots/Sample_wifi_clusters_2018_2019.pdf
Binary file not shown.
Binary file added plots/Sample_wifi_clusters_2020.pdf
Binary file not shown.
