From f455c4ce0037225fad32d1ae91758786201c6732 Mon Sep 17 00:00:00 2001 From: Ed Chalstrey Date: Wed, 17 May 2023 13:43:31 +0100 Subject: [PATCH 1/6] copy bits to mcq nb --- multiple-choice-qs.ipynb | 129 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 multiple-choice-qs.ipynb diff --git a/multiple-choice-qs.ipynb b/multiple-choice-qs.ipynb new file mode 100644 index 0000000..8d1769a --- /dev/null +++ b/multiple-choice-qs.ipynb @@ -0,0 +1,129 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import spreadsheet cleaning loader\n", + "from load_survey_data import load_data, headers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime \n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "# Requires openpyxl for read_excel\n", + "from pathlib import Path\n", + "import re\n", + "from IPython.display import Markdown" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sheet = \"summarise-all-results-2023-05-16-1359.xlsx\"\n", + "format = \"%Y-%m-%d %H:%M:%S %Z\"\n", + "df = load_data(sheet, format)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def show_ranking_answer(column, ax, y_max):\n", + " counts = column.value_counts().sort_index()\n", + " counts.plot(kind='bar', ax=ax)\n", + " ax.set_ylim(0, y_max + 1)\n", + " ax.grid(axis='y')\n", + " # ax.tick_params(axis=\"x\", labelrotation='auto')\n", + " ax.set_ylabel(truncate_with_ellipsis(h, 55))\n", + " return ax\n", + "def truncate_with_ellipsis(s, n):\n", + " if len(s) > n:\n", + " s = s[:(n - 1)]\n", + " return f\"{s}…\"\n", + " return s" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Maximum number for any answer\n", + "y_max = 0\n", + "for (h, t) in headers.items():\n", + " if t == \"RANKING\":\n", + " y_max = max(y_max, df[h].value_counts().max())\n", + "\n", + "max_plots_in_one_section = 5\n", + "\n", + "matplotlib.rcParams.update({'figure.max_open_warning': 0})\n", + "\n", + "fig = None\n", + "axs = []\n", + "ax_n = 0\n", + "for (h, t) in headers.items():\n", + " if t != \"RANKING\":\n", + " # Delete unused plot spaces\n", + " for ax in axs[ax_n:]:\n", + " fig.delaxes(ax)\n", + " # Show plot NOW, so that we can interleave plots and markdown\n", + " if ax_n:\n", + " plt.show()\n", + " fig = None\n", + " axs = []\n", + " ax_n = 0\n", + "\n", + " if t == \"SECTION\":\n", + " display(Markdown(f\"## {h}\"))\n", + " # if t == \"TEXT\":\n", + " # show_text_answer(df[h])\n", + " if t == \"RANKING\":\n", + " if not fig:\n", + " fig, axs = plt.subplots(1, max_plots_in_one_section, figsize=(10, 5))\n", + " fig.subplots_adjust(wspace=1)\n", + " ax = axs[ax_n]\n", + " show_ranking_answer(df[h], ax, y_max)\n", + " ax_n += 1" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "satre-feature-survey", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From b4fa5226baa18cf918b0d5eb46254af68088116e Mon Sep 17 00:00:00 2001 From: Ed Chalstrey Date: Wed, 17 May 2023 14:05:37 +0100 Subject: [PATCH 2/6] separate function --- mcq_tables.py | 2 ++ multiple-choice-qs.ipynb | 32 ++++++++++---------------------- 2 files changed, 12 insertions(+), 22 deletions(-) create mode 100644 mcq_tables.py diff --git a/mcq_tables.py b/mcq_tables.py new file mode 100644 index 0000000..f44a0c0 --- /dev/null +++ b/mcq_tables.py @@ -0,0 +1,2 @@ +def make_table(column): + return column.value_counts().sort_index() \ No newline at end of file diff --git a/multiple-choice-qs.ipynb b/multiple-choice-qs.ipynb index 8d1769a..2e8acdc 100644 --- a/multiple-choice-qs.ipynb +++ b/multiple-choice-qs.ipynb @@ -7,7 +7,8 @@ "outputs": [], "source": [ "# Import spreadsheet cleaning loader\n", - "from load_survey_data import load_data, headers" + "from load_survey_data import load_data, headers\n", + "from mcq_tables import make_table" ] }, { @@ -43,21 +44,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "def show_ranking_answer(column, ax, y_max):\n", - " counts = column.value_counts().sort_index()\n", - " counts.plot(kind='bar', ax=ax)\n", - " ax.set_ylim(0, y_max + 1)\n", - " ax.grid(axis='y')\n", - " # ax.tick_params(axis=\"x\", labelrotation='auto')\n", - " ax.set_ylabel(truncate_with_ellipsis(h, 55))\n", - " return ax\n", - "def truncate_with_ellipsis(s, n):\n", - " if len(s) > n:\n", - " s = s[:(n - 1)]\n", - " return f\"{s}…\"\n", - " return s" - ] + "source": [] }, { "cell_type": "code", @@ -65,6 +52,12 @@ "metadata": {}, "outputs": [], "source": [ + "def truncate_with_ellipsis(s, n):\n", + " if len(s) > n:\n", + " s = s[:(n - 1)]\n", + " return f\"{s}…\"\n", + " return s\n", + "\n", "# Maximum number for any answer\n", "y_max = 0\n", "for (h, t) in headers.items():\n", @@ -95,12 +88,7 @@ " # if t == \"TEXT\":\n", " # show_text_answer(df[h])\n", " if t == \"RANKING\":\n", - " if not fig:\n", - " fig, axs = plt.subplots(1, max_plots_in_one_section, figsize=(10, 5))\n", - " fig.subplots_adjust(wspace=1)\n", - " ax = axs[ax_n]\n", - " show_ranking_answer(df[h], ax, y_max)\n", - " ax_n += 1" + " print(make_table(df[h]))" ] } ], From 98f2eee000b935623bafb9961432a6736aba2ecf Mon Sep 17 00:00:00 2001 From: Ed Chalstrey Date: Wed, 17 May 2023 14:47:56 +0100 Subject: [PATCH 3/6] convert to percentage --- mcq_tables.py | 8 +++++++- multiple-choice-qs.ipynb | 39 +-------------------------------------- 2 files changed, 8 insertions(+), 39 deletions(-) diff --git a/mcq_tables.py b/mcq_tables.py index f44a0c0..383fc73 100644 --- a/mcq_tables.py +++ b/mcq_tables.py @@ -1,2 +1,8 @@ def make_table(column): - return column.value_counts().sort_index() \ No newline at end of file + """Converts to percentages and makes md table""" + table_dict = column.value_counts().sort_index().to_dict() + total = sum(table_dict.values()) + for k, v in table_dict.items(): + perc = round(v / total * 100, 2) + table_dict[k] = '%.2f' % perc + "%" + return table_dict diff --git a/multiple-choice-qs.ipynb b/multiple-choice-qs.ipynb index 2e8acdc..feca5a6 100644 --- a/multiple-choice-qs.ipynb +++ b/multiple-choice-qs.ipynb @@ -39,55 +39,18 @@ "df = load_data(sheet, format)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "def truncate_with_ellipsis(s, n):\n", - " if len(s) > n:\n", - " s = s[:(n - 1)]\n", - " return f\"{s}…\"\n", - " return s\n", "\n", - "# Maximum number for any answer\n", - "y_max = 0\n", "for (h, t) in headers.items():\n", - " if t == \"RANKING\":\n", - " y_max = max(y_max, df[h].value_counts().max())\n", - "\n", - "max_plots_in_one_section = 5\n", - "\n", - "matplotlib.rcParams.update({'figure.max_open_warning': 0})\n", - "\n", - "fig = None\n", - "axs = []\n", - "ax_n = 0\n", - "for (h, t) in headers.items():\n", - " if t != \"RANKING\":\n", - " # Delete unused plot spaces\n", - " for ax in axs[ax_n:]:\n", - " fig.delaxes(ax)\n", - " # Show plot NOW, so that we can interleave plots and markdown\n", - " if ax_n:\n", - " plt.show()\n", - " fig = None\n", - " axs = []\n", - " ax_n = 0\n", - "\n", " if t == \"SECTION\":\n", " display(Markdown(f\"## {h}\"))\n", - " # if t == \"TEXT\":\n", - " # show_text_answer(df[h])\n", " if t == \"RANKING\":\n", + " display(Markdown(f\"#### {h}\"))\n", " print(make_table(df[h]))" ] } From 92426e2716ff9e526c4b2c8b12328c227c541e98 Mon Sep 17 00:00:00 2001 From: Ed Chalstrey Date: Wed, 17 May 2023 15:08:19 +0100 Subject: [PATCH 4/6] make md table --- mcq_tables.py | 16 ++++++++++++---- multiple-choice-qs.ipynb | 2 +- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/mcq_tables.py b/mcq_tables.py index 383fc73..9174294 100644 --- a/mcq_tables.py +++ b/mcq_tables.py @@ -1,8 +1,16 @@ def make_table(column): - """Converts to percentages and makes md table""" + """Converts to percentages and prints md table""" table_dict = column.value_counts().sort_index().to_dict() total = sum(table_dict.values()) + first_iteration = True for k, v in table_dict.items(): - perc = round(v / total * 100, 2) - table_dict[k] = '%.2f' % perc + "%" - return table_dict + perc = '%.2f' % round(v / total * 100, 2) + "%" + if first_iteration: + header_str = k + count_str = perc + else: + header_str += ' | ' + k + count_str += ' | ' + perc + first_iteration = False + print(header_str) + print(count_str) diff --git a/multiple-choice-qs.ipynb b/multiple-choice-qs.ipynb index feca5a6..8b18ccf 100644 --- a/multiple-choice-qs.ipynb +++ b/multiple-choice-qs.ipynb @@ -50,7 +50,7 @@ " if t == \"SECTION\":\n", " display(Markdown(f\"## {h}\"))\n", " if t == \"RANKING\":\n", - " display(Markdown(f\"#### {h}\"))\n", + " print(\"#### \", h)\n", " print(make_table(df[h]))" ] } From 2fe3264e092d4af03c7b65d7a64c2a66391a22c5 Mon Sep 17 00:00:00 2001 From: Ed Chalstrey Date: Wed, 17 May 2023 15:15:04 +0100 Subject: [PATCH 5/6] remove print --- multiple-choice-qs.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiple-choice-qs.ipynb b/multiple-choice-qs.ipynb index 8b18ccf..ef1caf2 100644 --- a/multiple-choice-qs.ipynb +++ b/multiple-choice-qs.ipynb @@ -51,7 +51,7 @@ " display(Markdown(f\"## {h}\"))\n", " if t == \"RANKING\":\n", " print(\"#### \", h)\n", - " print(make_table(df[h]))" + " make_table(df[h])" ] } ], From 3f3549dd3896d44d2d80fb0c48f6c1fd6c06c04a Mon Sep 17 00:00:00 2001 From: Ed Chalstrey Date: Wed, 17 May 2023 15:25:56 +0100 Subject: [PATCH 6/6] refactor table code --- mcq_tables.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/mcq_tables.py b/mcq_tables.py index 9174294..9aefd5f 100644 --- a/mcq_tables.py +++ b/mcq_tables.py @@ -2,15 +2,12 @@ def make_table(column): """Converts to percentages and prints md table""" table_dict = column.value_counts().sort_index().to_dict() total = sum(table_dict.values()) - first_iteration = True + header_str, between_str, count_str = '', '', '' for k, v in table_dict.items(): perc = '%.2f' % round(v / total * 100, 2) + "%" - if first_iteration: - header_str = k - count_str = perc - else: - header_str += ' | ' + k - count_str += ' | ' + perc - first_iteration = False + header_str += '| ' + k + ' ' + between_str += '| --- ' + count_str += '| ' + perc + ' ' print(header_str) + print(between_str) print(count_str)