diff --git a/resources/costing/Minute_Salary_HR.csv b/resources/costing/Minute_Salary_HR.csv new file mode 100644 index 0000000000..64fec2c8f1 --- /dev/null +++ b/resources/costing/Minute_Salary_HR.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1731535fc81a7918dcaf6eceda21452999828515bb1b781c433361af6acd00e2 +size 35276 diff --git a/resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv b/resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv new file mode 100644 index 0000000000..ae50af04f5 --- /dev/null +++ b/resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2457b9b914a1b356ba64168790f99467a86f459760268a729a6ddaf719b45b7 +size 245 diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py new file mode 100644 index 0000000000..fddfd2eddd --- /dev/null +++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py @@ -0,0 +1,2444 @@ +""" +This file analyses and plots the services, DALYs, Deaths within different scenarios of expanding current hr by officer +type given some extra budget. Return on investment and marginal productivity of each officer type will be examined. + +The scenarios are defined in scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py. 
+""" + +import argparse +from collections import Counter +from pathlib import Path +from typing import Tuple + +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt + +from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import ( + Minute_Salary_by_Cadre_Level, + extra_budget_fracs, +) +from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import ( + HRHExpansionByCadreWithExtraBudget, +) +from tlo import Date +from tlo.analysis.utils import ( + APPT_TYPE_TO_COARSE_APPT_TYPE_MAP, + CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP, + SHORT_TREATMENT_ID_TO_COLOR_MAP, + bin_hsi_event_details, + compute_mean_across_runs, + extract_results, + summarize, +) + +# rename scenarios +substitute_labels = { + 's_0': 'no_extra_budget_allocation', + 's_1': 'all_cadres_current_allocation', + 's_2': 'all_cadres_gap_allocation', + 's_3': 'all_cadres_equal_allocation', + 's_4': 'Clinical (C)', 's_5': 'DCSA (D)', 's_6': 'Nursing_and_Midwifery (N&M)', 's_7': 'Pharmacy (P)', + 's_8': 'Other (O)', + 's_9': 'C + D', 's_10': 'C + N&M', 's_11': 'C + P', 's_12': 'C + O', 's_13': 'D + N&M', + 's_14': 'D + P', 's_15': 'D + O', 's_16': 'N&M + P', 's_17': 'N&M + O', 's_18': 'P + O', + 's_19': 'C + D + N&M', 's_20': 'C + D + P', 's_21': 'C + D + O', 's_22': 'C + N&M + P', 's_23': 'C + N&M + O', + 's_24': 'C + P + O', 's_25': 'D + N&M + P', 's_26': 'D + N&M + O', 's_27': 'D + P + O', 's_28': 'N&M + P + O', + 's_29': 'C + D + N&M + P', 's_30': 'C + D + N&M + O', 's_31': 'C + D + P + O', 's_32': 'C + N&M + P + O', + 's_33': 'D + N&M + P + O', +} + + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, + the_target_period: Tuple[Date, Date] = None): + """ + Extract results of number of services by appt type, number of DALYs, number of Deaths in the target period. 
+ (To see whether to extract these results by short treatment id and/or disease.) + Calculate the extra budget allocated, extra staff by cadre, return on investment and marginal productivity by cadre. + """ + TARGET_PERIOD = the_target_period + + # Definitions of general helper functions + make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + e = HRHExpansionByCadreWithExtraBudget() + return tuple(e._scenarios.keys()) + + def get_num_appts(_df): + """Return the number of services by appt type (total within the TARGET_PERIOD)""" + return (_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code'] + .apply(pd.Series) + .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP) + .groupby(level=0, axis=1).sum() + .sum()) + + def get_num_appts_by_level(_df): + """Return the number of services by appt type and facility level (total within the TARGET_PERIOD)""" + def unpack_nested_dict_in_series(_raw: pd.Series): + return pd.concat( + { + idx: pd.DataFrame.from_dict(mydict) for idx, mydict in _raw.items() + } + ).unstack().fillna(0.0).astype(int) + + return _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code_And_Level'] \ + .pipe(unpack_nested_dict_in_series) \ + .sum(axis=0) + + def get_num_services(_df): + """Return the number of services in total of all appt types (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code'] + .apply(pd.Series).sum().sum() + ) + + def get_num_treatments(_df): + """Return the number of treatments by short treatment id (total within 
the TARGET_PERIOD)""" + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum() + _df.index = _df.index.map(lambda x: x.split('_')[0] + "*") + _df = _df.groupby(level=0).sum() + return _df + + def get_num_treatments_total(_df): + """Return the number of treatments in total of all treatments (total within the TARGET_PERIOD)""" + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum() + _df.index = _df.index.map(lambda x: x.split('_')[0] + "*") + _df = _df.groupby(level=0).sum().sum() + return pd.Series(_df) + + def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD)""" + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + + def get_num_dalys(_df): + """Return total number of DALYS (Stacked) (total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + def get_num_dalys_yearly(_df): + """Return total number of DALYS (Stacked) for every year in the TARGET_PERIOD. + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
+ _df = (_df.loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range']) + .groupby('year').sum() + .sum(axis=1) + ) + return _df + + def get_num_dalys_by_cause(_df): + """Return total number of DALYS by cause (Stacked) (total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + return (_df + .loc[_df.year.between(*years_needed)].drop(columns=['date', 'year', 'li_wealth']) + .sum(axis=0) + ) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def find_difference_relative_to_comparison_series( + _ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. 
+ The comparison is `X - COMPARISON`.""" + return (_ser + .unstack(level=0) + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) + .drop(columns=([comparison] if drop_comparison else [])) + .stack() + ) + + def find_difference_relative_to_comparison_dataframe(_df: pd.DataFrame, **kwargs): + """Apply `find_difference_relative_to_comparison_series` to each row in a dataframe""" + return pd.concat({ + _idx: find_difference_relative_to_comparison_series(row, **kwargs) + for _idx, row in _df.iterrows() + }, axis=1).T + + # group scenarios for presentation + def scenario_grouping_coloring(by='effect'): + if by == 'effect': # based on DALYs averted/whether to expand Clinical + Pharmacy + grouping = { + 'C + P + D/N&M/O/None': {'s_1', 's_2', 's_3', 's_11', 's_20', 's_22', 's_24', 's_29', 's_31', 's_32'}, + 'C + D/N&M/O/None': {'s_4', 's_9', 's_10', 's_12', 's_19', 's_21', 's_23', 's_30'}, + 'P + D/N&M/O/None': {'s_7', 's_14', 's_16', 's_18', 's_25', 's_27', 's_28', 's_33'}, + 'D/N&M/O/None': {'s_5', 's_6', 's_8', 's_13', 's_15', 's_17', 's_26', 's_0'} + } + grouping_color = { + 'D/N&M/O/None': 'lightpink', + 'P + D/N&M/O/None': 'violet', + 'C + D/N&M/O/None': 'darkorchid', + 'C + P + D/N&M/O/None': 'darkturquoise', + } + elif by == 'expansion': # based on how many cadres are expanded + grouping = { + 'no_expansion': {'s_0'}, + 'all_cadres_equal_expansion': {'s_3'}, + 'all_cadres_gap_expansion': {'s_2'}, + 'all_cadres_current_expansion': {'s_1'}, + 'one_cadre_expansion': {'s_4', 's_5', 's_6', 's_7', 's_8'}, + 'two_cadres_equal_expansion': {'s_9', 's_10', 's_11', 's_12', 's_13', + 's_14', 's_15', 's_16', 's_17', 's_18'}, + 'three_cadres_equal_expansion': {'s_19', 's_20', 's_21', 's_22', 's_23', + 's_24', 's_25', 's_26', 's_27', 's_28'}, + 'four_cadres_equal_expansion': {'s_29', 's_30', 's_31', 's_32', 's_33'} + + } + grouping_color = { + 'no_expansion': 'gray', + 'one_cadre_expansion': 'lightpink', + 'two_cadres_equal_expansion': 'violet', + 
'three_cadres_equal_expansion': 'darkorchid', + 'four_cadres_equal_expansion': 'paleturquoise', + 'all_cadres_equal_expansion': 'darkturquoise', + 'all_cadres_current_expansion': 'deepskyblue', + 'all_cadres_gap_expansion': 'royalblue', + } + return grouping, grouping_color + + def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar. + Annotated with percent statistics from _df_percent, if annotation=True and _df_percent not None.""" + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} + + colors = [scenario_color[s] for s in _df.index] + + fig, ax = plt.subplots(figsize=(18, 6)) + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + alpha=0.8, + ecolor='black', + color=colors, + capsize=10, + label=xticks.values(), + zorder=100, + ) + + if annotation: + assert (_df.index == _df_percent.index).all() + for xpos, ypos, text1, text2, text3 in zip(xticks.keys(), _df['upper'].values, + _df_percent['mean'].values, + _df_percent['lower'].values, + _df_percent['upper'].values): + text = f"{int(round(text1 * 100, 2))}%\n{[round(text2, 2),round(text3, 2)]}" + ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize='xx-small') + + ax.set_xticks(list(xticks.keys())) + + xtick_label_detail = [substitute_labels[v] for v in xticks.values()] + ax.set_xticklabels(xtick_label_detail, rotation=90) + + legend_labels = list(scenario_groups[1].keys()) + legend_handles = [plt.Rectangle((0, 0), 1, 1, + color=scenario_groups[1][label]) for label in legend_labels] + ax.legend(legend_handles, legend_labels, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5), + title='Scenario groups') + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + 
fig.tight_layout() + + return fig, ax + + def get_scale_up_factor(_df): + """ + Return a series of yearly scale up factors for all cadres, + with index of year and value of list of scale up factors. + """ + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year_of_scale_up', 'scale_up_factor'] + ].set_index('year_of_scale_up') + _df = _df['scale_up_factor'].apply(pd.Series) + assert (_df.columns == cadres).all() + _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index} + _df_1 = pd.DataFrame(data=_dict).T + return pd.Series( + _df_1.loc[:, 0], index=_df_1.index + ) + + def get_total_cost(_df): + """ + Return a series of yearly total cost for all cadres, + with index of year and values of list of total cost. + """ + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year_of_scale_up', 'total_hr_salary'] + ].set_index('year_of_scale_up') + _df = _df['total_hr_salary'].apply(pd.Series) + assert (_df.columns == cadres).all() + _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index} + _df_1 = pd.DataFrame(data=_dict).T + return pd.Series( + _df_1.loc[:, 0], index=_df_1.index + ) + + def get_current_hr(cadres): + """ + Return current (year of 2018/2019) staff counts and capabilities for the cadres specified. 
+ """ + curr_hr_path = Path(resourcefilepath + / 'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv') + curr_hr = pd.read_csv(curr_hr_path).groupby('Officer_Category').agg( + {'Staff_Count': 'sum', 'Total_Mins_Per_Day': 'sum'}).reset_index() + curr_hr['Total_Minutes_Per_Year'] = curr_hr['Total_Mins_Per_Day'] * 365.25 + curr_hr.drop(['Total_Mins_Per_Day'], axis=1, inplace=True) + curr_hr_counts = curr_hr.loc[ + curr_hr['Officer_Category'].isin(cadres), ['Officer_Category', 'Staff_Count'] + ].set_index('Officer_Category').T + curr_hr_capabilities = curr_hr.loc[ + curr_hr['Officer_Category'].isin(cadres), ['Officer_Category', 'Total_Minutes_Per_Year'] + ].set_index('Officer_Category').T + + return curr_hr_counts[cadres], curr_hr_capabilities[cadres] + + def get_hr_salary(cadres): + """ + Return annual salary for the cadres specified. + """ + salary_path = Path(resourcefilepath + / 'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv') + salary = pd.read_csv(salary_path, index_col=False) + salary = salary.loc[ + salary['Officer_Category'].isin(cadres), ['Officer_Category', 'Annual_Salary_USD'] + ].set_index('Officer_Category').T + return salary[cadres] + + def format_appt_time_and_cost(): + """ + Return the formatted appointment time requirements and costs per cadre + """ + file_path = Path(resourcefilepath + / 'healthsystem' / 'human_resources' / 'definitions' / 'ResourceFile_Appt_Time_Table.csv') + _df = pd.read_csv(file_path, index_col=False) + + time = _df.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category', + values='Time_Taken_Mins').fillna(0.0).T + minute_salary = Minute_Salary_by_Cadre_Level + cost = _df.merge(minute_salary, on=['Facility_Level', 'Officer_Category'], how='left') + cost['cost_USD'] = cost['Time_Taken_Mins'] * cost['Minute_Salary_USD'] + cost = cost.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category', + values='cost_USD').fillna(0.0).T + + return time, cost + + 
def get_frac_of_hcw_time_used(_df): + """Return the fraction of time used by cadre and facility level""" + # CNP_cols = ['date'] + # for col in _df.columns[1:]: + # if ('Clinical' in col) | ('Nursing_and_Midwifery' in col) | ('Pharmacy' in col): + # CNP_cols.append(col) + # + # _df = _df[CNP_cols].copy() + _df = _df.loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), :] + _df = _df.set_index('date').mean(axis=0) # average over years + + return _df + + def get_hcw_time_by_treatment(): + appointment_time_table = pd.read_csv( + resourcefilepath + / 'healthsystem' + / 'human_resources' + / 'definitions' + / 'ResourceFile_Appt_Time_Table.csv', + index_col=["Appt_Type_Code", "Facility_Level", "Officer_Category"] + ) + + appt_type_facility_level_officer_category_to_appt_time = ( + appointment_time_table.Time_Taken_Mins.to_dict() + ) + + officer_categories = appointment_time_table.index.levels[ + appointment_time_table.index.names.index("Officer_Category") + ].to_list() + + times_by_officer_category_treatment_id_per_draw_run = bin_hsi_event_details( + results_folder, + lambda event_details, count: sum( + [ + Counter({ + ( + officer_category, + event_details["treatment_id"].split("_")[0] + ): + count + * appt_number + * appt_type_facility_level_officer_category_to_appt_time.get( + ( + appt_type, + event_details["facility_level"], + officer_category + ), + 0 + ) + for officer_category in officer_categories + }) + for appt_type, appt_number in event_details["appt_footprint"] + ], + Counter() + ), + *TARGET_PERIOD, + True + ) + + time_by_cadre_treatment_per_draw = compute_mean_across_runs(times_by_officer_category_treatment_id_per_draw_run) + + # transform counter to dataframe + def format_time_by_cadre_treatment(_df): + _df.reset_index(drop=False, inplace=True) + for idx in _df.index: + _df.loc[idx, 'Cadre'] = _df.loc[idx, 'index'][0] + _df.loc[idx, 'Treatment'] = _df.loc[idx, 'index'][1] + _df = _df.drop('index', axis=1).rename(columns={0: 'value'}).pivot( + 
index='Treatment', columns='Cadre', values='value').fillna(0.0) + + _series = _df.sum(axis=1) # sum up cadres + + return _df, _series + + # time_by_cadre_treatment_all_scenarios = { + # f's_{key}': format_time_by_cadre_treatment( + # pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index') + # )[0] for key in range(len(param_names)) + # } + # + # time_increased_by_cadre_treatment = { + # key: time_by_cadre_treatment_all_scenarios[key] - time_by_cadre_treatment_all_scenarios['s_2'] + # for key in time_by_cadre_treatment_all_scenarios.keys() + # } + + time_by_treatment_all_scenarios = { + f's_{key}': format_time_by_cadre_treatment( + pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index') + )[1] for key in range(len(param_names)) + + } + time_by_treatment_all_scenarios = pd.DataFrame(time_by_treatment_all_scenarios).T + + # rename index of scenario to match with real draw number + time_by_treatment_all_scenarios.rename( + index={'s_1': 's_10', 's_2': 's_11', 's_3': 's_16', 's_4': 's_22'}, + inplace=True) + + time_increased_by_treatment = time_by_treatment_all_scenarios.subtract( + time_by_treatment_all_scenarios.loc['s_0', :], axis=1).drop('s_0', axis=0).add_suffix('*') + + return time_increased_by_treatment + + # Get parameter/scenario names + param_names = ('s_0', 's_10', 's_11', 's_16', 's_22') + + # Define cadres in order + cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'] + + # Get appointment time and cost requirement + appt_time, appt_cost = format_appt_time_and_cost() + + # Get current (year of 2018/2019) hr counts + # curr_hr = get_current_hr(cadres)[0] + curr_hr_cap = get_current_hr(cadres)[1] + + # Get scale up factors for all scenarios + scale_up_factors = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HRScaling', + custom_generate_series=get_scale_up_factor, + do_scaling=False + 
).pipe(set_param_names_as_column_index_level_0).stack(level=0) + # check that the scale up factors are all most the same between each run within each draw + # assert scale_up_factors.eq(scale_up_factors.iloc[:, 0], axis=0).all().all() + # keep scale up factors of only one run within each draw + scale_up_factors = scale_up_factors.iloc[:, 0].unstack().reset_index().melt(id_vars='index') + scale_up_factors[cadres] = scale_up_factors.value.tolist() + scale_up_factors.drop(columns='value', inplace=True) + + # get total capabilities by cadre in the target period + hcw_time_capabilities = scale_up_factors.copy() + assert (hcw_time_capabilities.columns[2:] == curr_hr_cap.columns).all() + hcw_time_capabilities[hcw_time_capabilities.columns[2:]] = ( + hcw_time_capabilities[hcw_time_capabilities.columns[2:]].mul(curr_hr_cap.values, axis=1)) + hcw_time_capabilities = hcw_time_capabilities.groupby(by=['draw']).sum().drop(columns=['index']) # sum up years + + # # Get salary + # salary = get_hr_salary(cadres) + # + # # Get total cost for all scenarios + # total_cost = extract_results( + # results_folder, + # module='tlo.methods.healthsystem.summary', + # key='HRScaling', + # custom_generate_series=get_total_cost, + # do_scaling=False + # ).pipe(set_param_names_as_column_index_level_0).stack(level=0) + # total_cost = total_cost.iloc[:, 0].unstack().reset_index().melt(id_vars='index') + # total_cost[cadres] = total_cost.value.tolist() + # total_cost.drop(columns='value', inplace=True) + # total_cost['all_cadres'] = total_cost[[c for c in total_cost.columns if c in cadres]].sum(axis=1) + # total_cost.rename(columns={'index': 'year'}, inplace=True) + # + # # total cost of all expansion years + # total_cost_all_yrs = total_cost.groupby('draw').sum().drop(columns='year') + # + # # total extra cost of all expansion years + # extra_cost_all_yrs = total_cost_all_yrs.copy() + # for s in param_names[1:]: + # extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - 
total_cost_all_yrs.loc['s_0', :] + # extra_cost_all_yrs.drop(index='s_0', inplace=True) + # + # # get staff count = total cost / salary + # staff_count = total_cost.copy() + # for c in cadres: + # staff_count.loc[:, c] = total_cost.loc[:, c] / salary[c].values[0] + # staff_count.loc[:, 'all_cadres'] = staff_count[[c for c in staff_count.columns if c in cadres]].sum(axis=1) + # + # # get extra count = staff count - staff count of no expansion s_1 + # # note that annual staff increase rate = scale up factor - 1 + # extra_staff = staff_count.copy() + # for i in staff_count.index: + # extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:] + # + # # extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop( + # # index='s_1' + # # ) + # # staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw') + # + # # check total cost calculated is increased as expected + # years = range(2019, the_target_period[1].year + 1) + # for s in param_names[1:]: + # assert (abs( + # total_cost.loc[(total_cost.year == 2029) & (total_cost.draw == s), 'all_cadres'].values[0] - + # (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2019) & (total_cost.draw == 's_0'), + # 'all_cadres'].values[0] + # ) < 1e6).all() + + # Absolute Number of Deaths and DALYs and Services + num_deaths = extract_results( + results_folder, + module='tlo.methods.demography', + key='death', + custom_generate_series=get_num_deaths, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_dalys = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + # num_dalys_yearly = extract_results( + # results_folder, + # module='tlo.methods.healthburden', + # key='dalys_stacked', + # custom_generate_series=get_num_dalys_yearly, + # 
do_scaling=True + # ).pipe(set_param_names_as_column_index_level_0) + + num_dalys_by_cause = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_by_wealth_stacked_by_age_and_time", + custom_generate_series=get_num_dalys_by_cause, + do_scaling=True, + ).pipe(set_param_names_as_column_index_level_0) + + num_appts = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_appts, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_appts_by_level = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_appts_by_level, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_services = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_services, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_treatments = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_treatments, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_treatments_total = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_treatments_total, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_never_ran_appts = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_num_appts, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_never_ran_appts_by_level = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + 
key='Never_ran_HSI_Event', + custom_generate_series=get_num_appts_by_level, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_never_ran_services = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_num_services, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + # num_never_ran_treatments_total = extract_results( + # results_folder, + # module='tlo.methods.healthsystem.summary', + # key='Never_ran_HSI_Event', + # custom_generate_series=get_num_treatments_total, + # do_scaling=True + # ).pipe(set_param_names_as_column_index_level_0) + + # num_never_ran_treatments = extract_results( + # results_folder, + # module='tlo.methods.healthsystem.summary', + # key='Never_ran_HSI_Event', + # custom_generate_series=get_num_treatments, + # do_scaling=True + # ).pipe(set_param_names_as_column_index_level_0) + + # get total service demand + assert len(num_services) == len(num_never_ran_services) == 1 + assert (num_services.columns == num_never_ran_services.columns).all() + # num_services_demand = num_services + num_never_ran_services + # ratio_services = num_services / num_services_demand + + assert (num_appts.columns == num_never_ran_appts.columns).all() + num_never_ran_appts.loc['Lab / Diagnostics', :] = 0 + num_never_ran_appts = num_never_ran_appts.reindex(num_appts.index).fillna(0.0) + assert (num_appts.index == num_never_ran_appts.index).all() + # num_appts_demand = num_appts + num_never_ran_appts + + hcw_time_usage = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Capacity_By_OfficerType_And_FacilityLevel',#'Capacity',#'Capacity_By_OfficerType_And_FacilityLevel', + custom_generate_series=get_frac_of_hcw_time_used, + do_scaling=False + ).pipe(set_param_names_as_column_index_level_0) + + # get absolute numbers for scenarios + # sort the scenarios according to their DALYs values, in ascending order + 
num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names).sort_values(by='mean') + num_dalys_by_cause_summarized = summarize(num_dalys_by_cause, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + + # num_dalys_yearly_summarized = (summarize(num_dalys_yearly) + # .stack([0, 1]) + # .rename_axis(['year', 'scenario', 'stat']) + # .reset_index(name='count')) + # + # num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + + num_services_summarized = summarize(num_services).loc[0].unstack().reindex(param_names).reindex( + num_dalys_summarized.index + ) + # num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + num_appts_by_level_summarized = summarize(num_appts_by_level, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index).fillna(0.0) + num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex( + param_names).reindex(num_dalys_summarized.index).fillna(0.0) + # num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + # num_treatments_total_summarized = summarize(num_treatments_total).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + + # num_never_ran_services_summarized = summarize(num_never_ran_services).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + # num_never_ran_appts_summarized = summarize(num_never_ran_appts, only_mean=True).T.reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + # num_never_ran_treatments_summarized = summarize(num_never_ran_treatments, 
only_mean=True).T.reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + # num_never_ran_treatments_total_summarized = summarize(num_never_ran_treatments_total).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + # num_services_demand_summarized = summarize(num_services_demand).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + # ratio_service_summarized = summarize(ratio_services).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + hcw_time_usage_summarized = summarize(hcw_time_usage, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + hcw_time_usage_summarized.columns = [col.replace('OfficerType=', '').replace('FacilityLevel=', '') + for col in hcw_time_usage_summarized.columns] + hcw_time_usage_summarized.columns = hcw_time_usage_summarized.columns.str.split(pat='|', expand=True) + + # get relative numbers for scenarios, compared to no_expansion scenario: s_0 + num_services_increased = summarize( + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_services.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + hcw_time_increased_by_treatment_type = get_hcw_time_by_treatment().reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_services_increased_percent = summarize( + # pd.DataFrame( + # find_difference_relative_to_comparison_series( + # num_services.loc[0], + # comparison='s_1', + # scaled=True) + # ).T + # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1']) + + num_deaths_averted = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_deaths.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_deaths_averted_percent = summarize( + -1.0 * + pd.DataFrame( + 
find_difference_relative_to_comparison_series( + num_deaths.loc[0], + comparison='s_0', + scaled=True) + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_dalys_averted = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_dalys.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_dalys_averted_percent = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_dalys.loc[0], + comparison='s_0', + scaled=True + ) + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_dalys_by_cause_averted = summarize( + -1.0 * find_difference_relative_to_comparison_dataframe( + num_dalys_by_cause, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_dalys_by_cause_averted_percent = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_dalys_by_cause, + # comparison='s_0', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_2', :].sort_values(ascending=False) + # # num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False) + # num_dalys_by_cause_averted_percent_CNP = num_dalys_by_cause_averted_percent.loc['s_2', :].sort_values( + # ascending=False) + # # num_dalys_by_cause_averted_percent_CP = num_dalys_by_cause_averted_percent.loc['s_11', :].sort_values( + # # ascending=False) + + # num_dalys_by_cause_averted_percent = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_dalys_by_cause, + # comparison='s_1', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + num_appts_increased = summarize( + 
find_difference_relative_to_comparison_dataframe( + num_appts, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_never_ran_appts_reduced = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_never_ran_appts, + # comparison='s_1', + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + # num_never_ran_treatments_reduced = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_never_ran_treatments, + # comparison='s_1', + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + # num_appts_increased_percent = summarize( + # find_difference_relative_to_comparison_dataframe( + # num_appts, + # comparison='s_1', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + num_treatments_increased = summarize( + find_difference_relative_to_comparison_dataframe( + num_treatments, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_treatments_increased_percent = summarize( + # find_difference_relative_to_comparison_dataframe( + # num_treatments, + # comparison='s_1', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + num_treatments_total_increased = summarize( + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_treatments_total.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_treatments_total_increased_percent = summarize( + # pd.DataFrame( + # find_difference_relative_to_comparison_series( + # num_treatments_total.loc[0], + # comparison='s_1', + # scaled=True) + # ).T + # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1']) + + # service_ratio_increased = summarize( + # pd.DataFrame( + # 
find_difference_relative_to_comparison_series( + # ratio_services.loc[0], + # comparison='s_1') + # ).T + # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1']) + + # service_ratio_increased_percent = summarize( + # pd.DataFrame( + # find_difference_relative_to_comparison_series( + # ratio_services.loc[0], + # comparison='s_1', + # scaled=True) + # ).T + # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1']) + + # Check that when we sum across the causes/appt types, + # we get the same total as calculated when we didn't split by cause/appt type. + assert ( + (num_appts_increased.sum(axis=1).sort_index() + - num_services_increased['mean'].sort_index() + ) < 1e-6 + ).all() + + assert ( + (num_dalys_by_cause_averted.sum(axis=1).sort_index() + - num_dalys_averted['mean'].sort_index() + ) < 1e-6 + ).all() + + assert ( + (num_treatments_increased.sum(axis=1).sort_index() + - num_treatments_total_increased['mean'].sort_index() + ) < 1e-6 + ).all() + + # get time used by services delivered + def hcw_time_or_cost_used(time_cost_df=appt_time, count_df=num_appts_by_level_summarized): + cols_1 = count_df.columns + cols_2 = time_cost_df.columns + # check that appts (at a level) not in appt_time (as defined) have count 0 and drop them + # assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() -> ('2', 'Tomography') + # replace Tomography from level 2 to level 3 + count_df.loc[:, ('3', 'Tomography')] += count_df.loc[:, ('2', 'Tomography')] + count_df.loc[:, ('2', 'Tomography')] = 0 + assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() + if len(list(set(cols_1) - set(cols_2))) > 0: + _count_df = count_df.drop(columns=list(set(cols_1) - set(cols_2))) + else: + _count_df = count_df.copy() + assert set(_count_df.columns).issubset(set(cols_2)) + # calculate hcw time used + use = pd.DataFrame(index=_count_df.index, + columns=time_cost_df.index) + for i in use.index: + for j in 
use.columns: + use.loc[i, j] = _count_df.loc[i, :].mul( + time_cost_df.loc[j, _count_df.columns] + ).sum() + # reorder columns to be consistent with cadres + use = use[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Radiography']] + # reorder index to be consistent with descending order of DALYs averted + use = use.reindex(num_dalys_summarized.index) + + # calculate time used by cadre and level + used_by_cadre_level = { + key: time_cost_df[_count_df.columns].mul(_count_df.loc[key, :].values, axis=1).rename( + columns={'1b': '2'}, level=0).groupby( + level=0, axis=1).sum().T.unstack().T for key in _count_df.index + } + used_by_cadre_level = pd.DataFrame.from_dict(used_by_cadre_level, orient='index') + + return use, used_by_cadre_level + + hcw_time_used = hcw_time_or_cost_used(time_cost_df=appt_time)[0] + hcw_time_used_increased = pd.DataFrame( + hcw_time_used.subtract(hcw_time_used.loc['s_0', :], axis=1).drop('s_0', axis=0) + ) + hcw_time_used_by_cadre_level = hcw_time_or_cost_used(time_cost_df=appt_time)[1] + + # get hcw capabilities rescaled + assert set(hcw_time_used_by_cadre_level.columns).issubset(set(hcw_time_usage_summarized.columns)) + assert (hcw_time_usage_summarized[ + list(set(hcw_time_usage_summarized.columns) - set(hcw_time_used_by_cadre_level.columns)) + ] == 0.0).all().all() + hcw_time_capabilities_rescaled = (hcw_time_used_by_cadre_level / + hcw_time_usage_summarized[hcw_time_used_by_cadre_level.columns]) + hcw_time_capabilities_rescaled = hcw_time_capabilities_rescaled.groupby(level=0, axis=1).sum() + hcw_time_capabilities_rescaled = hcw_time_capabilities_rescaled[hcw_time_used.columns] + hcw_time_capabilities_increased = pd.DataFrame( + hcw_time_capabilities_rescaled.subtract( + hcw_time_capabilities_rescaled.loc['s_0', :], axis=1).drop('s_0', axis=0) + ) + + # get hcw capabilities not rescaled + hcw_time_capabilities = 
hcw_time_capabilities.reindex(num_dalys_summarized.index).drop(columns='Nutrition') + + # get HCW time and cost needed to run the never run appts + def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by_level_summarized): + cols_1 = count_df.columns + cols_2 = time_cost_df.columns + # check that never ran appts (at a level) not in appt_time (as defined) have count 0 and drop them + assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() + if len(list(set(cols_1) - set(cols_2))) > 0: + _count_df = count_df.drop(columns=list(set(cols_1) - set(cols_2))) + else: + _count_df = count_df.copy() + assert set(_count_df.columns).issubset(set(cols_2)) + # calculate hcw time gap + gap = pd.DataFrame(index=_count_df.index, + columns=time_cost_df.index) + for i in gap.index: + for j in gap.columns: + gap.loc[i, j] = _count_df.loc[i, :].mul( + time_cost_df.loc[j, _count_df.columns] + ).sum() + # reorder columns to be consistent with cadres + gap = gap[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Radiography']] + # reorder index to be consistent with + gap = gap.reindex(num_dalys_summarized.index) + + return gap + + hcw_time_gap = hcw_time_or_cost_gap(appt_time) + hcw_cost_gap = hcw_time_or_cost_gap(appt_cost) + + # hcw time demand to meet ran + never ran services + assert (hcw_time_used.index == hcw_time_gap.index).all() + assert (hcw_time_used.columns == hcw_time_gap.columns).all() + hcw_time_demand = hcw_time_used + hcw_time_gap + # hcw_time_demand_increased = pd.DataFrame( + # hcw_time_demand.subtract(hcw_time_demand.loc['s_0', :], axis=1).drop('s_0', axis=0) + # ) + + # cost gap proportions of cadres within each scenario + hcw_cost_gap_percent = pd.DataFrame(index=hcw_cost_gap.index, columns=hcw_cost_gap.columns) + for i in hcw_cost_gap_percent.index: + hcw_cost_gap_percent.loc[i, :] = hcw_cost_gap.loc[i, :] / hcw_cost_gap.loc[i, :].sum() + # add a column of 'other' to sum up other cadres 
+ hcw_cost_gap_percent['Other'] = hcw_cost_gap_percent[ + ['Dental', 'Laboratory', 'Mental', 'Radiography'] + ].sum(axis=1) + + # # store the proportions of no expansion scenario as the "best" scenario that is to be tested + # hcw_cost_gap_percent_no_expansion = hcw_cost_gap_percent.loc[ + # 's_1', ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other'] + # ].copy() # [0.4586, 0.0272, 0.3502, 0.1476, 0.0164] + + # find appts that need Clinical + Pharmacy (+ Nursing_and_Midwifery) + # then calculate hcw time needed for these appts (or treatments, need treatment and their appt footprint) + # in never run set + # so we can explain that expand C+P is reducing the never run appts and bring health benefits across scenarios + # then the next question is what proportion for C and P and any indication for better extra budget allocation + # so that never ran appts will be reduced and DALYs could be averted further? + def get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Pharmacy'], appts_count_all=num_never_ran_appts_by_level_summarized + ): + # find the appts that need all cadres in cadres_to_find + def find_never_ran_appts_that_need_specific_cadres(): + appts_to_find = [] + _common_cols = appt_time.columns.intersection(appts_count_all.columns) + # already checked above that columns in the latter that are not in the former have 0 count + for col in _common_cols: + if ((appt_time.loc[cadres_to_find, col] > 0).all() + and (appt_time.loc[~appt_time.index.isin(cadres_to_find), col] == 0).all()): + appts_to_find.append(col) + + return appts_to_find + + # counts and count proportions of all never ran + _appts = find_never_ran_appts_that_need_specific_cadres() + _counts = (appts_count_all[_appts].groupby(level=1, axis=1).sum() + .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP).groupby(level=0, axis=1).sum() + .reindex(num_dalys_summarized.index)) + _counts_all = (appts_count_all.groupby(level=1, axis=1).sum() + 
.rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP).groupby(level=0, axis=1).sum() + .reindex(num_dalys_summarized.index)) + assert (_counts.index == _counts_all.index).all() + _proportions = _counts / _counts_all[_counts.columns] + + # hcw time gap and proportions + _time_gap = hcw_time_or_cost_gap(appt_time, appts_count_all[_appts]) + assert (_time_gap.index == hcw_time_gap.index).all() + _time_gap_proportions = _time_gap / hcw_time_gap[_time_gap.columns] + + # hcw cost gap and proportions + _cost_gap = hcw_time_or_cost_gap(appt_cost, appts_count_all[_appts]) + assert (_cost_gap.index == hcw_cost_gap.index).all() + _cost_gap_proportions = _cost_gap / hcw_cost_gap[_cost_gap.columns] + # cost gap distribution among cadres + _cost_gap_percent = pd.DataFrame(index=_cost_gap.index, columns=_cost_gap.columns) + for i in _cost_gap_percent.index: + _cost_gap_percent.loc[i, :] = _cost_gap.loc[i, :] / _cost_gap.loc[i, :].sum() + + # if sum up all appt types/cadres + _proportions_total = _counts.sum(axis=1) / _counts_all.sum(axis=1) + _cost_gap_proportions_total = _cost_gap.sum(axis=1) / hcw_cost_gap.sum(axis=1) + _time_gap_proportions_total = _time_gap.sum(axis=1) / hcw_time_gap.sum(axis=1) + + return (_proportions_total, _cost_gap_proportions_total, _cost_gap, _cost_gap_percent, + _time_gap_proportions_total, _time_gap) + + never_ran_appts_info_that_need_CNP = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Nursing_and_Midwifery', 'Pharmacy']) + never_ran_appts_info_that_need_CP = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Pharmacy']) + never_ran_appts_info_that_need_CN = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Nursing_and_Midwifery']) + never_ran_appts_info_that_need_NP = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Nursing_and_Midwifery', 'Pharmacy']) + never_ran_appts_info_that_need_C = 
get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical']) + never_ran_appts_info_that_need_N = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Nursing_and_Midwifery']) + never_ran_appts_info_that_need_P = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Pharmacy']) + + # cost/time proportions within never ran appts, in total of all cadres + p_cost = pd.DataFrame(index=num_services_summarized.index) + p_cost['C and P and N&M'] = never_ran_appts_info_that_need_CNP[1] + p_cost['C and P'] = never_ran_appts_info_that_need_CP[1] + p_cost['C and N&M'] = never_ran_appts_info_that_need_CN[1] + p_cost['N&M and P'] = never_ran_appts_info_that_need_NP[1] + p_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[1] + p_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[1] + p_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[1] + p_cost['Other cases'] = 1 - p_cost[p_cost.columns[0:7]].sum(axis=1) + + p_time = pd.DataFrame(index=num_services_summarized.index) + p_time['C and P and N&M'] = never_ran_appts_info_that_need_CNP[4] + p_time['C and P'] = never_ran_appts_info_that_need_CP[4] + p_time['C and N&M'] = never_ran_appts_info_that_need_CN[4] + p_time['N&M and P'] = never_ran_appts_info_that_need_NP[4] + p_time['Clinical (C)'] = never_ran_appts_info_that_need_C[4] + p_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[4] + p_time['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[4] + p_time['Other cases'] = 1 - p_time[p_time.columns[0:7]].sum(axis=1) + + # absolute cost/time gap within never ran appts + a_cost = pd.DataFrame(index=num_services_summarized.index) + a_cost['C and P and N&M'] = never_ran_appts_info_that_need_CNP[2].sum(axis=1) + a_cost['C and P'] = never_ran_appts_info_that_need_CP[2].sum(axis=1) + a_cost['C and N&M'] = never_ran_appts_info_that_need_CN[2].sum(axis=1) + a_cost['N&M and P'] = never_ran_appts_info_that_need_NP[2].sum(axis=1) + 
a_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[2].sum(axis=1) + a_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[2].sum(axis=1) + a_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[2].sum(axis=1) + a_cost['Other cases'] = hcw_cost_gap.sum(axis=1) - a_cost.sum(axis=1) + + a_time = pd.DataFrame(index=num_services_summarized.index) + a_time['C and P and N&M'] = never_ran_appts_info_that_need_CNP[5].sum(axis=1) + a_time['C and P'] = never_ran_appts_info_that_need_CP[5].sum(axis=1) + a_time['C and N&M'] = never_ran_appts_info_that_need_CN[5].sum(axis=1) + a_time['N&M and P'] = never_ran_appts_info_that_need_NP[5].sum(axis=1) + a_time['Clinical (C)'] = never_ran_appts_info_that_need_C[5].sum(axis=1) + a_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[5].sum(axis=1) + a_time['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[5].sum(axis=1) + a_time['Other cases'] = hcw_time_gap.sum(axis=1) - a_time.sum(axis=1) + + # appts count proportions within never ran appts, in total of all cadres + p_count = pd.DataFrame(index=num_services_summarized.index) + p_count['C and P and N&M'] = never_ran_appts_info_that_need_CNP[0] + p_count['C and P'] = never_ran_appts_info_that_need_CP[0] + p_count['C and N&M'] = never_ran_appts_info_that_need_CN[0] + p_count['N&M and P'] = never_ran_appts_info_that_need_NP[0] + p_count['Clinical (C)'] = never_ran_appts_info_that_need_C[0] + p_count['Pharmacy (P)'] = never_ran_appts_info_that_need_P[0] + p_count['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[0] + p_count['Other cases'] = 1 - p_count[p_count.columns[0:7]].sum(axis=1) + + # define color for the cadres combinations above + # cadre_comb_color = { + # 'C and P and N&M': 'royalblue', + # 'C and P': 'turquoise', + # 'C and N&M': 'gold', + # 'N&M and P': 'yellowgreen', + # 'Clinical (C)': 'mediumpurple', + # 'Pharmacy (P)': 'limegreen', + # 'Nursing_and_Midwifery (N&M)': 'pink', + # 'Other cases': 'gray', + # } + 
+ # Checked that Number_By_Appt_Type_Code and Number_By_Appt_Type_Code_And_Level have not exactly same results + + # hcw time by cadre and treatment: draw = 22: C + N + P vs no expansion, draw = 11, C + P vs no expansion + # time_increased_by_cadre_treatment_CNP = get_hcw_time_by_treatment(21) + # time_increased_by_cadre_treatment_CP = get_hcw_time_by_treatment(10) + + # # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1 + # # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios + # ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns) + # # todo: for the bad scenarios (s_5, s_8, s_15), the dalys averted are negative + # # (maybe only due to statistical variation; relative difference to s_1 are close to 0%), + # # thus CE does not make sense. + # # CE = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns) + # for i in ROI.index: + # ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost_all_yrs.loc[i, 'all_cadres'] + # # CE.loc[i, 'mean'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'mean'] + # # CE.loc[i, 'lower'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'upper'] + # # CE.loc[i, 'upper'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower'] + + # prepare colors for plots + # appt_color = { + # appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized.columns + # } + treatment_color = { + treatment: SHORT_TREATMENT_ID_TO_COLOR_MAP.get(treatment, np.nan) + for treatment in num_treatments_summarized.columns + } + cause_color = { + cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan) + for cause in num_dalys_by_cause_summarized.columns + } + officer_category_color = { + 'Clinical': 'blue', + 'DCSA': 'orange', + 'Nursing_and_Midwifery': 'red', + 'Pharmacy': 'green', + 'Dental': 'purple', + 
'Laboratory': 'orchid', + 'Mental': 'plum', + 'Nutrition': 'thistle', + 'Radiography': 'lightgray', + 'Other': 'gray' + } + # get scenario color + # scenario_groups = scenario_grouping_coloring(by='effect') + scenario_groups = scenario_grouping_coloring(by='expansion') + scenario_color = {} + for s in param_names: + for k in scenario_groups[1].keys(): + if s in scenario_groups[0][k]: + scenario_color[s] = scenario_groups[1][k] + + # representative_scenarios_color = {} + # cmap_list = list(map(plt.get_cmap("Set3"), range(len(param_names)))) + # for i in range(len(param_names)): + # representative_scenarios_color[num_dalys_summarized.index[i]] = cmap_list[i] + + # plot 4D data: relative increases of Clinical, Pharmacy, and Nursing_and_Midwifery as three coordinates,\ + # percentage of DALYs averted decides the color of that scatter point + extra_budget_allocation = extra_budget_fracs.T.reindex(num_dalys_summarized.index) + extra_budget_allocation['Other'] = extra_budget_allocation[ + ['Dental', 'Laboratory', 'Mental', 'Radiography'] + ].sum(axis=1) + name_of_plot = f'3D DALYs averted (%) vs no extra budget allocation, {target_period()}' + heat_data = pd.merge(num_dalys_averted_percent['mean'], + extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']], + left_index=True, right_index=True, how='inner') + # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + colors = [scenario_color[s] for s in heat_data.index] + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + ax.scatter(heat_data['Clinical'], heat_data['Pharmacy'], heat_data['Nursing_and_Midwifery'], + alpha=0.8, marker='o', s=heat_data['mean'] * 2000, + #c=heat_data['mean'] * 100, cmap='viridis', + c=colors) + # plot lines from the best point to three axes panes + # ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]], + # [heat_data['Pharmacy'][0], 
heat_data['Pharmacy'][0]], + # [0, heat_data['Nursing_and_Midwifery'][0]], + # linestyle='--', color='gray', alpha=0.8) + # ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]], + # [0, heat_data['Pharmacy'][0]], + # [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]], + # linestyle='--', color='gray', alpha=0.8) + # ax.plot3D([0, heat_data['Clinical'][0]], + # [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]], + # [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]], + # linestyle='--', color='gray', alpha=0.8) + ax.set_xlabel('Fraction of extra budget allocated to \nClinical cadre (C)') + ax.set_ylabel('Pharmacy cadre (P)') + #ax.invert_xaxis() + ax.invert_yaxis() + ax.set_zlabel('Nursing and Midwifery (N&M)') + ax.plot3D([0, 1], [0, 1], [0, 1], linestyle='-', color='orange', alpha=1.0, linewidth=2) + legend_labels = list(scenario_groups[1].keys()) + ['line of C = P = N&M'] + legend_handles = [plt.Line2D([0, 0], [0, 0], + linestyle='none', marker='o', color=scenario_groups[1][label] + ) for label in legend_labels[0:len(legend_labels) - 1] + ] + [plt.Line2D([0, 1], [0, 0], linestyle='-', color='orange')] + plt.legend(legend_handles, legend_labels, + loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + title='Scenario groups') + # plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.25) + plt.title(name_of_plot) + plt.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'3D DALYs averted, Services increased and Treatment increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_0, 's_22'] + # # heat_data = 
heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig = plt.figure() + # ax = fig.add_subplot(111, projection='3d') + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', + # c=colors) + # ax.set_xlabel('Services increased %') + # ax.set_ylabel('Treatments increased %') + # ax.set_zlabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'2D DALYs averted, Services increased and Treatment increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2], + # alpha=0.8, marker='o', s=2000 * heat_data.iloc[:, 0], + # c=colors) + # ax.set_xlabel('Services increased %') + # ax.set_ylabel('Treatments increased %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper 
center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs averted and Services increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', c=colors) + # ax.set_xlabel('Services increased %') + # ax.set_ylabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs averted and Treatments increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * 
heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', c=colors) + # ax.set_xlabel('Treatments increased %') + # ax.set_ylabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2) + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs averted and Services ratio increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], service_ratio_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', c=colors) + # ax.set_xlabel('Service delivery ratio increased %') + # ax.set_ylabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2) + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # # do some linear regression to see the marginal effects of individual cadres and combined effects of C, N, P cadres + # outcome_data = num_dalys_averted_percent['mean'] + # # 
outcome = num_services_increased_percent['mean'] + # # outcome = num_treatments_total_increased_percent['mean'] + # regression_data = pd.merge(outcome_data, + # extra_budget_allocation, + # left_index=True, right_index=True, how='inner') + # regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy'] + # regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery'] + # regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery'] + # regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy'] + # * regression_data['Nursing_and_Midwifery']) + # cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography', 'Other'] + # regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True) + # predictor = regression_data[regression_data.columns[1:]] + # outcome = regression_data['mean'] + # predictor = sm.add_constant(predictor) + # est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit() + # print(est.summary()) + + # todo: could do regression analysis of DALYs averted and Services increased + + # # do anova analysis to test the difference of scenario groups + # def anova_oneway(df=num_dalys_averted_percent): + # best = df.loc[list(scenario_groups['C + P + D/N&M/O/None']), 'mean'] + # middle_C = df.loc[list(scenario_groups['C + D/N&M/O/None']), 'mean'] + # middle_P = df.loc[list(scenario_groups['P + D/N&M/O/None']), 'mean'] + # worst = df.loc[df.index.isin(scenario_groups['D/N&M/O/None']), 'mean'] + # + # return ss.oneway.anova_oneway((best, middle_C, middle_P, worst), + # groups=None, use_var='unequal', welch_correction=True, trim_frac=0) + + # anova_dalys = anova_oneway() + # anova_services = anova_oneway(num_services_increased_percent) + # anova_treatments = anova_oneway(num_treatments_total_increased_percent) + + # plot absolute numbers for scenarios + + # name_of_plot = f'Deaths, 
{target_period()}' + # fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs, {target_period()}' + # fig, ax = do_bar_plot_with_ci(num_dalys_summarized / 1e6) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Service demand, {target_period()}' + # fig, ax = do_bar_plot_with_ci(num_service_demand_summarized / 1e6) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Service delivery ratio, {target_period()}' + # fig, ax = do_bar_plot_with_ci(ratio_service_summarized) + # ax.set_title(name_of_plot) + # ax.set_ylabel('services delivered / demand') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # # plot yearly DALYs for best 9 scenarios + # name_of_plot = f'Yearly DALYs, {target_period()}' + # fig, ax = plt.subplots(figsize=(9, 6)) + # best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1'] + # for s in best_scenarios: + # data = (num_dalys_yearly_summarized.loc[num_dalys_yearly_summarized.scenario == s, :] + # .drop(columns='scenario') + # .pivot(index='year', columns='stat') + # .droplevel(0, axis=1)) + # ax.plot(data.index, data['mean'] / 1e6, label=substitute_labels[s], color=best_scenarios_color[s], linewidth=2) + # # ax.fill_between(data.index.to_numpy(), + # # (data['lower'] / 1e6).to_numpy(), + # # (data['upper'] / 1e6).to_numpy(), + # # 
color=best_scenarios_color[s], + # # alpha=0.2) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # ax.set_xticks(data.index) + # legend_labels = [substitute_labels[v] for v in best_scenarios] + # legend_handles = [plt.Rectangle((0, 0), 1, 1, + # color=best_scenarios_color[v]) for v in best_scenarios] + # ax.legend(legend_handles, legend_labels, + # loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5), + # title='Best scenario group') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # # plot yearly staff count (Clinical/Pharmacy/Nursing and Midwifery) for best 9 scenarios + # best_cadres = ['Clinical', 'Pharmacy', 'Nursing_and_Midwifery'] + # name_of_plot = f'Yearly staff count for C+P+N total, {target_period()}' + # fig, ax = plt.subplots(figsize=(9, 6)) + # best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1'] + # for s in best_scenarios: + # data = staff_count.loc[staff_count.draw == s].set_index('year').drop(columns='draw').loc[:, best_cadres].sum( + # axis=1) + # ax.plot(data.index, data.values / 1e3, label=substitute_labels[s], color=best_scenarios_color[s]) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Thousands)') + # ax.set_xticks(data.index) + # legend_labels = [substitute_labels[v] for v in best_scenarios] + # legend_handles = [plt.Rectangle((0, 0), 1, 1, + # color=best_scenarios_color[v]) for v in best_scenarios] + # ax.legend(legend_handles, legend_labels, + # loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5), + # title='Best scenario group') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Services by appointment type, {target_period()}' + # num_appts_summarized_in_millions = num_appts_summarized / 1e6 + # yerr_services = np.array([ + # (num_services_summarized['mean'] - 
num_services_summarized['lower']).values, + # (num_services_summarized['upper'] - num_services_summarized['mean']).values, + # ])/1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_services_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_appts_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Services demand by appointment type, {target_period()}' + # num_appts_demand_to_plot = num_appts_demand_summarized / 1e6 + # yerr_services = np.array([ + # (num_service_demand_summarized['mean'] - num_service_demand_summarized['lower']).values, + # (num_service_demand_summarized['upper'] - num_service_demand_summarized['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_appts_demand_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_service_demand_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_appts_demand_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', 
reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Never ran services by appointment type, {target_period()}' + # num_never_ran_appts_summarized_in_millions = num_never_ran_appts_summarized / 1e6 + # yerr_services = np.array([ + # (num_never_ran_services_summarized['mean'] - num_never_ran_services_summarized['lower']).values, + # (num_never_ran_services_summarized['upper'] - num_never_ran_services_summarized['mean']).values, + # ])/1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_never_ran_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_never_ran_services_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Total services demand by appointment type, {target_period()}' + # data_to_plot = num_appts_demand_summarized / 1e6 + # yerr_services = np.array([ + # (num_services_demand_summarized['mean'] - num_services_demand_summarized['lower']).values, + # (num_services_demand_summarized['upper'] - num_services_demand_summarized['mean']).values, + # ])/1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # data_to_plot.plot(kind='bar', stacked=True, color=appt_color, 
rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_services_demand_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Services by treatment type, {target_period()}' + # num_treatments_summarized_in_millions = num_treatments_summarized / 1e6 + # yerr_services = np.array([ + # (num_treatments_total_summarized['mean'] - num_treatments_total_summarized['lower']).values, + # (num_treatments_total_summarized['upper'] - num_treatments_total_summarized['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(10, 6)) + # num_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_treatments_total_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_treatments_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # 
name_of_plot = f'Never ran services by treatment type, {target_period()}' + # num_never_ran_treatments_summarized_in_millions = num_never_ran_treatments_summarized / 1e6 + # yerr_services = np.array([ + # (num_never_ran_treatments_total_summarized['mean'] - num_never_ran_treatments_total_summarized['lower']).values, + # (num_never_ran_treatments_total_summarized['upper'] - num_never_ran_treatments_total_summarized['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(10, 6)) + # num_never_ran_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_never_ran_treatments_total_summarized['mean'].values / 1e6, + # yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Number of staff by cadre, {TARGET_PERIOD[1].year}' + # total_staff_to_plot = (staff_count_2029 / 1000).drop(columns='all_cadres').reindex(num_dalys_summarized.index) + # column_dcsa = total_staff_to_plot.pop('DCSA') + # total_staff_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Thousands', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in total_staff_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + 
# plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'HCW time used by cadre in delivering services , {target_period()}' + # data_to_plot = (hcw_time_used / 1e6).reindex(num_dalys_summarized.index) + # column_dcsa = data_to_plot.pop('DCSA') + # data_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Minutes in Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'HCW time needed to deliver never ran appointments, {target_period()}' + # hcw_time_gap_to_plot = (hcw_time_gap / 1e6).reindex(num_dalys_summarized.index) + # column_dcsa = hcw_time_gap_to_plot.pop('DCSA') + # hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Minutes in Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', 
title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'HCW time needed to deliver ran + never ran appointments, {target_period()}' + hcw_time_gap_to_plot = (hcw_time_demand / 1e9).reindex(num_dalys_summarized.index) + column_dcsa = hcw_time_gap_to_plot.pop('DCSA') + hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + ax.set_ylabel('Billion minutes', fontsize='small') + ax.set(xlabel=None) + xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'HCW cost needed by cadre to deliver never ran appointments, {target_period()}' + # hcw_cost_gap_to_plot = (hcw_cost_gap / 1e6).reindex(num_dalys_summarized.index) + # column_dcsa = hcw_cost_gap_to_plot.pop('DCSA') + # hcw_cost_gap_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # hcw_cost_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('USD in Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + 
# fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + # + # name_of_plot = f'Count proportions of never ran appointments that require specific cadres only, {target_period()}' + # data_to_plot = p_count * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + # ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # # plot the average proportions of all scenarios + # # for c in data_to_plot.columns: + # # plt.axhline(y=data_to_plot[c].mean(), + # # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + # + # name_of_plot = f'Cost proportions of never ran appointments that require specific cadres only, {target_period()}' + # data_to_plot = p_cost * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + # ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # # plot the average proportions of all scenarios + # # for c in data_to_plot.columns: + # # plt.axhline(y=data_to_plot[c].mean(), + # # linestyle='--', 
color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Time proportions of never ran appointments that require specific cadres only, {target_period()}' + # data_to_plot = p_time * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + # ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # # plot the average proportions of all scenarios + # # for c in data_to_plot.columns: + # # plt.axhline(y=data_to_plot[c].mean(), + # # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + # + # name_of_plot = f'Cost distribution of never ran appointments that require specific cadres only, {target_period()}' + # data_to_plot = a_cost / 1e6 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + # ax.set_ylabel('USD in millions') + # ax.set_xlabel('Extra budget allocation scenario') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # # plot the average cost of all scenarios + # # for c in data_to_plot.columns: + # # plt.axhline(y=data_to_plot[c].mean(), + # # linestyle='--', 
color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Time distribution of never ran appointments that require specific cadres only, {target_period()}' + # data_to_plot = a_time / 1e6 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + # ax.set_ylabel('minutes in millions') + # ax.set_xlabel('Extra budget allocation scenario') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # # plot the average cost of all scenarios + # # for c in data_to_plot.columns: + # # plt.axhline(y=data_to_plot[c].mean(), + # # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + # + # name_of_plot = f'HCW cost gap by cadre distribution of never ran appointments, {target_period()}' + # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other'] + # hcw_cost_gap_percent_to_plot = hcw_cost_gap_percent[cadres_to_plot] * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # hcw_cost_gap_percent_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + # #ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_percent_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer 
category') + # # plot the average proportions of all scenarios + # for c in cadres_to_plot: + # plt.axhline(y=hcw_cost_gap_percent_to_plot[c].mean(), + # linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2, + # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'HCW cost gap distribution of never ran appointments that require CNP only, {target_period()}' + # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy'] + # data_to_plot = never_ran_appts_info_that_need_CNP[3][cadres_to_plot] * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + # #ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + # # plot the average proportions of all scenarios + # for c in cadres_to_plot: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2, + # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'Average fractions of HCW time used (CNP, level 1a), {target_period()}' + data_to_plot = hcw_time_usage_summarized.xs('1a', axis=1, level=1, drop_level=True) * 100 + fig, ax = plt.subplots(figsize=(12, 8)) + data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + #ax.set_ylim(0, 100) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = 
[substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90) + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Average fractions of HCW time used (CNP, level 2), {target_period()}' + data_to_plot = hcw_time_usage_summarized.xs('2', axis=1, level=1, drop_level=True) * 100 + fig, ax = plt.subplots(figsize=(12, 8)) + data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + # ax.set_ylim(0, 100) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90) + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'Extra budget allocation among cadres, {target_period()}' + # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other'] + # extra_budget_allocation_to_plot = extra_budget_allocation[cadres_to_plot] * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # extra_budget_allocation_to_plot.plot(kind='bar', color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in extra_budget_allocation_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', 
'_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}' + # total_cost_to_plot = (total_cost_all_yrs / 1e6).drop(columns='all_cadres').reindex(num_dalys_summarized.index) + # column_dcsa = total_cost_to_plot.pop('DCSA') + # total_cost_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in total_cost_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs by cause, {target_period()}' + # num_dalys_by_cause_summarized_in_millions = num_dalys_by_cause_summarized / 1e6 + # yerr_dalys = np.array([ + # (num_dalys_summarized['mean'] - num_dalys_summarized['lower']).values, + # (num_dalys_summarized['upper'] - num_dalys_summarized['mean']).values, + # ])/1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_dalys_by_cause_summarized_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # fig.subplots_adjust(right=0.7) + # ax.legend( + # loc="center left", + # bbox_to_anchor=(0.750, 
0.6), + # bbox_transform=fig.transFigure, + # title='Cause of death or injury', + # title_fontsize='x-small', + # fontsize='x-small', + # reverse=True, + # ncol=1 + # ) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # plot relative numbers for scenarios + name_of_plot = f'DALYs averted vs no extra budget allocation, {target_period()}' + fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, num_dalys_averted_percent, annotation=True) + ax.set_title(name_of_plot) + ax.set_ylabel('Millions') + ax.set_xlabel('Extra budget allocation scenario') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Deaths averted vs no extra budget allocation, {target_period()}' + fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, num_deaths_averted_percent, annotation=True) + ax.set_title(name_of_plot) + ax.set_ylabel('Millions') + ax.set_xlabel('Extra budget allocation scenario') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # todo: plot Deaths averted by cause + + # name_of_plot = f'Service delivery ratio against no expansion, {target_period()}' + # fig, ax = do_bar_plot_with_ci(service_ratio_increased * 100, service_ratio_increased_percent, annotation=True) + # ax.set_title(name_of_plot) + # ax.set_ylabel('Percentage') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Extra staff by cadre against no expansion, {TARGET_PERIOD[1].year}' + # extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex( + # num_dalys_summarized.index).drop(['s_1']) / 1e3 + # column_dcsa = extra_staff_by_cadre_to_plot.pop('DCSA') + # 
extra_staff_by_cadre_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Thousands', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Extra budget by cadre vs no extra budget allocation, {target_period()}' + # extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex( + # num_dalys_summarized.index).drop(index='s_0') / 1e6 + # column_dcsa = extra_cost_by_cadre_to_plot.pop('DCSA') + # extra_cost_by_cadre_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # # name_of_plot = f'Time used increased by cadre and treatment: C + N&M + P vs no 
expansion, {target_period()}' + # # data_to_plot = time_increased_by_cadre_treatment_CNP / 1e6 + # name_of_plot = f'Time used increased by cadre and treatment: C + P vs no expansion, {target_period()}' + # data_to_plot = time_increased_by_cadre_treatment_CP / 1e6 + # data_to_plot['total'] = data_to_plot.sum(axis=1) + # data_to_plot.sort_values(by='total', inplace=True, ascending=False) + # data_to_plot.drop('total', axis=1, inplace=True) + # data_to_plot = data_to_plot[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery', + # 'DCSA', 'Laboratory', 'Mental', 'Radiography']] + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Millions Minutes') + # ax.set_xlabel('Treatment') + # ax.set_xticklabels(data_to_plot.index, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + # ':', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Time used increased by treatment and cadre: C + N&M + P vs no expansion, {target_period()}' + # # name_of_plot = f'Time used increased by treatment and cadre: C + P vs no expansion, {target_period()}' + # data_to_plot = data_to_plot.T + # data_to_plot = data_to_plot.add_suffix('*') + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # ax.set_ylabel('Millions Minutes') + # ax.set_xlabel('Treatment') + # ax.set_xticklabels(data_to_plot.index, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + # ':', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot 
= f'DALYs by cause averted: \nall cadres gap allocation vs no extra budget allocation, {target_period()}' + # data_to_plot = num_dalys_by_cause_averted_CNP / 1e6 + # # name_of_plot = f'DALYs by cause averted: C + P vs no expansion, {target_period()}' + # # data_to_plot = num_dalys_by_cause_averted_CP / 1e6 + # fig, ax = plt.subplots() + # data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values) + # ax.set_ylabel('Millions') + # ax.set_xlabel('Treatment') + # ax.set_xticklabels(data_to_plot.index, rotation=90) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + # ':', '').replace('\n', ''))) + # fig.show() + # plt.close(fig) + # + # name_of_plot = f'DALYs by cause averted %: \nall cadres gap allocation vs no extra budget allocation, {target_period()}' + # data_to_plot = num_dalys_by_cause_averted_percent_CNP * 100 + # fig, ax = plt.subplots() + # data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Treatment') + # ax.set_xticklabels(data_to_plot.index, rotation=90) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + # ':', '').replace('\n', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Services increased by appointment type \nagainst no expansion, {target_period()}' + # num_appts_increased_in_millions = num_appts_increased / 1e6 + # yerr_services = np.array([ + # (num_services_increased['mean'] - num_services_increased['lower']).values, + # (num_services_increased['upper'] - num_services_increased['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_appts_increased_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)-1), num_services_increased['mean'].values / 1e6, yerr=yerr_services, + 
# fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_appts_increased_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name( + # name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + # ) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Never ran services reduced by appointment type \nagainst no expansion, {target_period()}' + # num_never_ran_appts_reduced_to_plot = num_never_ran_appts_reduced / 1e6 + # # yerr_services = np.array([ + # # (num_services_increased['mean'] - num_services_increased['lower']).values, + # # (num_services_increased['upper'] - num_services_increased['mean']).values, + # # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_never_ran_appts_reduced_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services, + # # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_reduced_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name( + # name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + # ) + # fig.show() + # plt.close(fig) + + # 
name_of_plot = f'Never ran services reduced by treatment type \nagainst no expansion, {target_period()}' + # num_never_ran_treatments_reduced_to_plot = num_never_ran_treatments_reduced / 1e6 + # # yerr_services = np.array([ + # # (num_services_increased['mean'] - num_services_increased['lower']).values, + # # (num_services_increased['upper'] - num_services_increased['mean']).values, + # # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_never_ran_treatments_reduced_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services, + # # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_reduced_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name( + # name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + # ) + # fig.show() + # plt.close(fig) + + name_of_plot = f'Services increased by treatment type \nvs no extra budget allocation, {target_period()}' + data_to_plot = num_treatments_increased / 1e6 + yerr_services = np.array([ + (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values, + (num_treatments_total_increased['upper'] - num_treatments_total_increased['mean']).values, + ]) / 1e6 + fig, ax = plt.subplots(figsize=(10, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services, + fmt=".", color="black", 
zorder=100) + ax.set_ylabel('Millions', fontsize='small') + ax.set(xlabel=None) + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW time-used increased by treatment type \nvs no extra budget allocation, {target_period()}' + data_to_plot = hcw_time_increased_by_treatment_type / 1e9 + fig, ax = plt.subplots(figsize=(10, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + ax.set_ylabel('Billion minutes', fontsize='small') + ax.set(xlabel=None) + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW time-used increased by cadre \nvs no extra budget allocation, {target_period()}' + data_to_plot = hcw_time_used_increased / 1e9 + column_dcsa = data_to_plot.pop('DCSA') + data_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + ax.set_ylabel('Billion minutes', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, 
fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW capabilities increased by cadre \nvs no extra budget allocation, {target_period()}' + data_to_plot = hcw_time_capabilities_increased / 1e9 + column_dcsa = data_to_plot.pop('DCSA') + data_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + ax.set_ylabel('Billion minutes', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW time - used, needed, capabilities rescaled, capabilities - by cadre \nvs no extra budget allocation, {target_period()}' + # name_of_plot = f'HCW time - used, needed - by cadre \nvs no extra budget allocation, {target_period()}' + assert (hcw_time_used.index == hcw_time_capabilities_rescaled.index).all().all() + assert (hcw_time_used.index == hcw_time_demand.index).all().all() + assert (hcw_time_used.index == hcw_time_capabilities.index).all().all() + assert (hcw_time_used.columns == hcw_time_capabilities_rescaled.columns).all().all() + assert (hcw_time_used.columns == hcw_time_demand.columns).all().all() + assert (hcw_time_used.columns == 
hcw_time_capabilities.columns).all().all() + use_to_plot = hcw_time_used / 1e9 + cap_to_plot = hcw_time_capabilities / 1e9 + cap_rescaled_to_plot = hcw_time_capabilities_rescaled / 1e9 + demand_to_plot = hcw_time_demand / 1e9 + fig, ax = plt.subplots(figsize=(8, 5)) + use_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, alpha=1.0, position=3, + width=0.15, edgecolor='dimgrey', rot=0, ax=ax) + cap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, alpha=0.3, position=0, + width=0.15, edgecolor='dimgrey', rot=0, ax=ax) + cap_rescaled_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, alpha=0.6, position=1, + width=0.15, edgecolor='dimgrey', rot=0, ax=ax) + demand_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, alpha=0.8, position=2, + width=0.15, edgecolor='dimgrey', rot=0, ax=ax) + ax.set_xlim(right=len(use_to_plot) - 0.45) + ax.set_ylabel('Billion minutes', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in use_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + legend_1 = plt.legend(use_to_plot.columns, loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize='small', + title='Officer category', title_fontsize='small', reverse=True) + fig.add_artist(legend_1) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'DALYs by cause averted vs no extra budget allocation, {target_period()}' + num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6 + yerr_dalys = np.array([ + (num_dalys_averted['mean'] - num_dalys_averted['lower']).values, + (num_dalys_averted['upper'] - num_dalys_averted['mean']).values, + ]) / 1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + num_dalys_by_cause_averted_in_millions.plot(kind='bar', 
stacked=True, color=cause_color, rot=0, ax=ax) + ax.errorbar(range(len(param_names)-1), num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys, + fmt=".", color="black", zorder=100) + ax.set_ylabel('Millions', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_averted.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + fig.subplots_adjust(right=0.7) + ax.legend( + loc="center left", + bbox_to_anchor=(0.750, 0.6), + bbox_transform=fig.transFigure, + title='Cause of death or injury', + title_fontsize='x-small', + fontsize='x-small', + ncol=1, + reverse=True + ) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # plot ROI and CE for all expansion scenarios + + # name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}' + # fig, ax = do_bar_plot_with_ci(ROI) + # ax.set_title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Cost per DALY averted, {target_period()}' + # fig, ax = do_bar_plot_with_ci(CE) + # ax.set_title(name_of_plot) + # ax.set_ylabel('USD dollars') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # todo + # To vary the HRH budget growth rate (default: 4.2%) and do sensitivity analysis \ + # (around the best possible extra budget allocation scenario)? + # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary? 
The \ + # inflation rate of GDP and health workforce budget and the increase rate of salary could be assumed to be \ + # the same, thus no need to consider the increase rate of salary if GDP inflation is not considered. + # To plot time series of staff and budget in the target period to show \ + # how many staff and how much budget to increase yearly (choose the best scenario to illustrate)? + # Before submit a run, merge in the remote master. + # Think about a measure of Universal Health Service Coverage for the scenarios? + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("results_folder", type=Path) # outputs/bshe@ic.ac.uk/scenario_run_for_hcw_expansion_analysis-2024-08-16T160132Z + args = parser.parse_args() + + # Produce results for short-term analysis: 5 years + + # # 2015-2019, before change, incl. mode, hr expansion, etc. + # apply( + # results_folder=args.results_folder, + # output_folder=args.results_folder, + # resourcefilepath=Path('./resources'), + # the_target_period=(Date(2015, 1, 1), Date(2019, 12, 31)) + # ) + # + # # 2020-2024 + # apply( + # results_folder=args.results_folder, + # output_folder=args.results_folder, + # resourcefilepath=Path('./resources'), + # the_target_period=(Date(2020, 1, 1), Date(2024, 12, 31)) + # ) + + # Produce results for long-term analysis: 10 years + # 2020-2029 + apply( + results_folder=args.results_folder, + output_folder=args.results_folder, + resourcefilepath=Path('./resources'), + the_target_period=(Date(2019, 1, 1), Date(2029, 12, 31)) + ) diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py new file mode 100644 index 0000000000..1685a3dcaa --- /dev/null +++ 
b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py @@ -0,0 +1,2926 @@ +""" +This file analyses and plots the services, DALYs, Deaths within different scenarios of expanding current hr by officer +type given some extra budget. Return on investment and marginal productivity of each officer type will be examined. + +The scenarios are defined in scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py. +""" + +import argparse +from collections import Counter +from pathlib import Path +from typing import Tuple + +import numpy as np +import pandas as pd +import statsmodels.api as sm +# import statsmodels.stats as ss +from matplotlib import pyplot as plt + +from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import ( + Minute_Salary_by_Cadre_Level, + avg_increase_rate_exp, + extra_budget_fracs, +) +from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import ( + HRHExpansionByCadreWithExtraBudget, +) +from tlo import Date +from tlo.analysis.utils import ( + APPT_TYPE_TO_COARSE_APPT_TYPE_MAP, + CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP, + COARSE_APPT_TYPE_TO_COLOR_MAP, + SHORT_TREATMENT_ID_TO_COLOR_MAP, + bin_hsi_event_details, + compute_mean_across_runs, + extract_results, + summarize, +) + +# rename scenarios +substitute_labels = { + 's_0': 'no_allocation', + 's_1': 'current_allocation', + 's_2': 'gap_allocation', + 's_3': 'C = P = NM = D = O', + 's_4': 'Clinical (C)', 's_5': 'DCSA (D)', 's_6': 'Nursing_and_Midwifery (NM)', 's_7': 'Pharmacy (P)', + 's_8': 'Other (O)', + 's_9': 'C = D', 's_10': 'C = NM', 's_11': 'C = P', 's_12': 'C = O', 's_13': 'NM = D', + 's_14': 'P = D', 's_15': 'D = O', 's_16': 'P = NM', 's_17': 'NM = O', 's_18': 'P = O', + 's_19': 'C = NM = D', 's_20': 'C = P = D', 's_21': 'C = D = O', 's_22': 'C = P = NM', 's_23': 'C = NM = O', + 
's_24': 'C = P = O', 's_25': 'P = NM = D', 's_26': 'NM = D = O', 's_27': 'P = D = O', 's_28': 'P = NM = O', + 's_29': 'C = P = NM = D', 's_30': 'C = NM = D = O', 's_31': 'C = P = D = O', 's_32': 'C = P = NM = O', + 's_33': 'P = NM = D = O', + 's_*': 'optimal_allocation' +} + +# grouping causes of DALYs and types of treatments +cause_group = { + 'AIDS': 'HIV/AIDS', + 'TB (non-AIDS)': 'TB (non-AIDS)', + 'Malaria': 'Malaria', + 'Childhood Diarrhoea': 'RMNCH', + 'Congenital birth defects': 'RMNCH', + 'Lower respiratory infections': 'RMNCH', + 'Maternal Disorders': 'RMNCH', + 'Measles': 'RMNCH', + 'Neonatal Disorders': 'RMNCH', + 'Schistosomiasis': 'RMNCH', + 'COPD': 'NCDs', + 'Cancer (Bladder)': 'NCDs', + 'Cancer (Breast)': 'NCDs', + 'Cancer (Oesophagus)': 'NCDs', + 'Cancer (Other)': 'NCDs', + 'Cancer (Prostate)': 'NCDs', + 'Depression / Self-harm': 'NCDs', + 'Diabetes': 'NCDs', + 'Epilepsy': 'NCDs', + 'Heart Disease': 'NCDs', + 'Kidney Disease': 'NCDs', + 'Lower Back Pain': 'NCDs', + 'Stroke': 'NCDs', + 'Transport Injuries': 'Transport Injuries', + 'Other': 'Other', +} +cause_group_color = { + 'HIV/AIDS': 'deepskyblue', + 'TB (non-AIDS)': 'mediumslateblue', + 'Malaria': 'khaki', + 'RMNCH': 'mediumaquamarine', + 'NCDs': 'violet', + 'Transport Injuries': 'lightsalmon', + 'Other': 'dimgrey', +} + +treatment_group = { + 'Alri*': 'RMNCH', + 'AntenatalCare*': 'RMNCH', + 'BladderCancer*': 'NCDs', + 'BreastCancer*': 'NCDs', + 'CardioMetabolicDisorders*': 'NCDs', + 'Contraception*': 'RMNCH', + 'Copd*': 'NCDs', + 'DeliveryCare*': 'RMNCH', + 'Depression*': 'NCDs', + 'Diarrhoea*': 'RMNCH', + 'Epi*': 'RMNCH', + 'Epilepsy*': 'NCDs', + 'FirstAttendance*': 'First Attendance', + 'Hiv*': 'HIV/AIDS', + 'Inpatient*': 'Inpatient', + 'Malaria*': 'Malaria', + 'Measles*': 'RMNCH', + 'OesophagealCancer*': 'NCDs', + 'OtherAdultCancer*': 'NCDs', + 'PostnatalCare*': 'RMNCH', + 'ProstateCancer*': 'NCDs', + 'Rti*': 'Transport Injuries', + 'Schisto*': 'RMNCH', + 'Tb*': 'TB (non-AIDS)', + 
'Undernutrition*': 'RMNCH', +} +treatment_group_color = { + 'HIV/AIDS': 'deepskyblue', + 'TB (non-AIDS)': 'mediumslateblue', + 'Malaria': 'khaki', + 'RMNCH': 'mediumaquamarine', + 'NCDs': 'violet', + 'Transport Injuries': 'lightsalmon', + 'First Attendance': 'darkgrey', + 'Inpatient': 'lightgrey', +} + + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, + the_target_period: Tuple[Date, Date] = None): + """ + Extract results of number of services by appt type, number of DALYs, number of Deaths in the target period. + (To see whether to extract these results by short treatment id and/or disease.) + Calculate the extra budget allocated, extra staff by cadre, return on investment and marginal productivity by cadre. + """ + TARGET_PERIOD = the_target_period + the_cause = 'TB (non-AIDS)' # the cause to investigate for yearly DALYs + # TB (non-AIDS), Transport Injuries, Lower respiratory infections, Transport Injuries + + # Definitions of general helper functions + make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + e = HRHExpansionByCadreWithExtraBudget() + return tuple(e._scenarios.keys()) + + def get_num_appts(_df): + """Return the number of services by appt type (total within the TARGET_PERIOD)""" + return (_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code'] + .apply(pd.Series) + .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP) + .groupby(level=0, axis=1).sum() + .sum()) + + def get_num_appts_by_level(_df): + """Return the number of services by appt type and facility level (total within the TARGET_PERIOD)""" + def 
unpack_nested_dict_in_series(_raw: pd.Series): + return pd.concat( + { + idx: pd.DataFrame.from_dict(mydict) for idx, mydict in _raw.items() + } + ).unstack().fillna(0.0).astype(int) + + return _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code_And_Level'] \ + .pipe(unpack_nested_dict_in_series) \ + .sum(axis=0) + + def get_num_services(_df): + """Return the number of services in total of all appt types (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code'] + .apply(pd.Series).sum().sum() + ) + + def get_num_treatments(_df): + """Return the number of treatments by short treatment id (total within the TARGET_PERIOD)""" + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum() + _df.index = _df.index.map(lambda x: x.split('_')[0] + "*") + _df = _df.groupby(level=0).sum() + return _df + + def get_num_treatments_group(_df): + """Return the number of treatments by short treatment id (total within the TARGET_PERIOD)""" + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum() + _df.index = _df.index.map(lambda x: x.split('_')[0] + "*") + _df = _df.rename(index=treatment_group) + _df = _df.groupby(level=0).sum() + return _df + + def get_num_treatments_total(_df): + """Return the number of treatments in total of all treatments (total within the TARGET_PERIOD)""" + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum() + _df.index = _df.index.map(lambda x: x.split('_')[0] + "*") + _df = _df.groupby(level=0).sum().sum() + return pd.Series(_df) + + def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD)""" + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + + def get_num_dalys(_df): + """Return total number of DALYS (Stacked) 
(total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + def get_num_dalys_yearly(_df): + """Return total number of DALYS (Stacked) for every year in the TARGET_PERIOD. + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + period = (Date(2010, 1, 1), Date(2034, 12, 31)) + years_needed = [i.year for i in period] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + _df = (_df.loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range']) + .groupby('year').sum() + .sum(axis=1) + ) + return _df + + def get_num_dalys_by_one_cause_yearly(_df, one_cause=the_cause): + """Return total number of DALYS by TB (Stacked) for every year in simulation period 2010-2034. + Throw error if not a record for every year in the period (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + period = (Date(2010, 1, 1), Date(2034, 12, 31)) + years_needed = [i.year for i in period] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + _df = (_df.loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range']) + .groupby('year').sum() + ) + _df = _df[one_cause] + return _df + + def get_num_dalys_by_cause(_df): + """Return total number of DALYS by cause (Stacked) (total within the TARGET_PERIOD). 
+ Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + return (_df + .loc[_df.year.between(*years_needed)].drop(columns=['date', 'year', 'li_wealth']) + .sum(axis=0) + ) + + def get_num_dalys_by_cause_group(_df): + """Return total number of DALYS by cause group (Stacked) (total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + _df = _df.rename(columns=cause_group) # rename cause as cause group + _df = _df.groupby(_df.columns, axis=1).sum() # group up causes in each cause group + return (_df + .loc[_df.year.between(*years_needed)].drop(columns=['date', 'year', 'li_wealth']) + .sum(axis=0) + ) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def find_difference_relative_to_comparison_series( + _ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. 
+ The comparison is `X - COMPARISON`.""" + return (_ser + .unstack(level=0) + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) + .drop(columns=([comparison] if drop_comparison else [])) + .stack() + ) + + def find_difference_relative_to_comparison_dataframe(_df: pd.DataFrame, **kwargs): + """Apply `find_difference_relative_to_comparison_series` to each row in a dataframe""" + return pd.concat({ + _idx: find_difference_relative_to_comparison_series(row, **kwargs) + for _idx, row in _df.iterrows() + }, axis=1).T + + # group scenarios for presentation + def scenario_grouping_coloring(by='effect'): + if by == 'effect': # based on DALYs averted/whether to expand Clinical + Pharmacy + grouping = { + 'C & P & D/NM/O/None': {'s_1', 's_2', 's_3', 's_11', 's_20', 's_22', 's_24', 's_29', 's_31', 's_32', + 's_*'}, + 'C & D/NM/O/None': {'s_4', 's_9', 's_10', 's_12', 's_19', 's_21', 's_23', 's_30'}, + 'P & D/NM/O/None': {'s_7', 's_14', 's_16', 's_18', 's_25', 's_27', 's_28', 's_33'}, + 'D/O/None': {'s_5', 's_8', 's_15', 's_0'}, + 'NM & D/O/None': {'s_6', 's_13', 's_17', 's_26'}, + } + grouping_color = { + 'D/O/None': 'silver', + 'NM & D/O/None': 'lightpink', + 'P & D/NM/O/None': 'violet', + 'C & D/NM/O/None': 'darkorchid', + 'C & P & D/NM/O/None': 'darkturquoise', + } + elif by == 'allocation': + grouping = { + 'D/O': {'s_5', 's_8', 's_15'}, + 'C & D/O/None': {'s_4', 's_9', 's_12', 's_21'}, + 'P & D/O/None': {'s_7', 's_14', 's_18', 's_27'}, + 'NM & D/O/None': {'s_6', 's_13', 's_17', 's_26'}, + 'C & P & D/O/None': {'s_11', 's_20', 's_24', 's_31'}, + 'C & NM & D/O/None': {'s_10', 's_19', 's_23', 's_30'}, + 'P & NM & D/O/None': {'s_16', 's_25', 's_28', 's_33'}, + 'C & P & NM & D/O/None': {'s_3', 's_22', 's_29', 's_32'}, + 'gap_allocation': {'s_2'}, + 'current_allocation': {'s_1'}, + 'optimal_allocation': {'s_*'}, + 'no_allocation': {'s_0'}, + } + keys = ['gap_allocation', 'C & P & NM & D/O/None', 'C & P & D/O/None', + 'current_allocation', 'C & NM 
& D/O/None', 'C & D/O/None', 'P & NM & D/O/None', + 'P & D/O/None', 'NM & D/O/None', 'D/O', 'optimal_allocation', 'no_allocation', + ] + cmap_list = list(map(plt.get_cmap("Set3"), range(len(keys)))) + grouping_color = {keys[idx]: cmap_list[idx] for idx in range(len(keys))} + # grouping_color = { + # 'D/O': 'silver', + # 'C & D/O/None': 'lightskyblue', + # 'P & D/O/None': 'lightgreen', + # 'NM & D/O/None': 'lightpink', + # 'C & P & D/O/None': 'khaki', + # 'C & NM & D/O/None': 'violet', + # 'P & NM & D/O/None': 'burlywood', + # 'C & P & NM & D/O/None': 'darkturquoise', + # 'gap_allocation': 'yellowgreen', + # 'current_allocation': 'thistle', + # 'optimal_allocation': 'gold', + # 'no_allocation': 'lavender', + # } + elif by == 'allocation_alt': # based on how many cadres are expanded + grouping = { + 'no_allocation': {'s_0'}, + '5_cadres_equal_allocation': {'s_3'}, + 'gap_allocation': {'s_2'}, + 'current_allocation': {'s_1'}, + 'optimal_allocation': {'s_*'}, + '1_cadre_allocation': {'s_4', 's_5', 's_6', 's_7', 's_8'}, + '2_cadres_equal_allocation': {'s_9', 's_10', 's_11', 's_12', 's_13', + 's_14', 's_15', 's_16', 's_17', 's_18'}, + '3_cadres_equal_allocation': {'s_19', 's_20', 's_21', 's_22', 's_23', + 's_24', 's_25', 's_26', 's_27', 's_28'}, + '4_cadres_equal_allocation': {'s_29', 's_30', 's_31', 's_32', 's_33'} + + } + # grouping_color = { + # 'no_allocation': 'gray', + # '1_cadre_allocation': 'lightpink', + # '2_cadres_equal_allocation': 'violet', + # '3_cadres_equal_allocation': 'darkorchid', + # '4_cadres_equal_allocation': 'paleturquoise', + # '5_cadres_equal_allocation': 'darkturquoise', + # '5_cadres_current_allocation': 'deepskyblue', + # '5_cadres_gap_allocation': 'royalblue', + # '5_cadres_optimal_allocation': 'khaki', + # } + keys = ['gap_allocation', 'current_allocation', + '5_cadres_equal_allocation', '4_cadres_equal_allocation', '3_cadres_equal_allocation', + '2_cadres_equal_allocation', '1_cadre_allocation', 'optimal_allocation', 'no_allocation'] + 
cmap_list = list(map(plt.get_cmap("Set3"), range(len(keys)))) + grouping_color = {keys[idx]: cmap_list[idx] for idx in range(len(keys))} + return grouping, grouping_color + + def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar. + Annotated with percent statistics from _df_percent, if annotation=True and _df_percent not None.""" + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} + + colors = [scenario_color[s] for s in _df.index] + + fig, ax = plt.subplots(figsize=(9, 6)) + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + alpha=0.8, + ecolor='dimgrey', + color=colors, + capsize=6, + label=xticks.values(), + zorder=100, + ) + + if annotation: + assert (_df.index == _df_percent.index).all() + for xpos, ypos, text1, text2, text3 in zip(xticks.keys(), _df['upper'].values, + _df_percent['mean'].values, + _df_percent['lower'].values, + _df_percent['upper'].values): + text = f"{int(round(text1 * 100, 2))}%" # \n{[round(text2, 2),round(text3, 2)]}" + ax.text(xpos, ypos + 0.2, text, horizontalalignment='center', fontsize='x-small') + + ax.set_xticks(list(xticks.keys())) + + xtick_label_detail = [substitute_labels[v] for v in xticks.values()] + ax.set_xticklabels(xtick_label_detail, rotation=90, fontsize='medium') + + legend_labels = list(scenario_groups[1].keys())[:-2] + legend_handles = [plt.Rectangle((0, 0), 1, 1, + color=scenario_groups[1][label]) for label in legend_labels] + ax.legend(legend_handles, legend_labels, ncol=2, # loc='center left', bbox_to_anchor=(1, 0.5), + title='Scenario groups') + + ax.grid(axis="y") + ax.set_ylim((None, 12)) + # ax.spines['top'].set_visible(False) + # ax.spines['right'].set_visible(False) + fig.tight_layout() + + return fig, ax + + # def get_scale_up_factor(_df): 
+ # """ + # Return a series of yearly scale up factors for all cadres, + # with index of year and value of list of scale up factors. + # """ + # _df['year'] = _df['date'].dt.year + # _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year', 'scale_up_factor'] + # ].set_index('year') + # _df = _df['scale_up_factor'].apply(pd.Series) + # assert (_df.columns == cadres).all() + # _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index} + # _df_1 = pd.DataFrame(data=_dict).T + # return pd.Series( + # _df_1.loc[:, 0], index=_df_1.index + # ) + + def get_total_cost(_df): + """ + Return a series of yearly total cost for all cadres, + with index of year and values of list of total cost. + """ + _df['year'] = _df['date'].dt.year + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year', 'total_hr_salary']].set_index('year') + _df = _df['total_hr_salary'].apply(pd.Series) + assert (_df.columns == cadres).all() + _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index} + _df_1 = pd.DataFrame(data=_dict).T + return pd.Series( + _df_1.loc[:, 0], index=_df_1.index + ) + + def get_yearly_hr_count(_df): + """ + Return a series of yearly total cost for all cadres, + with index of year and values of list of total cost. 
+ """ + # format + _df['year'] = _df['date'].dt.year + _df = _df.drop(columns='date').set_index('year').fillna(0) + _df.columns = _df.columns.map(lambda x: x.split('_')[-1]) + _df.rename(columns={'Midwifery': 'Nursing_and_Midwifery'}, inplace=True) + _df = _df.groupby(level=0, axis=1).sum() + assert set(_df.columns) == set(cadres) + _df = _df[cadres] + # get multiplier for popsize=100,000: 145.39609000000002 + _df = _df * 145.39609000000002 + # reformat as a series + _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index} + _df_1 = pd.DataFrame(data=_dict).T + return pd.Series( + _df_1.loc[:, 0], index=_df_1.index + ) + + def get_current_hr(cadres): + """ + Return current (year of 2018/2019) staff counts and capabilities for the cadres specified. + """ + curr_hr_path = Path(resourcefilepath + / 'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv') + curr_hr = pd.read_csv(curr_hr_path).groupby('Officer_Category').agg( + {'Staff_Count': 'sum', 'Total_Mins_Per_Day': 'sum'}).reset_index() + curr_hr['Total_Minutes_Per_Year'] = curr_hr['Total_Mins_Per_Day'] * 365.25 + curr_hr.drop(['Total_Mins_Per_Day'], axis=1, inplace=True) + curr_hr = curr_hr.loc[ + curr_hr['Officer_Category'].isin(cadres), ['Officer_Category', 'Staff_Count'] + ].set_index('Officer_Category').T + return curr_hr[cadres] + + def get_hr_salary(cadres): + """ + Return annual salary for the cadres specified. 
+ """ + salary_path = Path(resourcefilepath + / 'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv') + salary = pd.read_csv(salary_path, index_col=False) + salary = salary.loc[ + salary['Officer_Category'].isin(cadres), ['Officer_Category', 'Annual_Salary_USD'] + ].set_index('Officer_Category').T + return salary[cadres] + + def format_appt_time_and_cost(): + """ + Return the formatted appointment time requirements and costs per cadre + """ + file_path = Path(resourcefilepath + / 'healthsystem' / 'human_resources' / 'definitions' / 'ResourceFile_Appt_Time_Table.csv') + _df = pd.read_csv(file_path, index_col=False) + + time = _df.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category', + values='Time_Taken_Mins').fillna(0.0).T + minute_salary = Minute_Salary_by_Cadre_Level + cost = _df.merge(minute_salary, on=['Facility_Level', 'Officer_Category'], how='left') + cost['cost_USD'] = cost['Time_Taken_Mins'] * cost['Minute_Salary_USD'] + cost = cost.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category', + values='cost_USD').fillna(0.0).T + + return time, cost + + def get_frac_of_hcw_time_used(_df): + """Return the fraction of time used by cadre and facility level""" + CNP_cols = ['date'] + for col in _df.columns[1:]: + if ('Clinical' in col) | ('Nursing_and_Midwifery' in col) | ('Pharmacy' in col): + CNP_cols.append(col) + + _df = _df[CNP_cols].copy() + _df = _df.loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), :] + _df = _df.set_index('date').mean(axis=0) # average over years + + return _df + + def get_hcw_time_by_treatment(): + appointment_time_table = pd.read_csv( + resourcefilepath + / 'healthsystem' + / 'human_resources' + / 'definitions' + / 'ResourceFile_Appt_Time_Table.csv', + index_col=["Appt_Type_Code", "Facility_Level", "Officer_Category"] + ) + + appt_type_facility_level_officer_category_to_appt_time = ( + appointment_time_table.Time_Taken_Mins.to_dict() + ) + + officer_categories = 
appointment_time_table.index.levels[ + appointment_time_table.index.names.index("Officer_Category") + ].to_list() + + times_by_officer_category_treatment_id_per_draw_run = bin_hsi_event_details( + results_folder, + lambda event_details, count: sum( + [ + Counter({ + ( + officer_category, + event_details["treatment_id"].split("_")[0] + ): + count + * appt_number + * appt_type_facility_level_officer_category_to_appt_time.get( + ( + appt_type, + event_details["facility_level"], + officer_category + ), + 0 + ) + for officer_category in officer_categories + }) + for appt_type, appt_number in event_details["appt_footprint"] + ], + Counter() + ), + *TARGET_PERIOD, + True + ) + + time_by_cadre_treatment_per_draw = compute_mean_across_runs(times_by_officer_category_treatment_id_per_draw_run) + + # transform counter to dataframe + def format_time_by_cadre_treatment(_df): + _df.reset_index(drop=False, inplace=True) + for idx in _df.index: + _df.loc[idx, 'Cadre'] = _df.loc[idx, 'index'][0] + _df.loc[idx, 'Treatment'] = _df.loc[idx, 'index'][1] + _df = _df.drop('index', axis=1).rename(columns={0: 'value'}).pivot( + index='Treatment', columns='Cadre', values='value').fillna(0.0) + + _series = _df.sum(axis=1) # sum up cadres + + return _df, _series + + # time_by_cadre_treatment_all_scenarios = { + # f's_{key}': format_time_by_cadre_treatment( + # pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index') + # )[0] for key in range(len(param_names)) + # } + + time_by_treatment_all_scenarios = { + f's_{key}': format_time_by_cadre_treatment( + pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index') + )[1] for key in range(len(param_names)) + + } + time_by_treatment_all_scenarios = pd.DataFrame(time_by_treatment_all_scenarios).T + + # rename scenarios according to param_names + time_by_treatment_all_scenarios.rename( + index={time_by_treatment_all_scenarios.index[i]: param_names[i] + for i in range(len(time_by_treatment_all_scenarios.index))}, 
inplace=True) + + time_increased_by_treatment = time_by_treatment_all_scenarios.subtract( + time_by_treatment_all_scenarios.loc['s_0', :], axis=1).drop('s_0', axis=0).add_suffix('*') + + return time_increased_by_treatment + + # Get parameter/scenario names + param_names = tuple(extra_budget_fracs.drop(columns='s_*')) + # param_names = get_parameter_names_from_scenario_file() + + # Define cadres in order + cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'] + + # Get appointment time and cost requirement + appt_time, appt_cost = format_appt_time_and_cost() + + # # Get scale up factors for all scenarios + # scale_up_factors = extract_results( + # results_folder, + # module='tlo.methods.healthsystem.summary', + # key='HRScaling', + # custom_generate_series=get_scale_up_factor, + # do_scaling=False + # ).pipe(set_param_names_as_column_index_level_0).stack(level=0) + # # check that the scale up factors are all most the same between each run within each draw + # # assert scale_up_factors.eq(scale_up_factors.iloc[:, 0], axis=0).all().all() + # # keep scale up factors of only one run within each draw + # scale_up_factors = scale_up_factors.iloc[:, 0].unstack().reset_index().melt(id_vars='index') + # scale_up_factors[cadres] = scale_up_factors.value.tolist() + # scale_up_factors.drop(columns='value', inplace=True) + + # Get total cost for all scenarios + total_cost = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HRScaling', + custom_generate_series=get_total_cost, + do_scaling=False + ).pipe(set_param_names_as_column_index_level_0).stack(level=0) + total_cost = total_cost.iloc[:, 0].unstack().reset_index().melt(id_vars='index') + total_cost[cadres] = total_cost.value.tolist() + total_cost.drop(columns='value', inplace=True) + total_cost['all_cadres'] = total_cost[[c for c in total_cost.columns if c in cadres]].sum(axis=1) + 
total_cost.rename(columns={'index': 'year'}, inplace=True) + + # total cost of all expansion years + total_cost_all_yrs = total_cost.groupby('draw').sum().drop(columns='year') + + # total extra cost of all expansion years + extra_cost_all_yrs = total_cost_all_yrs.copy() + for s in param_names[1:]: + extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - total_cost_all_yrs.loc['s_0', :] + extra_cost_all_yrs.drop(index='s_0', inplace=True) + + # get yearly hr count + yearly_hr_count = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='number_of_hcw_staff', + custom_generate_series=get_yearly_hr_count, + do_scaling=False + ).pipe(set_param_names_as_column_index_level_0).stack(level=0) + # check that the staff counts are the same between each run within each draw + for i in range(len(yearly_hr_count.index)): + for j in yearly_hr_count.columns[1:]: + for k in range(len(cadres)): + assert abs(yearly_hr_count.iloc[i, j][k] - yearly_hr_count.iloc[i, 0][k]) < 1/1e8 + # store results for only one run per draw + yearly_hr_count = yearly_hr_count.iloc[:, 0].unstack().reset_index().melt(id_vars='index') + yearly_hr_count[cadres] = yearly_hr_count.value.tolist() + yearly_hr_count.drop(columns='value', inplace=True) + yearly_hr_count['all_cadres'] = yearly_hr_count[[c for c in yearly_hr_count.columns if c in cadres]].sum(axis=1) + yearly_hr_count.rename(columns={'index': 'year'}, inplace=True) + + # get extra count = staff count - staff count of no expansion s_1 + # note that annual staff increase rate = scale up factor - 1 + extra_staff = yearly_hr_count.drop( + yearly_hr_count[yearly_hr_count.year.isin(range(2010, 2024))].index, axis=0 + ).reset_index(drop=True) + staff_increase_rate = extra_staff.copy() + staff_2024 = pd.DataFrame(extra_staff.loc[(extra_staff.year == 2024) + & (extra_staff.draw == 's_0'), :]) + for i in extra_staff.index: + extra_staff.iloc[i, 2:] = extra_staff.iloc[i, 2:] - staff_2024.iloc[0, 2:] + 
staff_increase_rate.iloc[i, 2:] = (extra_staff.iloc[i, 2:] / staff_2024.iloc[0, 2:]) + # checked that this is slightly different with hr_increase_rates from preparation script, due the calculation + # process are not the same + + # check total cost calculated is increased as expected + # also checked (in excel) that the yearly_hr_count (s_0 and s_1) are expanded as expected + years = range(2025, the_target_period[1].year + 1) + budget_growth_rate = 0.042 # 0.042, 0.058, 0.026 + for s in param_names[1:]: + assert (abs( + total_cost.loc[(total_cost.year == 2034) & (total_cost.draw == s), 'all_cadres'].values[0] - + (1 + budget_growth_rate) ** len(years) * total_cost.loc[ + (total_cost.year == 2025) & (total_cost.draw == 's_0'), 'all_cadres'].values[0] + ) < 1e-6).all() + + # Absolute Number of Deaths and DALYs and Services + num_deaths = extract_results( + results_folder, + module='tlo.methods.demography', + key='death', + custom_generate_series=get_num_deaths, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_dalys = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_dalys_yearly = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_yearly, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_dalys_by_one_cause_yearly = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_one_cause_yearly, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_dalys_by_cause = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_by_wealth_stacked_by_age_and_time", + custom_generate_series=get_num_dalys_by_cause, + do_scaling=True, + 
).pipe(set_param_names_as_column_index_level_0) + + num_dalys_by_cause_group = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_by_wealth_stacked_by_age_and_time", + custom_generate_series=get_num_dalys_by_cause_group, + do_scaling=True, + ).pipe(set_param_names_as_column_index_level_0) + + num_appts = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_appts, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_appts_by_level = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_appts_by_level, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_services = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_services, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_treatments = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_treatments, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_treatments_group = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_treatments_group, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_treatments_total = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_treatments_total, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_never_ran_appts = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + 
key='Never_ran_HSI_Event', + custom_generate_series=get_num_appts, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_never_ran_appts_by_level = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_num_appts_by_level, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_never_ran_services = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_num_services, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + # num_never_ran_treatments_total = extract_results( + # results_folder, + # module='tlo.methods.healthsystem.summary', + # key='Never_ran_HSI_Event', + # custom_generate_series=get_num_treatments_total, + # do_scaling=True + # ).pipe(set_param_names_as_column_index_level_0) + + # num_never_ran_treatments = extract_results( + # results_folder, + # module='tlo.methods.healthsystem.summary', + # key='Never_ran_HSI_Event', + # custom_generate_series=get_num_treatments, + # do_scaling=True + # ).pipe(set_param_names_as_column_index_level_0) + + # get total service demand + assert len(num_services) == len(num_never_ran_services) == 1 + assert (num_services.columns == num_never_ran_services.columns).all() + num_services_demand = num_services + num_never_ran_services + # ratio_services = num_services / num_services_demand + + assert (num_appts.columns == num_never_ran_appts.columns).all() + num_never_ran_appts.loc['Lab / Diagnostics', :] = 0 + num_never_ran_appts = num_never_ran_appts.reindex(num_appts.index).fillna(0.0) + assert (num_appts.index == num_never_ran_appts.index).all() + num_appts_demand = num_appts + num_never_ran_appts + + hcw_time_usage = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + 
key='Capacity_By_OfficerType_And_FacilityLevel',#'Capacity',#'Capacity_By_OfficerType_And_FacilityLevel', + custom_generate_series=get_frac_of_hcw_time_used, + do_scaling=False + ).pipe(set_param_names_as_column_index_level_0) + + # get absolute numbers for scenarios + # sort the scenarios according to their DALYs values, in ascending order + num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names).sort_values(by='mean') + num_dalys_summarized.to_csv(output_folder / 'num_dalys_summarized.csv') + num_dalys_by_cause_summarized = summarize(num_dalys_by_cause, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_dalys_by_cause_summarized.to_csv(output_folder / 'num_dalys_by_cause_summarized.csv') + num_dalys_by_cause_group_summarized = summarize(num_dalys_by_cause_group, only_mean=True + ).T.reindex(param_names).reindex(num_dalys_summarized.index) + num_dalys_by_cause_group_summarized.to_csv(output_folder / 'num_dalys_by_cause_group_summarized.csv') + + num_dalys_yearly_summarized = (summarize(num_dalys_yearly) + .stack([0, 1]) + .rename_axis(['year', 'scenario', 'stat']) + .reset_index(name='count')) + num_dalys_yearly_summarized.to_csv(output_folder / 'num_dalys_yearly_summarized.csv') + + num_dalys_by_one_cause_yearly_summarized = (summarize(num_dalys_by_one_cause_yearly) + .stack([0, 1]) + .rename_axis(['year', 'scenario', 'stat']) + .reset_index(name='count')) + name_of_data = f'num_dalys_by_{the_cause}_yearly_summarized.csv' + num_dalys_by_one_cause_yearly_summarized.to_csv(output_folder / name_of_data) + + # num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + + num_services_summarized = summarize(num_services).loc[0].unstack().reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_services_summarized.to_csv(output_folder / 'num_services_summarized.csv') + num_appts_summarized = summarize(num_appts, 
only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_appts_by_level_summarized = summarize(num_appts_by_level, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index).fillna(0.0) + num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex( + param_names).reindex(num_dalys_summarized.index).fillna(0.0) + num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_treatments_group_summarized = summarize(num_treatments_group, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_treatments_group_summarized.to_csv(output_folder / 'num_treatments_area_summarized.csv') + # num_treatments_total_summarized = summarize(num_treatments_total).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + + num_never_ran_services_summarized = summarize(num_never_ran_services).loc[0].unstack().reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_never_ran_appts_summarized = summarize(num_never_ran_appts, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + # num_never_ran_treatments_summarized = summarize(num_never_ran_treatments, only_mean=True).T.reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + # num_never_ran_treatments_total_summarized = summarize(num_never_ran_treatments_total).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + num_services_demand_summarized = summarize(num_services_demand).loc[0].unstack().reindex(param_names).reindex( + num_dalys_summarized.index + ) + # ratio_service_summarized = summarize(ratio_services).loc[0].unstack().reindex(param_names).reindex( + # 
num_dalys_summarized.index + # ) + hcw_time_usage_summarized = summarize(hcw_time_usage, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + hcw_time_usage_summarized.columns = [col.replace('OfficerType=', '').replace('FacilityLevel=', '') + for col in hcw_time_usage_summarized.columns] + hcw_time_usage_summarized.columns = hcw_time_usage_summarized.columns.str.split(pat='|', expand=True) + + # get relative numbers for scenarios, compared to no_expansion scenario: s_0 + num_services_increased = summarize( + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_services.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + hcw_time_increased_by_treatment_type = get_hcw_time_by_treatment().reindex(num_dalys_summarized.index).drop(['s_0']) + + num_services_increased_percent = summarize( + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_services.loc[0], + comparison='s_0', + scaled=True) + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + num_services_increased_percent.to_csv(output_folder / 'num_services_increased_percent.csv') + + num_deaths_averted = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_deaths.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_deaths_averted_percent = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_deaths.loc[0], + comparison='s_0', + scaled=True) + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_dalys_averted = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_dalys.loc[0], + comparison='s_0') + ).T + 
).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + num_dalys_averted.to_csv(output_folder / 'num_dalys_averted.csv') + + num_dalys_averted_percent = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_dalys.loc[0], + comparison='s_0', + scaled=True + ) + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + num_dalys_averted_percent.to_csv(output_folder / 'num_dalys_averted_percent.csv') + + num_dalys_by_cause_averted = summarize( + -1.0 * find_difference_relative_to_comparison_dataframe( + num_dalys_by_cause, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + num_dalys_by_cause_group_averted = summarize( + -1.0 * find_difference_relative_to_comparison_dataframe( + num_dalys_by_cause_group, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + num_dalys_by_cause_group_averted.to_csv(output_folder / 'num_dalys_by_cause_area_averted.csv') + + num_dalys_by_cause_averted_percent = summarize( + -1.0 * find_difference_relative_to_comparison_dataframe( + num_dalys_by_cause, + comparison='s_0', + scaled=True + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_dalys_by_cause_group_averted_percent = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_dalys_by_cause_group, + # comparison='s_0', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_22', :].sort_values(ascending=False) + # num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False) + num_dalys_by_cause_averted_percent_CNP = num_dalys_by_cause_averted_percent.loc['s_22', :].sort_values( + ascending=False) + # num_dalys_by_cause_averted_percent_CP = 
num_dalys_by_cause_averted_percent.loc['s_11', :].sort_values( + # ascending=False) + + # num_dalys_by_cause_averted_percent = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_dalys_by_cause, + # comparison='s_1', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + num_appts_increased = summarize( + find_difference_relative_to_comparison_dataframe( + num_appts, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_never_ran_appts_reduced = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_never_ran_appts, + # comparison='s_1', + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + # num_never_ran_treatments_reduced = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_never_ran_treatments, + # comparison='s_1', + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + # num_appts_increased_percent = summarize( + # find_difference_relative_to_comparison_dataframe( + # num_appts, + # comparison='s_1', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + num_treatments_increased = summarize( + find_difference_relative_to_comparison_dataframe( + num_treatments, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + num_treatments_increased.to_csv(output_folder / 'num_treatments_type_increased.csv') + + num_treatments_group_increased = summarize( + find_difference_relative_to_comparison_dataframe( + num_treatments_group, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + num_treatments_group_increased.to_csv(output_folder / 'num_treatments_area_increased.csv') + + # num_treatments_increased_percent = summarize( + # find_difference_relative_to_comparison_dataframe( + # 
num_treatments, + # comparison='s_1', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + num_treatments_total_increased = summarize( + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_treatments_total.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_treatments_total_increased_percent = summarize( + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_treatments_total.loc[0], + comparison='s_0', + scaled=True) + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + num_treatments_total_increased_percent.to_csv(output_folder / 'num_treatments_total_increased_%.csv') + + # service_ratio_increased = summarize( + # pd.DataFrame( + # find_difference_relative_to_comparison_series( + # ratio_services.loc[0], + # comparison='s_1') + # ).T + # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1']) + + # service_ratio_increased_percent = summarize( + # pd.DataFrame( + # find_difference_relative_to_comparison_series( + # ratio_services.loc[0], + # comparison='s_1', + # scaled=True) + # ).T + # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1']) + + # Check that when we sum across the causes/appt types, + # we get the same total as calculated when we didn't split by cause/appt type. 
+ assert ( + (num_appts_increased.sum(axis=1).sort_index() + - num_services_increased['mean'].sort_index() + ) < 1e-6 + ).all() + + assert ( + (num_dalys_by_cause_averted.sum(axis=1).sort_index() + - num_dalys_averted['mean'].sort_index() + ) < 1e-6 + ).all() + + assert ( + (num_dalys_by_cause_group_averted.sum(axis=1).sort_index() + - num_dalys_averted['mean'].sort_index() + ) < 1e-6 + ).all() + + assert ( + (num_treatments_increased.sum(axis=1).sort_index() + - num_treatments_total_increased['mean'].sort_index() + ) < 1e-6 + ).all() + + assert ( + (num_treatments_group_increased.sum(axis=1).sort_index() + - num_treatments_total_increased['mean'].sort_index() + ) < 1e-6 + ).all() + + # get time used by services delivered + def hcw_time_or_cost_used(time_cost_df=appt_time, count_df=num_appts_by_level_summarized): + cols_1 = count_df.columns + cols_2 = time_cost_df.columns + # check that appts (at a level) not in appt_time (as defined) have count 0 and drop them + # assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() -> ('2', 'Tomography') + # replace Tomography from level 2 to level 3 + count_df.loc[:, ('3', 'Tomography')] += count_df.loc[:, ('2', 'Tomography')] + count_df.loc[:, ('2', 'Tomography')] = 0 + assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() + if len(list(set(cols_1) - set(cols_2))) > 0: + _count_df = count_df.drop(columns=list(set(cols_1) - set(cols_2))) + else: + _count_df = count_df.copy() + assert set(_count_df.columns).issubset(set(cols_2)) + # calculate hcw time gap + use = pd.DataFrame(index=_count_df.index, + columns=time_cost_df.index) + for i in use.index: + for j in use.columns: + use.loc[i, j] = _count_df.loc[i, :].mul( + time_cost_df.loc[j, _count_df.columns] + ).sum() + # reorder columns to be consistent with cadres + use = use[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Radiography']] + # reorder index to be consistent with descending order of DALYs 
averted + use = use.reindex(num_dalys_summarized.index) + + # add columns 'total' and 'other' + use['all'] = use.sum(axis=1) + use['Other'] = use[['Dental', 'Laboratory', 'Mental', 'Radiography']].sum(axis=1) + use.drop(columns=['Dental', 'Laboratory', 'Mental', 'Radiography'], inplace=True) + + use_increased = use.subtract(use.loc['s_0', :], axis=1).drop('s_0', axis=0) + + use_increase_percent = use.subtract(use.loc['s_0', :], axis=1 + ).divide(use.loc['s_0', :], axis=1).drop('s_0', axis=0) + + return use, use_increased, use_increase_percent + + hcw_time_used = hcw_time_or_cost_used(time_cost_df=appt_time)[0] + hcw_time_increased_by_cadre = hcw_time_or_cost_used(time_cost_df=appt_time)[1] + hcw_time_increased_by_cadre_percent = hcw_time_or_cost_used(time_cost_df=appt_time)[2] + hcw_time_increased_by_cadre.to_csv(output_folder / 'hcw_time_increased_by_cadre.csv') + hcw_time_increased_by_cadre_percent.to_csv(output_folder / 'hcw_time_increased_by_cadre_percent.csv') + + # get HCW time and cost needed to run the never run appts + def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by_level_summarized): + cols_1 = count_df.columns + cols_2 = time_cost_df.columns + # check that never ran appts (at a level) not in appt_time (as defined) have count 0 and drop them + assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() + if len(list(set(cols_1) - set(cols_2))) > 0: + _count_df = count_df.drop(columns=list(set(cols_1) - set(cols_2))) + else: + _count_df = count_df.copy() + assert set(_count_df.columns).issubset(set(cols_2)) + # calculate hcw time gap + gap = pd.DataFrame(index=_count_df.index, + columns=time_cost_df.index) + for i in gap.index: + for j in gap.columns: + gap.loc[i, j] = _count_df.loc[i, :].mul( + time_cost_df.loc[j, _count_df.columns] + ).sum() + # reorder columns to be consistent with cadres + gap = gap[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Radiography']] + # 
reorder index to be consistent with descending order of DALYs averted
+ def get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Pharmacy'], appts_count_all=num_never_ran_appts_by_level_summarized + ): + # find the appts that need all cadres in cadres_to_find + def find_never_ran_appts_that_need_specific_cadres(): + appts_to_find = [] + _common_cols = appt_time.columns.intersection(appts_count_all.columns) + # already checked above that columns in the latter that are not in the former have 0 count + for col in _common_cols: + if ((appt_time.loc[cadres_to_find, col] > 0).all() + and (appt_time.loc[~appt_time.index.isin(cadres_to_find), col] == 0).all()): + appts_to_find.append(col) + + return appts_to_find + + # counts and count proportions of all never ran + _appts = find_never_ran_appts_that_need_specific_cadres() + _counts = (appts_count_all[_appts].groupby(level=1, axis=1).sum() + .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP).groupby(level=0, axis=1).sum() + .reindex(num_dalys_summarized.index)) + _counts_all = (appts_count_all.groupby(level=1, axis=1).sum() + .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP).groupby(level=0, axis=1).sum() + .reindex(num_dalys_summarized.index)) + assert (_counts.index == _counts_all.index).all() + _proportions = _counts / _counts_all[_counts.columns] + + # hcw time gap and proportions + _time_gap = hcw_time_or_cost_gap(appt_time, appts_count_all[_appts]) + assert (_time_gap.index == hcw_time_gap.index).all() + _time_gap_proportions = _time_gap / hcw_time_gap[_time_gap.columns] + + # hcw cost gap and proportions + _cost_gap = hcw_time_or_cost_gap(appt_cost, appts_count_all[_appts]) + assert (_cost_gap.index == hcw_cost_gap.index).all() + _cost_gap_proportions = _cost_gap / hcw_cost_gap[_cost_gap.columns] + # cost gap distribution among cadres + _cost_gap_percent = pd.DataFrame(index=_cost_gap.index, columns=_cost_gap.columns) + for i in _cost_gap_percent.index: + _cost_gap_percent.loc[i, :] = _cost_gap.loc[i, :] / _cost_gap.loc[i, :].sum() + + # if sum up all 
appt types/cadres + _proportions_total = _counts.sum(axis=1) / _counts_all.sum(axis=1) + _cost_gap_proportions_total = _cost_gap.sum(axis=1) / hcw_cost_gap.sum(axis=1) + _time_gap_proportions_total = _time_gap.sum(axis=1) / hcw_time_gap.sum(axis=1) + + return (_proportions_total, _cost_gap_proportions_total, _cost_gap, _cost_gap_percent, + _time_gap_proportions_total, _time_gap) + + never_ran_appts_info_that_need_CNP = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Nursing_and_Midwifery', 'Pharmacy']) + never_ran_appts_info_that_need_CP = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Pharmacy']) + never_ran_appts_info_that_need_CN = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Nursing_and_Midwifery']) + never_ran_appts_info_that_need_NP = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Nursing_and_Midwifery', 'Pharmacy']) + never_ran_appts_info_that_need_C = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical']) + never_ran_appts_info_that_need_N = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Nursing_and_Midwifery']) + never_ran_appts_info_that_need_P = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Pharmacy']) + + # cost/time proportions within never ran appts, in total of all cadres + p_cost = pd.DataFrame(index=num_services_summarized.index) + p_cost['C & P & NM'] = never_ran_appts_info_that_need_CNP[1] + p_cost['C & P'] = never_ran_appts_info_that_need_CP[1] + p_cost['C & NM'] = never_ran_appts_info_that_need_CN[1] + p_cost['P & NM'] = never_ran_appts_info_that_need_NP[1] + p_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[1] + p_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[1] + p_cost['Nursing_and_Midwifery (NM)'] = never_ran_appts_info_that_need_N[1] + p_cost['Other cases'] = 1 - p_cost[p_cost.columns[0:7]].sum(axis=1) + + p_time = 
pd.DataFrame(index=num_services_summarized.index) + p_time['C & P & NM'] = never_ran_appts_info_that_need_CNP[4] + p_time['C & P'] = never_ran_appts_info_that_need_CP[4] + p_time['C & NM'] = never_ran_appts_info_that_need_CN[4] + p_time['P & NM'] = never_ran_appts_info_that_need_NP[4] + p_time['Clinical (C)'] = never_ran_appts_info_that_need_C[4] + p_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[4] + p_time['Nursing_and_Midwifery (NM)'] = never_ran_appts_info_that_need_N[4] + p_time['Other cases'] = 1 - p_time[p_time.columns[0:7]].sum(axis=1) + + # absolute cost/time gap within never ran appts + a_cost = pd.DataFrame(index=num_services_summarized.index) + a_cost['C & P & NM'] = never_ran_appts_info_that_need_CNP[2].sum(axis=1) + a_cost['C & P'] = never_ran_appts_info_that_need_CP[2].sum(axis=1) + a_cost['C & NM'] = never_ran_appts_info_that_need_CN[2].sum(axis=1) + a_cost['P & NM'] = never_ran_appts_info_that_need_NP[2].sum(axis=1) + a_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[2].sum(axis=1) + a_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[2].sum(axis=1) + a_cost['Nursing_and_Midwifery (NM)'] = never_ran_appts_info_that_need_N[2].sum(axis=1) + a_cost['Other cases'] = hcw_cost_gap.sum(axis=1) - a_cost.sum(axis=1) + + a_time = pd.DataFrame(index=num_services_summarized.index) + a_time['C & P & NM'] = never_ran_appts_info_that_need_CNP[5].sum(axis=1) + a_time['C & P'] = never_ran_appts_info_that_need_CP[5].sum(axis=1) + a_time['C & NM'] = never_ran_appts_info_that_need_CN[5].sum(axis=1) + a_time['P & NM'] = never_ran_appts_info_that_need_NP[5].sum(axis=1) + a_time['Clinical (C)'] = never_ran_appts_info_that_need_C[5].sum(axis=1) + a_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[5].sum(axis=1) + a_time['Nursing_and_Midwifery (NM)'] = never_ran_appts_info_that_need_N[5].sum(axis=1) + a_time['Other cases'] = hcw_time_gap.sum(axis=1) - a_time.sum(axis=1) + + # appts count proportions within never ran appts, in total of all 
cadres + p_count = pd.DataFrame(index=num_services_summarized.index) + p_count['C & P & NM'] = never_ran_appts_info_that_need_CNP[0] + p_count['C & P'] = never_ran_appts_info_that_need_CP[0] + p_count['C & NM'] = never_ran_appts_info_that_need_CN[0] + p_count['P & NM'] = never_ran_appts_info_that_need_NP[0] + p_count['Clinical (C)'] = never_ran_appts_info_that_need_C[0] + p_count['Pharmacy (P)'] = never_ran_appts_info_that_need_P[0] + p_count['Nursing_and_Midwifery (NM)'] = never_ran_appts_info_that_need_N[0] + p_count['Other cases'] = 1 - p_count[p_count.columns[0:7]].sum(axis=1) + + # define color for the cadres combinations above + cadre_comb_color = { + 'C & P & NM': 'royalblue', + 'C & P': 'turquoise', + 'C & NM': 'gold', + 'P & NM': 'yellowgreen', + 'Clinical (C)': 'mediumpurple', + 'Pharmacy (P)': 'limegreen', + 'Nursing_and_Midwifery (NM)': 'pink', + 'Other cases': 'gray', + } + + # Checked that Number_By_Appt_Type_Code and Number_By_Appt_Type_Code_And_Level have not exactly same results + + # hcw time by cadre and treatment: draw = 22: C + N + P vs no expansion, draw = 11, C + P vs no expansion + # time_increased_by_cadre_treatment_CNP = get_hcw_time_by_treatment(21) + # time_increased_by_cadre_treatment_CP = get_hcw_time_by_treatment(10) + + # # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1 + # # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios + # ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns) + # # todo: for the bad scenarios (s_5, s_8, s_15), the dalys averted are negative + # # (maybe only due to statistical variation; relative difference to s_1 are close to 0%), + # # thus CE does not make sense. 
+ # # CE = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns) + # for i in ROI.index: + # ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost_all_yrs.loc[i, 'all_cadres'] + # # CE.loc[i, 'mean'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'mean'] + # # CE.loc[i, 'lower'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'upper'] + # # CE.loc[i, 'upper'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower'] + + # prepare colors for plots + appt_color = { + appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized.columns + } + treatment_color = { + treatment: SHORT_TREATMENT_ID_TO_COLOR_MAP.get(treatment, np.nan) + for treatment in num_treatments_summarized.columns + } + cause_color = { + cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan) + for cause in num_dalys_by_cause_summarized.columns + } + officer_category_color = { + 'Clinical': 'blue', + 'DCSA': 'orange', + 'Nursing_and_Midwifery': 'red', + 'Pharmacy': 'green', + 'Dental': 'purple', + 'Laboratory': 'orchid', + 'Mental': 'plum', + 'Nutrition': 'thistle', + 'Radiography': 'lightgray', + 'Other': 'gray' + } + # get scenario color + # scenario_groups = scenario_grouping_coloring(by='effect') + # scenario_groups = scenario_grouping_coloring(by='allocation_alt') + scenario_groups = scenario_grouping_coloring(by='allocation') + scenario_color = {} + for s in param_names: + for k in scenario_groups[1].keys(): + if s in scenario_groups[0][k]: + scenario_color[s] = scenario_groups[1][k] + + # plot 4D data: relative increases of Clinical, Pharmacy, and Nursing_and_Midwifery as three coordinates,\ + # percentage of DALYs averted decides the color of that scatter point + # prepare extra budget allocation + extra_budget_allocation = extra_budget_fracs.T.reindex(num_dalys_summarized.index) + extra_budget_allocation['Other'] = extra_budget_allocation[ + ['Dental', 'Laboratory', 
'Mental', 'Radiography'] + ].sum(axis=1) + # prepare hrh increase rates in the same format for regression analysis + increase_rate_avg_exp = avg_increase_rate_exp.T.reindex(num_dalys_summarized.index) + increase_rate_avg_exp['Other'] = increase_rate_avg_exp['Dental'].copy() + + name_of_plot = f'3D DALYs averted (%) vs no extra budget allocation, {target_period()}' + # name_of_plot = f'DALYs averted (%) vs no HCW expansion investment (avg. HCW increase rate), {target_period()}' + heat_data = pd.merge(num_dalys_averted_percent['mean'], + extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']], + # increase_rate_avg_exp[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']], + left_index=True, right_index=True, how='inner') + # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + img = ax.scatter(heat_data['Clinical'], heat_data['Pharmacy'], heat_data['Nursing_and_Midwifery'], + alpha=0.8, marker='o', #s=heat_data['mean'] * 2000, c=colors, + c=heat_data['mean'] * 100, cmap='viridis' + ) + # plot lines from the best point to three axes panes + ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]], + [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]], + [0, heat_data['Nursing_and_Midwifery'][0]], + linestyle='--', color='gray', alpha=0.8) + ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]], + [0, heat_data['Pharmacy'][0]], + [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]], + linestyle='--', color='gray', alpha=0.8) + ax.plot3D([0, heat_data['Clinical'][0]], + [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]], + [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]], + linestyle='--', color='gray', alpha=0.8) + ax.set_xlabel('Fraction of extra budget 
allocated to \nClinical cadre', fontsize='small') + # ax.set_xlabel('Avg. annual increase rate of \nClinical cadre', fontsize='small') + ax.set_ylabel('Pharmacy cadre', fontsize='small') + #ax.invert_xaxis() + ax.invert_yaxis() + ax.set_zlabel('Nursing and Midwifery cadre', fontsize='small') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.1, label='DALYs averted %') + plt.title(name_of_plot) + plt.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'3D DALYs averted, Services increased and Treatment increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig = plt.figure() + # ax = fig.add_subplot(111, projection='3d') + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', + # c=colors) + # ax.set_xlabel('Services increased %') + # ax.set_ylabel('Treatments increased %') + # ax.set_zlabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper 
center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'2D DALYs averted, Services increased and Treatment increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2], + # alpha=0.8, marker='o', s=2000 * heat_data.iloc[:, 0], + # c=colors) + # ax.set_xlabel('Services increased %') + # ax.set_ylabel('Treatments increased %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs averted and Services increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in 
heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', c=colors) + # ax.set_xlabel('Services increased %') + # ax.set_ylabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs averted and Treatments increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', c=colors) + # ax.set_xlabel('Treatments increased %') + # ax.set_ylabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2) + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs averted and Services 
ratio increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], service_ratio_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', c=colors) + # ax.set_xlabel('Service delivery ratio increased %') + # ax.set_ylabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2) + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # do some linear regression to see the isolated effects of individual cadres and combined effects of C, N, P cadres + outcome_data = num_dalys_averted_percent['mean'] + # outcome_data = num_services_increased_percent['mean'] + # outcome_data = num_treatments_total_increased_percent['mean'] + regression_data = pd.merge(outcome_data, + increase_rate_avg_exp, + # extra_budget_allocation, + left_index=True, right_index=True, how='inner') + # regression_data.drop(index='s_2', inplace=True) + # regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy'] + # regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery'] + # regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery'] + # regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy'] + # * 
regression_data['Nursing_and_Midwifery']) + cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'] + regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True) + predictor = regression_data[regression_data.columns[1:]] # .drop(index=['s_*', 's_2', 's_1'], axis=0) + outcome = regression_data['mean'] # .drop(index=['s_*', 's_2', 's_1'], axis=0) # regression model without "optimal" data + predictor = sm.add_constant(predictor) # add constant term + est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit() + print(est.summary()) + + # calculate the predicted DALYs based on the regression results + for i in regression_data.index: + regression_data.loc[i, 'predicted'] = ( + regression_data.loc[i, ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']].dot( + est.params[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']] + ) + + est.params['const'] + ) + + # plot mean and predicted DALYs from regression analysis + # name_of_plot = f'DALYs-averted simulated vs predicted from linear regression on extra budget allocation' + name_of_plot = 'DALYs-averted simulated vs predicted from linear regression on HRH increase rate' + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot = regression_data[['mean', 'predicted']] * 100 + data_to_plot['strategy'] = data_to_plot.index + data_to_plot.rename(columns={'mean': 'simulated'}, inplace=True) + data_to_plot.plot.scatter(x='strategy', y='simulated', color='blue', label='simulated', ax=ax) + data_to_plot.plot.scatter(x='strategy', y='predicted', color='orange', label='predicted', ax=ax) + ax.set_ylabel('DALYs averted %', fontsize='small') + ax.set(xlabel=None) + ax.grid(axis="both") + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # xtick_colors = [scenario_color[v] for v in data_to_plot.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # xtick.set_color(color) # color scenarios 
based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + plt.legend(loc='upper right') + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # do regression on DALYs averted % and Euclidean distance to "gap" solution + increase_rate_distance = increase_rate_avg_exp[ + ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other'] + ].copy() + # increase_rate_distance = extra_budget_allocation[ + # ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other'] + # ].copy() + for idx in increase_rate_distance.index: + increase_rate_distance.loc[idx, 'Euclidean_distance'] = np.linalg.norm( + increase_rate_distance.loc[idx, ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']] - + increase_rate_distance.loc['s_2', ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']] + ) + regression_data_1 = pd.merge(outcome_data, + increase_rate_distance['Euclidean_distance'], + left_index=True, right_index=True, how='inner') + predictor = regression_data_1['Euclidean_distance'].drop(index=['s_2'], axis=0) + outcome = regression_data_1['mean'].drop(index=['s_2'], axis=0) # regression model without "optimal" data + # calculate pearson correlation + print(predictor.corr(outcome)) + predictor = sm.add_constant(predictor) # add constant term + est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit() + print(est.summary()) + + # calculate the predicted DALYs based on the regression results + for i in regression_data_1.index: + regression_data_1.loc[i, 'predicted'] = ( + regression_data_1.loc[i, 'Euclidean_distance'] * est.params['Euclidean_distance'] + est.params['const'] + ) + + # plot mean and predicted DALYs from regression analysis + name_of_plot = 'DALYs-averted simulated vs predicted from linear regression on Euclidean distance' + fig, ax = plt.subplots(figsize=(9, 6)) + 
data_to_plot = regression_data_1[['mean', 'predicted']] * 100 + data_to_plot['strategy'] = data_to_plot.index + data_to_plot.rename(columns={'mean': 'simulated'}, inplace=True) + data_to_plot.plot.scatter(x='strategy', y='simulated', color='blue', label='simulated', ax=ax) + data_to_plot.plot.scatter(x='strategy', y='predicted', color='orange', label='predicted', ax=ax) + ax.set_ylabel('DALYs averted %', fontsize='small') + ax.set(xlabel=None) + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # xtick_colors = [scenario_color[v] for v in data_to_plot.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + plt.legend(loc='upper right') + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # todo: could do regression analysis of DALYs averted and Services increased + + # # do anova analysis to test the difference of scenario groups + # def anova_oneway(df=num_dalys_averted_percent): + # best = df.loc[list(scenario_groups['C + P + D/NM/O/None']), 'mean'] + # middle_C = df.loc[list(scenario_groups['C + D/NM/O/None']), 'mean'] + # middle_P = df.loc[list(scenario_groups['P + D/NM/O/None']), 'mean'] + # worst = df.loc[df.index.isin(scenario_groups['D/NM/O/None']), 'mean'] + # + # return ss.oneway.anova_oneway((best, middle_C, middle_P, worst), + # groups=None, use_var='unequal', welch_correction=True, trim_frac=0) + + # anova_dalys = anova_oneway() + # anova_services = anova_oneway(num_services_increased_percent) + # anova_treatments = anova_oneway(num_treatments_total_increased_percent) + + # plot absolute numbers for scenarios + + # name_of_plot = f'Deaths, {target_period()}' + # fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6) + # 
ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs, {target_period()}' + # fig, ax = do_bar_plot_with_ci(num_dalys_summarized / 1e6) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Service demand, {target_period()}' + # fig, ax = do_bar_plot_with_ci(num_service_demand_summarized / 1e6) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Service delivery ratio, {target_period()}' + # fig, ax = do_bar_plot_with_ci(ratio_service_summarized) + # ax.set_title(name_of_plot) + # ax.set_ylabel('services delivered / demand') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # plot yearly DALYs for s_0, s_1, s_2, s_22 scenarios + name_of_plot = f'Yearly DALYs, {target_period()}' + fig, ax = plt.subplots(figsize=(9, 6)) + scenarios_to_plot = ['s_0', 's_1', 's_22', 's_2'] + scenarios_color = {'s_0': 'red', 's_1': 'yellow', 's_22': 'blue', 's_2': 'green'} + for s in scenarios_to_plot: + data = (num_dalys_yearly_summarized.loc[num_dalys_yearly_summarized.scenario == s, :] + .drop(columns='scenario') + .pivot(index='year', columns='stat') + .droplevel(0, axis=1)) + ax.plot(data.index, data['mean'] / 1e6, label=substitute_labels[s], color=scenarios_color[s], linewidth=2) + ax.fill_between(data.index.to_numpy(), + (data['lower'] / 1e6).to_numpy(), + (data['upper'] / 1e6).to_numpy(), + color=scenarios_color[s], + alpha=0.2) + ax.set_title(name_of_plot) + 
ax.set_ylabel('(Millions)') + ax.set_xticks(data.index) + ax.set_xticklabels(data.index, rotation=90) + legend_labels = [substitute_labels[v] for v in scenarios_to_plot] + legend_handles = [plt.Rectangle((0, 0), 1, 1, + color=scenarios_color[v]) for v in scenarios_to_plot] + ax.legend(legend_handles, legend_labels, + loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5), + title='selected scenarios') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # plot yearly DALYs for s_0, s_1, s_2, s_22 scenarios + name_of_plot = f'Yearly DALYs by {the_cause}, {target_period()}' # TB (non-AIDS) + fig, ax = plt.subplots(figsize=(9, 6)) + scenarios_to_plot = ['s_0', 's_1', 's_22', 's_2'] + scenarios_color = {'s_0': 'red', 's_1': 'yellow', 's_22': 'blue', 's_2': 'green'} + for s in scenarios_to_plot: + data = (num_dalys_by_one_cause_yearly_summarized.loc[num_dalys_by_one_cause_yearly_summarized.scenario == s, :] + .drop(columns='scenario') + .pivot(index='year', columns='stat') + .droplevel(0, axis=1)) + ax.plot(data.index, data['mean'] / 1e6, label=substitute_labels[s], color=scenarios_color[s], linewidth=2) + ax.fill_between(data.index.to_numpy(), + (data['lower'] / 1e6).to_numpy(), + (data['upper'] / 1e6).to_numpy(), + color=scenarios_color[s], + alpha=0.2) + ax.set_title(name_of_plot) + ax.set_ylabel('(Millions)') + ax.set_xticks(data.index) + ax.set_xticklabels(data.index, rotation=90) + legend_labels = [substitute_labels[v] for v in scenarios_to_plot] + legend_handles = [plt.Rectangle((0, 0), 1, 1, + color=scenarios_color[v]) for v in scenarios_to_plot] + ax.legend(legend_handles, legend_labels, + loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5), + title='selected scenarios') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # # plot yearly staff count 
(Clinical/Pharmacy/Nursing and Midwifery) for best 9 scenarios + # best_cadres = ['Clinical', 'Pharmacy', 'Nursing_and_Midwifery'] + # name_of_plot = f'Yearly staff count for C+P+N total, {target_period()}' + # fig, ax = plt.subplots(figsize=(9, 6)) + # best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1'] + # for s in best_scenarios: + # data = staff_count.loc[staff_count.draw == s].set_index('year').drop(columns='draw').loc[:, best_cadres].sum( + # axis=1) + # ax.plot(data.index, data.values / 1e3, label=substitute_labels[s], color=best_scenarios_color[s]) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Thousands)') + # ax.set_xticks(data.index) + # legend_labels = [substitute_labels[v] for v in best_scenarios] + # legend_handles = [plt.Rectangle((0, 0), 1, 1, + # color=best_scenarios_color[v]) for v in best_scenarios] + # ax.legend(legend_handles, legend_labels, + # loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5), + # title='Best scenario group') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Services by appointment type, {target_period()}' + # num_appts_summarized_in_millions = num_appts_summarized / 1e6 + # yerr_services = np.array([ + # (num_services_summarized['mean'] - num_services_summarized['lower']).values, + # (num_services_summarized['upper'] - num_services_summarized['mean']).values, + # ])/1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_services_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_appts_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + 
# plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Services demand by appointment type, {target_period()}' + # num_appts_demand_to_plot = num_appts_demand_summarized / 1e6 + # yerr_services = np.array([ + # (num_service_demand_summarized['mean'] - num_service_demand_summarized['lower']).values, + # (num_service_demand_summarized['upper'] - num_service_demand_summarized['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_appts_demand_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_service_demand_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_appts_demand_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'Never ran services by appointment type, {target_period()}' + num_never_ran_appts_summarized_in_millions = num_never_ran_appts_summarized / 1e6 + yerr_services = np.array([ + (num_never_ran_services_summarized['mean'] - num_never_ran_services_summarized['lower']).values, + (num_never_ran_services_summarized['upper'] - num_never_ran_services_summarized['mean']).values, + ])/1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + 
num_never_ran_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + ax.errorbar(range(len(param_names)), num_never_ran_services_summarized['mean'].values / 1e6, yerr=yerr_services, + fmt=".", color="black", zorder=100) + ax.set_ylabel('Millions', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_summarized_in_millions.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Total services demand by appointment type, {target_period()}' + data_to_plot = num_appts_demand_summarized / 1e6 + yerr_services = np.array([ + (num_services_demand_summarized['mean'] - num_services_demand_summarized['lower']).values, + (num_services_demand_summarized['upper'] - num_services_demand_summarized['mean']).values, + ])/1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + ax.errorbar(range(len(param_names)), num_services_demand_summarized['mean'].values / 1e6, yerr=yerr_services, + fmt=".", color="black", zorder=100) + ax.set_ylabel('Millions', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + 
fig.show() + plt.close(fig) + + # name_of_plot = f'Services by treatment type, {target_period()}' + # num_treatments_summarized_in_millions = num_treatments_summarized / 1e6 + # yerr_services = np.array([ + # (num_treatments_total_summarized['mean'] - num_treatments_total_summarized['lower']).values, + # (num_treatments_total_summarized['upper'] - num_treatments_total_summarized['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(10, 6)) + # num_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_treatments_total_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_treatments_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Never ran services by treatment type, {target_period()}' + # num_never_ran_treatments_summarized_in_millions = num_never_ran_treatments_summarized / 1e6 + # yerr_services = np.array([ + # (num_never_ran_treatments_total_summarized['mean'] - num_never_ran_treatments_total_summarized['lower']).values, + # (num_never_ran_treatments_total_summarized['upper'] - num_never_ran_treatments_total_summarized['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(10, 6)) + # num_never_ran_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_never_ran_treatments_total_summarized['mean'].values / 1e6, + # 
yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Number of staff by cadre, {TARGET_PERIOD[1].year}' + # total_staff_to_plot = (staff_count_2029 / 1000).drop(columns='all_cadres').reindex(num_dalys_summarized.index) + # column_dcsa = total_staff_to_plot.pop('DCSA') + # total_staff_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Thousands', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in total_staff_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'HCW time used by cadre in delivering services , {target_period()}' + data_to_plot = (hcw_time_used.drop(columns='all') / 1e6).reindex(num_dalys_summarized.index) + column_dcsa = data_to_plot.pop('DCSA') + data_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + 
ax.set_ylabel('Minutes in Millions', fontsize='small') + ax.set(xlabel=None) + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW time needed to deliver never ran appointments, {target_period()}' + hcw_time_gap_to_plot = (hcw_time_gap / 1e6).reindex(num_dalys_summarized.index) + column_dcsa = hcw_time_gap_to_plot.pop('DCSA') + hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + ax.set_ylabel('Minutes in Millions', fontsize='small') + ax.set(xlabel=None) + xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW cost gap by cadre to deliver never ran appointments, {target_period()}' + cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other'] + hcw_cost_gap_to_plot = (hcw_cost_gap[cadres_to_plot] / 1e6).reindex(num_dalys_summarized.index) + column_dcsa = hcw_cost_gap_to_plot.pop('DCSA') + hcw_cost_gap_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + hcw_cost_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + 
ax.set_ylabel('USD in Millions', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + + xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_to_plot.index] + xtick_colors = [scenario_color[v] for v in hcw_cost_gap_to_plot.index] + for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Count proportions of never ran appointments that require specific cadres only, {target_period()}' + data_to_plot = p_count * 100 + fig, ax = plt.subplots(figsize=(12, 8)) + data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + ax.set_ylim(0, 100) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90) + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # plot the average proportions of all scenarios + # for c in data_to_plot.columns: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # label=c) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW cost proportions of never ran appointments that require specific cadres only, {target_period()}' + data_to_plot = p_cost * 100 + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', 
stacked=True, color=cadre_comb_color, rot=0, ax=ax) + # ax.set_ylim(0, 100) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + xtick_colors = [scenario_color[v] for v in data_to_plot.index] + for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # plot the average proportions of all scenarios + # for c in data_to_plot.columns: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # label=c) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Time proportions of never ran appointments that require specific cadres only, {target_period()}' + data_to_plot = p_time * 100 + fig, ax = plt.subplots(figsize=(12, 8)) + data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + ax.set_ylim(0, 100) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90) + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # plot the average proportions of all scenarios + # for c in data_to_plot.columns: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # label=c) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = 
f'HCW cost of never ran appointments that require specific cadres only, {target_period()}' + data_to_plot = a_cost / 1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + ax.set_ylabel('USD in millions') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + xtick_colors = [scenario_color[v] for v in data_to_plot.index] + for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # plot the average cost of all scenarios + # for c in data_to_plot.columns: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # label=c) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Time distribution of never ran appointments that require specific cadres only, {target_period()}' + data_to_plot = a_time / 1e6 + fig, ax = plt.subplots(figsize=(12, 8)) + data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + ax.set_ylabel('minutes in millions') + ax.set_xlabel('Extra budget allocation scenario') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90) + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # plot the average cost of all scenarios + # for c in data_to_plot.columns: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # label=c) + plt.title(name_of_plot) + 
fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW cost gap proportion by cadre to deliver never ran appointments, {target_period()}' + cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other'] + hcw_cost_gap_percent_to_plot = hcw_cost_gap_percent[cadres_to_plot] * 100 + fig, ax = plt.subplots(figsize=(9, 6)) + # hcw_cost_gap_percent_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + hcw_cost_gap_percent_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + #ax.set_ylim(0, 100) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + + xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_percent_to_plot.index] + xtick_colors = [scenario_color[v] for v in hcw_cost_gap_percent_to_plot.index] + for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', reverse=True) + # plot the average proportions of all scenarios + # for c in cadres_to_plot: + # plt.axhline(y=hcw_cost_gap_percent_to_plot[c].mean(), + # linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2, + # label=c) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'HCW cost gap distribution of never ran appointments that require CNP only, {target_period()}' + # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy'] + # data_to_plot = never_ran_appts_info_that_need_CNP[3][cadres_to_plot] * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', 
color=officer_category_color, rot=0, alpha=0.6, ax=ax) + # #ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + # # plot the average proportions of all scenarios + # for c in cadres_to_plot: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2, + # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Average fractions of HCW time used (CNP, level 1a), {target_period()}' + # data_to_plot = hcw_time_usage_summarized.xs('1a', axis=1, level=1, drop_level=True) * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + # #ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Average fractions of HCW time used (CNP, level 2), {target_period()}' + # data_to_plot = hcw_time_usage_summarized.xs('2', axis=1, level=1, drop_level=True) * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + # # ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # 
ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'Extra budget allocation among cadres, {target_period()}' + cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other'] + extra_budget_allocation_to_plot = extra_budget_allocation[cadres_to_plot] * 100 + fig, ax = plt.subplots(figsize=(12, 8)) + extra_budget_allocation_to_plot.plot(kind='bar', color=officer_category_color, rot=0, ax=ax) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in extra_budget_allocation_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90) + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}' + # total_cost_to_plot = (total_cost_all_yrs / 1e6).drop(columns='all_cadres').reindex(num_dalys_summarized.index) + # column_dcsa = total_cost_to_plot.pop('DCSA') + # total_cost_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in total_cost_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center 
left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs by cause, {target_period()}' + # num_dalys_by_cause_summarized_in_millions = num_dalys_by_cause_summarized / 1e6 + # yerr_dalys = np.array([ + # (num_dalys_summarized['mean'] - num_dalys_summarized['lower']).values, + # (num_dalys_summarized['upper'] - num_dalys_summarized['mean']).values, + # ])/1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_dalys_by_cause_summarized_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # fig.subplots_adjust(right=0.7) + # ax.legend( + # loc="center left", + # bbox_to_anchor=(0.750, 0.6), + # bbox_transform=fig.transFigure, + # title='Cause of death or injury', + # title_fontsize='x-small', + # fontsize='x-small', + # reverse=True, + # ncol=1 + # ) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # plot relative numbers for scenarios + name_of_plot = f'DALYs averted vs no extra budget allocation, {target_period()}' + fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, num_dalys_averted_percent, annotation=True) + ax.set_title(name_of_plot, fontsize='medium') + ax.set_ylabel('DALYs averted in Millions', fontsize='medium') + ax.set_xlabel('Extra budget allocation scenario', 
fontsize='medium') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Deaths averted vs no extra budget allocation, {target_period()}' + fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, num_deaths_averted_percent, annotation=True) + ax.set_title(name_of_plot) + ax.set_ylabel('Millions') + ax.set_xlabel('Extra budget allocation scenario') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # todo: plot Deaths averted by cause + + # name_of_plot = f'Service delivery ratio against no expansion, {target_period()}' + # fig, ax = do_bar_plot_with_ci(service_ratio_increased * 100, service_ratio_increased_percent, annotation=True) + # ax.set_title(name_of_plot) + # ax.set_ylabel('Percentage') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Extra staff by cadre against no expansion, {TARGET_PERIOD[1].year}' + # extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex( + # num_dalys_summarized.index).drop(['s_1']) / 1e3 + # column_dcsa = extra_staff_by_cadre_to_plot.pop('DCSA') + # extra_staff_by_cadre_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Thousands', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # 
fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'Extra budget by cadre vs no extra budget allocation, {target_period()}' + extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex( + num_dalys_summarized.index).drop(index='s_0') / 1e6 + column_dcsa = extra_cost_by_cadre_to_plot.pop('DCSA') + extra_cost_by_cadre_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + ax.set_ylabel('Millions', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # # name_of_plot = f'Time used increased by cadre and treatment: C + NM + P vs no expansion, {target_period()}' + # # data_to_plot = time_increased_by_cadre_treatment_CNP / 1e6 + # name_of_plot = f'Time used increased by cadre and treatment: C + P vs no expansion, {target_period()}' + # data_to_plot = time_increased_by_cadre_treatment_CP / 1e6 + # data_to_plot['total'] = data_to_plot.sum(axis=1) + # data_to_plot.sort_values(by='total', inplace=True, ascending=False) + # data_to_plot.drop('total', axis=1, inplace=True) + # data_to_plot = data_to_plot[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery', + # 'DCSA', 'Laboratory', 'Mental', 'Radiography']] + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, 
ax=ax) + # ax.set_ylabel('Millions Minutes') + # ax.set_xlabel('Treatment') + # ax.set_xticklabels(data_to_plot.index, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + # ':', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Time used increased by treatment and cadre: C + NM + P vs no expansion, {target_period()}' + # # name_of_plot = f'Time used increased by treatment and cadre: C + P vs no expansion, {target_period()}' + # data_to_plot = data_to_plot.T + # data_to_plot = data_to_plot.add_suffix('*') + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # ax.set_ylabel('Millions Minutes') + # ax.set_xlabel('Treatment') + # ax.set_xticklabels(data_to_plot.index, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + # ':', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'DALYs by cause averted: \nall cadres gap allocation vs no extra budget allocation, {target_period()}' + data_to_plot = num_dalys_by_cause_averted_CNP / 1e6 + # name_of_plot = f'DALYs by cause averted: C + P vs no expansion, {target_period()}' + # data_to_plot = num_dalys_by_cause_averted_CP / 1e6 + fig, ax = plt.subplots() + data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values) + ax.set_ylabel('Millions') + ax.set_xlabel('Treatment') + ax.set_xticklabels(data_to_plot.index, rotation=90) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + ':', '').replace('\n', ''))) + fig.show() + 
plt.close(fig) + + name_of_plot = f'DALYs by cause averted %: \nall cadres gap allocation vs no extra budget allocation, {target_period()}' + data_to_plot = num_dalys_by_cause_averted_percent_CNP * 100 + fig, ax = plt.subplots() + data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Treatment') + ax.set_xticklabels(data_to_plot.index, rotation=90) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + ':', '').replace('\n', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'Services increased by appointment type \nagainst no expansion, {target_period()}' + # num_appts_increased_in_millions = num_appts_increased / 1e6 + # yerr_services = np.array([ + # (num_services_increased['mean'] - num_services_increased['lower']).values, + # (num_services_increased['upper'] - num_services_increased['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_appts_increased_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)-1), num_services_increased['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_appts_increased_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name( + # name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + # ) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Never ran services reduced by appointment type \nagainst no expansion, 
{target_period()}' + # num_never_ran_appts_reduced_to_plot = num_never_ran_appts_reduced / 1e6 + # # yerr_services = np.array([ + # # (num_services_increased['mean'] - num_services_increased['lower']).values, + # # (num_services_increased['upper'] - num_services_increased['mean']).values, + # # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_never_ran_appts_reduced_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services, + # # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_reduced_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name( + # name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + # ) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Never ran services reduced by treatment type \nagainst no expansion, {target_period()}' + # num_never_ran_treatments_reduced_to_plot = num_never_ran_treatments_reduced / 1e6 + # # yerr_services = np.array([ + # # (num_services_increased['mean'] - num_services_increased['lower']).values, + # # (num_services_increased['upper'] - num_services_increased['mean']).values, + # # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_never_ran_treatments_reduced_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services, + # # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + 
# ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_reduced_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name( + # name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + # ) + # fig.show() + # plt.close(fig) + + name_of_plot = f'Services increased by treatment type \nvs no extra budget allocation, {target_period()}' + data_to_plot = num_treatments_increased / 1e6 + # yerr_services = np.array([ + # (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values, + # (num_treatments_total_increased['upper'] - num_treatments_total_increased['mean']).values, + # ]) / 1e6 + fig, ax = plt.subplots(figsize=(10, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, width=0.8, ax=ax) + # ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # move bars to new xticks + new_xticks = {(i + 0.5): k for i, k in enumerate(data_to_plot.index)} + ax.set_xticks(list(new_xticks.keys())) + ax.set_xticklabels(list(new_xticks.values())) + for i, rect in enumerate(ax.patches): + # Shift the bars based on their new position + rect.set_x(list(new_xticks.keys())[i % len(list(new_xticks.keys()))] - rect.get_width() / 2) + ax.set_xlim(-1, len(data_to_plot.index) + 1) + + ax.set_ylabel('Millions', fontsize='medium') + ax.set(xlabel=None) + ax.grid(axis='y') + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # xtick_colors = [scenario_color[v] for v in data_to_plot.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # 
xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='medium') # re-label scenarios + + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.3), title='Treatment type', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot, fontsize='medium') + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'Services increased by treatment area vs no extra budget allocation, {target_period()}' + data_to_plot = num_treatments_group_increased / 1e6 + data_to_plot = data_to_plot[ + ['RMNCH', 'HIV/AIDS', 'Malaria', 'TB (non-AIDS)', 'NCDs', 'Transport Injuries'] + ] + # yerr_services = np.array([ + # (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values, + # (num_treatments_total_increased['upper'] - num_treatments_total_increased['mean']).values, + # ]) / 1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=treatment_group_color, rot=0, width=0.8, ax=ax) + # move bars to new xticks + new_xticks = {(i + 0.5): k for i, k in enumerate(data_to_plot.index)} + ax.set_xticks(list(new_xticks.keys())) + ax.set_xticklabels(list(new_xticks.values())) + for i, rect in enumerate(ax.patches): + # Shift the bars based on their new position + rect.set_x(list(new_xticks.keys())[i % len(list(new_xticks.keys()))] - rect.get_width() / 2) + ax.set_xlim(-1, len(data_to_plot.index) + 1) + + # ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # add annotation + assert (data_to_plot.index == num_treatments_total_increased_percent.index).all() + assert (data_to_plot.index == num_treatments_total_increased.index).all() + for xpos, ypos, text1 in zip(ax.get_xticks(), + 
(num_treatments_total_increased['upper'] / 1e6).values, + num_treatments_total_increased_percent['mean'].values): + text = f"{int(round(text1 * 100, 2))}%" # \n{[round(text2, 2),round(text3, 2)]}" + ax.text(xpos, ypos + 0.05, text, horizontalalignment='center', fontsize='small') + + ax.set_ylabel('Services increased in Millions', fontsize='medium') + ax.set_xlabel('Extra budget allocation scenario', fontsize='medium') + ax.grid(axis="y") + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # xtick_colors = [scenario_color[v] for v in data_to_plot.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='medium') # re-label scenarios + + plt.legend(title='Treatment area', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot, fontsize='medium') + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW time-used increased by treatment type \nvs no extra budget allocation, {target_period()}' + data_to_plot = hcw_time_increased_by_treatment_type / 1e6 + fig, ax = plt.subplots(figsize=(10, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + ax.set_ylabel('Million minutes', fontsize='small') + ax.set(xlabel=None) + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW time-used increased by cadre vs no 
extra budget allocation, {target_period()}' + data_to_plot = hcw_time_increased_by_cadre.drop(columns='all') / 1e9 + column_dcsa = data_to_plot.pop('DCSA') + data_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, width=0.8, alpha=0.8, ax=ax) + # move bars to new xticks + new_xticks = {(i + 0.5): k for i, k in enumerate(data_to_plot.index)} + ax.set_xticks(list(new_xticks.keys())) + ax.set_xticklabels(list(new_xticks.values())) + for i, rect in enumerate(ax.patches): + # Shift the bars based on their new position + rect.set_x(list(new_xticks.keys())[i % len(list(new_xticks.keys()))] - rect.get_width() / 2) + ax.set_xlim(-1, len(data_to_plot.index) + 1) + # add annotation + assert (data_to_plot.index == hcw_time_increased_by_cadre_percent.index).all() + for xpos, ypos, text1 in zip(ax.get_xticks(), + (hcw_time_increased_by_cadre['all'] / 1e9).values, + hcw_time_increased_by_cadre_percent['all'].values): + text = f"{int(round(text1 * 100, 2))}%" + ax.text(xpos, ypos + 0.05, text, horizontalalignment='center', fontsize='small') + ax.set_ylabel('Billions minutes', fontsize='medium') + ax.set_xlabel('Extra budget allocation scenario', fontsize='medium') + ax.grid(axis="y") + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # xtick_colors = [scenario_color[v] for v in data_to_plot.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='medium') # re-label scenarios + + plt.legend(title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot, fontsize='medium') + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'DALYs by cause 
averted vs no extra budget allocation, {target_period()}' + num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6 + # yerr_dalys = np.array([ + # (num_dalys_averted['mean'] - num_dalys_averted['lower']).values, + # (num_dalys_averted['upper'] - num_dalys_averted['mean']).values, + # ]) / 1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + num_dalys_by_cause_averted_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, width=0.8, ax=ax) + # ax.errorbar(range(len(param_names)-1), num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys, + # fmt=".", color="black", zorder=100) + # move bars to new xticks + new_xticks = {(i + 0.5): k for i, k in enumerate(data_to_plot.index)} + ax.set_xticks(list(new_xticks.keys())) + ax.set_xticklabels(list(new_xticks.values())) + for i, rect in enumerate(ax.patches): + # Shift the bars based on their new position + rect.set_x(list(new_xticks.keys())[i % len(list(new_xticks.keys()))] - rect.get_width() / 2) + ax.set_xlim(-1, len(data_to_plot.index) + 1) + + ax.set_ylabel('Millions', fontsize='medium') + ax.set_xlabel('Extra budget allocation scenario', fontsize='medium') + ax.grid(axis='y') + + xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_averted.index] + # xtick_colors = [scenario_color[v] for v in num_dalys_by_cause_averted.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='medium') # re-label scenarios + + fig.subplots_adjust(right=0.7) + ax.legend( + loc="center left", + bbox_to_anchor=(0.750, 0.6), + bbox_transform=fig.transFigure, + title='Cause of death or injury', + title_fontsize='x-small', + fontsize='x-small', + ncol=1, + reverse=True + ) + plt.title(name_of_plot, fontsize='medium') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + 
name_of_plot = f'DALYs by cause area averted vs no extra budget allocation, {target_period()}' + data_to_plot = num_dalys_by_cause_group_averted / 1e6 + data_to_plot = data_to_plot[ + ['RMNCH', 'HIV/AIDS', 'Malaria', 'TB (non-AIDS)', 'NCDs', 'Transport Injuries'] + ] + # yerr_dalys = np.array([ + # (num_dalys_averted['mean'] - num_dalys_averted['lower']).values, + # (num_dalys_averted['upper'] - num_dalys_averted['mean']).values, + # ]) / 1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=cause_group_color, rot=0, width=0.8, ax=ax) + # ax.errorbar(range(len(param_names)-1), num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys, + # fmt=".", color="black", zorder=100) + # move bars to new xticks + new_xticks = {(i + 0.5): k for i, k in enumerate(data_to_plot.index)} + ax.set_xticks(list(new_xticks.keys())) + ax.set_xticklabels(list(new_xticks.values())) + for i, rect in enumerate(ax.patches): + # Shift the bars based on their new position + rect.set_x(list(new_xticks.keys())[i % len(list(new_xticks.keys()))] - rect.get_width() / 2) + ax.set_xlim(-1, len(data_to_plot.index) + 1) + + ax.set_ylabel('DALYs averted in Millions', fontsize='medium') + ax.set_xlabel('Extra budget allocation scenario', fontsize='medium') + ax.grid(axis="y") + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # xtick_colors = [scenario_color[v] for v in data_to_plot.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + fig.subplots_adjust(right=0.7) + ax.legend( + # bbox_transform=fig.transFigure, + title='Cause of death or injury', + title_fontsize='small', + fontsize='small', + ncol=1, + reverse=True + ) + plt.title(name_of_plot, fontsize='medium') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', 
''))) + fig.show() + plt.close(fig) + + # plot ROI and CE for all expansion scenarios + + # name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}' + # fig, ax = do_bar_plot_with_ci(ROI) + # ax.set_title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Cost per DALY averted, {target_period()}' + # fig, ax = do_bar_plot_with_ci(CE) + # ax.set_title(name_of_plot) + # ax.set_ylabel('USD dollars') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # todo + # To vary the HRH budget growth rate (default: 4.2%) and do sensitivity analysis \ + # (around the best possible extra budget allocation scenario)? + # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary? The \ + # inflation rate of GDP and health workforce budget and the increase rate of salary could be assumed to be \ + # the same, thus no need to consider the increase rate of salary if GDP inflation is not considered. + # To plot time series of staff and budget in the target period to show \ + # how many staff and how much budget to increase yearly (choose the best scenario to illustrate)? + # Before submit a run, merge in the remote master. + # Think about a measure of Universal Health Service Coverage for the scenarios? + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("results_folder", type=Path) # outputs/bshe@ic.ac.uk/scenario_run_for_hcw_expansion_analysis-2024-08-16T160132Z + args = parser.parse_args() + + # Produce results for short-term analysis: 5 years + + # # 2015-2019, before change, incl. mode, hr expansion, etc. 
+ # apply( + # results_folder=args.results_folder, + # output_folder=args.results_folder, + # resourcefilepath=Path('./resources'), + # the_target_period=(Date(2015, 1, 1), Date(2019, 12, 31)) + # ) + # + # # 2020-2024 + # apply( + # results_folder=args.results_folder, + # output_folder=args.results_folder, + # resourcefilepath=Path('./resources'), + # the_target_period=(Date(2020, 1, 1), Date(2024, 12, 31)) + # ) + + # Produce results for long-term analysis: 10 years + # 2020-2029 + apply( + results_folder=args.results_folder, + output_folder=args.results_folder, + resourcefilepath=Path('./resources'), + the_target_period=(Date(2025, 1, 1), Date(2034, 12, 31)) + ) diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py new file mode 100644 index 0000000000..959e96104f --- /dev/null +++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py @@ -0,0 +1,345 @@ +""" +We calculate the salary cost of current and funded plus HCW. 
+""" +import itertools +# import pickle +from pathlib import Path + +import numpy as np +import pandas as pd + +resourcefilepath = Path('./resources') + +mfl = pd.read_csv(resourcefilepath / 'healthsystem' / 'organisation' / 'ResourceFile_Master_Facilities_List.csv') + +hr_salary = pd.read_csv(resourcefilepath / + 'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv', index_col=False) +hr_salary_per_level = pd.read_excel(resourcefilepath / + 'costing' / 'ResourceFile_Costing.xlsx', sheet_name='human_resources') +# as of 2019 +hr_current = pd.read_csv(resourcefilepath / + 'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv') +hr_established = pd.read_csv(resourcefilepath / + 'healthsystem' / 'human_resources' / 'funded_plus' / 'ResourceFile_Daily_Capabilities.csv') +# for 2020-2024 +historical_scaling = pd.read_excel(resourcefilepath / + 'healthsystem' / 'human_resources' / 'scaling_capabilities' / + 'ResourceFile_dynamic_HR_scaling.xlsx', sheet_name='historical_scaling' + ).set_index('year') +integrated_historical_scaling = ( + historical_scaling.loc[2020, 'dynamic_HR_scaling_factor'] * + historical_scaling.loc[2021, 'dynamic_HR_scaling_factor'] * + historical_scaling.loc[2022, 'dynamic_HR_scaling_factor'] * + historical_scaling.loc[2023, 'dynamic_HR_scaling_factor'] * + historical_scaling.loc[2024, 'dynamic_HR_scaling_factor'] +) + +# to get minute salary per cadre per level +Annual_PFT = hr_current.groupby(['Facility_Level', 'Officer_Category']).agg( + {'Total_Mins_Per_Day': 'sum', 'Staff_Count': 'sum'}).reset_index() +Annual_PFT['Annual_Mins_Per_Staff'] = 365.25 * Annual_PFT['Total_Mins_Per_Day']/Annual_PFT['Staff_Count'] + +# the hr salary by minute and facility id, as of 2019 +Minute_Salary = Annual_PFT.merge(hr_salary, on=['Officer_Category'], how='outer') +Minute_Salary['Minute_Salary_USD'] = Minute_Salary['Annual_Salary_USD']/Minute_Salary['Annual_Mins_Per_Staff'] +# store the minute salary by cadre and level 
+Minute_Salary_by_Cadre_Level = Minute_Salary[ + ['Facility_Level', 'Officer_Category', 'Minute_Salary_USD'] +].copy().fillna(0.0) +Minute_Salary = Minute_Salary[['Facility_Level', 'Officer_Category', 'Minute_Salary_USD']].merge( + mfl[['Facility_Level', 'Facility_ID']], on=['Facility_Level'], how='outer' +) +Minute_Salary.drop(columns=['Facility_Level'], inplace=True) +Minute_Salary = Minute_Salary.fillna(0.0) +Minute_Salary.rename(columns={'Officer_Category': 'Officer_Type_Code'}, inplace=True) + +Minute_Salary.to_csv(resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False) + +# implement historical scaling to hr_current +hr_current['Total_Mins_Per_Day'] *= integrated_historical_scaling +hr_current['Staff_Count'] *= integrated_historical_scaling + +# calculate the current cost distribution of all cadres, as of 2024 +cadre_all = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'] +staff_count = hr_current.groupby('Officer_Category')['Staff_Count'].sum().reset_index() +staff_cost = staff_count.merge(hr_salary, on=['Officer_Category'], how='outer') +staff_cost['annual_cost'] = staff_cost['Staff_Count'] * staff_cost['Annual_Salary_USD'] +staff_cost['cost_frac'] = (staff_cost['annual_cost'] / staff_cost['annual_cost'].sum()) +assert abs(staff_cost.cost_frac.sum() - 1) < 1/1e8 +staff_cost.set_index('Officer_Category', inplace=True) +staff_cost = staff_cost.reindex(index=cadre_all) + +# No expansion scenario, or zero-extra-budget-fraction scenario, "s_0" +# Define the current cost fractions among all cadres as extra-budget-fraction scenario "s_1" \ +# to be matched with Margherita's 4.2% scenario. 
+# Add in the scenario that is indicated by hcw cost gap distribution \ +# resulted from never ran services in no expansion scenario, "s_2" +# Add in the scenario that is indicated by the regression analysis of all other scenarios, "s_*" +# Define all other scenarios so that the extra budget fraction of each cadre, \ +# i.e., four main cadres and the "Other" cadre that groups up all other cadres, is the same (fair allocation) + +cadre_group = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other'] # main cadres +other_group = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'] + +# create scenarios +combination_list = ['s_0', 's_1', 's_2'] # the three special scenarios +for n in range(1, len(cadre_group)+1): + for subset in itertools.combinations(cadre_group, n): + combination_list.append(str(subset)) # other equal-fraction scenarios +# add in "s_*" in the end +combination_list.append('s_*') + +# cadre groups to expand +cadre_to_expand = pd.DataFrame(index=cadre_group, columns=combination_list).fillna(0.0) +for c in cadre_group: + for i in cadre_to_expand.columns[3:len(combination_list) - 1]: # for all equal-fraction scenarios + if c in i: + cadre_to_expand.loc[c, i] = 1 # value 1 indicate the cadre group will be expanded + +# prepare auxiliary dataframe for equal extra budget fractions scenarios +auxiliary = cadre_to_expand.copy() +for i in auxiliary.columns[3:len(combination_list) - 1]: # for all equal-fraction scenarios + auxiliary.loc[:, i] = auxiliary.loc[:, i] / auxiliary.loc[:, i].sum() +# for "gap" allocation strategy +# auxiliary.loc[:, 's_2'] = [0.4586, 0.0272, 0.3502, 0.1476, 0.0164] # without historical scaling; "default" settings +auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365] # historical scaling + main settings +# auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365] # historical scaling + more_budget; same as above +# auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365] # 
historical scaling + less_budget; same as above +# auxiliary.loc[:, 's_2'] = [0.4252, 0.0261, 0.3752, 0.1362, 0.0373] # historical scaling + default_cons +# auxiliary.loc[:, 's_2'] = [0.5133, 0.0085, 0.2501, 0.1551, 0.073] # historical scaling + max_hs_function +# for "optimal" allocation strategy +auxiliary.loc[:, 's_*'] = [0.6068, 0.0, 0.0830, 0.2496, 0.0606] # historical scaling + main settings +# auxiliary.loc[:, 's_*'] = [0.5827, 0.0, 0.1083, 0.2409, 0.0681] # historical scaling + more_budget; same as above +# auxiliary.loc[:, 's_*'] = [0.5981, 0.0, 0.0902, 0.2649, 0.0468] # historical scaling + less_budget; same as above +# auxiliary.loc[:, 's_*'] = [0.6109, 0.0, 0.1494, 0.2033, 0.0364] # historical scaling + default_cons +# auxiliary.loc[:, 's_*'] = [0.5430, 0.0, 0.3631, 0.0939, 0.0] # historical scaling + max_hs_function + +# define extra budget fracs for each cadre +extra_budget_fracs = pd.DataFrame(index=cadre_all, columns=combination_list) +assert (extra_budget_fracs.columns == auxiliary.columns).all() +assert (extra_budget_fracs.index[0:4] == auxiliary.index[0:4]).all() + +extra_budget_fracs.loc[:, 's_0'] = 0 +assert (staff_cost.index == extra_budget_fracs.index).all() +extra_budget_fracs.loc[:, 's_1'] = staff_cost.loc[:, 'cost_frac'].values + +for i in extra_budget_fracs.columns[2:]: + for c in extra_budget_fracs.index: + if c in auxiliary.index: # the four main cadres + extra_budget_fracs.loc[c, i] = auxiliary.loc[c, i] + else: # the other 5 cadres + extra_budget_fracs.loc[c, i] = auxiliary.loc['Other', i] * ( + staff_cost.loc[c, 'cost_frac'] / staff_cost.loc[staff_cost.index.isin(other_group), 'cost_frac'].sum() + ) # current cost distribution among the 5 other cadres + # extra_budget_fracs.loc[c, i] = auxiliary.loc['Other', i] / 5 # equal fracs among the 5 other cadres + +assert (abs(extra_budget_fracs.iloc[:, 1:len(extra_budget_fracs.columns)].sum(axis=0) - 1.0) < 1/1e10).all() + +# rename scenarios +# make the scenario of equal fracs for all five 
cadre groups (i.e., the last column) to be s_3 +simple_scenario_name = {extra_budget_fracs.columns[-2]: 's_3'} +for i in range(3, len(extra_budget_fracs.columns)-2): + simple_scenario_name[extra_budget_fracs.columns[i]] = 's_' + str(i+1) # name scenario from s_4 to s_33 +extra_budget_fracs.rename(columns=simple_scenario_name, inplace=True) + +# reorder columns +col_order = ['s_' + str(i) for i in range(0, len(extra_budget_fracs.columns) - 1)] +col_order += ['s_*'] +assert len(col_order) == len(extra_budget_fracs.columns) +extra_budget_fracs = extra_budget_fracs.reindex(columns=col_order) + +# prepare samples for extra budget fracs that changes values for C, NM and P +# (the main cadres for service delivery and directly impacting health outcomes), +# where DCSA = 2% and Other = 4% are fixed according to "gap" strategies +# and that these cadres either have limited impacts as estimated, deliver a very small proportion of services, +# or can deliver relevant services without being constrained by other cadres. 
+# value_list = list(np.arange(0, 95, 1)) +# combinations = [] +# for i in itertools.product(value_list, repeat=3): +# if sum(i) == 94: +# combinations.append(i) +# extra_budget_fracs_sample = pd.DataFrame(index=extra_budget_fracs.index, columns=range(len(combinations)+1)) +# extra_budget_fracs_sample.iloc[:, 0] = 0 +# extra_budget_fracs_sample.loc['DCSA', 1:] = 2 +# for c in other_group: +# extra_budget_fracs_sample.loc[c, 1:] = 4 * ( +# staff_cost.loc[c, 'cost_frac'] / staff_cost.loc[staff_cost.index.isin(other_group), 'cost_frac'].sum()) +# for i in range(1, len(combinations)+1): +# extra_budget_fracs_sample.loc[['Clinical', 'Nursing_and_Midwifery', 'Pharmacy'], i] = combinations[i-1] +# extra_budget_fracs_sample /= 100 +# assert (abs(extra_budget_fracs_sample.iloc[:, 1:].sum(axis=0) - 1.0) < 1e-9).all() +# extra_budget_fracs_sample.rename(columns={0: 's_0'}, inplace=True) +# +# extra_budget_fracs = extra_budget_fracs_sample.copy() + +# if do not fix DCSA and Other +# value_list = list(np.arange(0, 105, 5)) +# combinations = [] +# for i in itertools.product(value_list, repeat=5): +# if sum(i) == 100: +# combinations.append(i) +# extra_budget_fracs_sample = pd.DataFrame(index=extra_budget_fracs.index, columns=range(len(combinations)+1)) +# extra_budget_fracs_sample.iloc[:, 0] = 0 +# for i in range(1, len(combinations)+1): +# extra_budget_fracs_sample.loc[['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA'], i] = combinations[i-1][:-1] +# for c in other_group: +# extra_budget_fracs_sample.loc[c, i] = combinations[i-1][4] * ( +# staff_cost.loc[c, 'cost_frac'] / staff_cost.loc[staff_cost.index.isin(other_group), 'cost_frac'].sum()) +# extra_budget_fracs_sample /= 100 +# assert (abs(extra_budget_fracs_sample.iloc[:, 1:].sum(axis=0) - 1.0) < 1e-9).all() +# extra_budget_fracs_sample.rename(columns={0: 's_0'}, inplace=True) +# +# extra_budget_fracs = extra_budget_fracs_sample.copy() + +# define the HRH budget growth rate +R = 0.042 # 0.042, 0.058, 0.026 + + +# 
# calculate hr scale up factors for years 2025-2034 (10 years in total) outside the healthsystem module
def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario, r=R) -> pd.DataFrame:
    """Calculate the yearly hr scale up factor for each cadre for the year `yr`.

    Parameters:
        extra_budget_frac (list): nine floats, one per cadre in `cadre_all`, giving the
            fraction of the extra budget allocated to each cadre.
        yr (int): a year in 2025-2034 (inclusive).
        scenario (str): a column name of `extra_budget_fracs`.
        r (float): the yearly HRH budget growth rate (defaults to the module-level `R`).

    Returns a dataframe (indexed by cadre) with the updated staff counts, annual costs,
    scale up factors and increase rates for the year `yr`. The previous year's figures
    are read from the module-level `scale_up_factor_dict`, so years must be computed
    in increasing order, starting from the 2024 baseline.
    """
    # get data of previous year
    prev_year = yr - 1
    prev_data = scale_up_factor_dict[scenario][prev_year].copy()

    # this year's extra budget is r times last year's total cost, split by the given fractions;
    # the extra staff a cadre can hire follows from its annual salary
    prev_data['extra_budget_frac'] = extra_budget_frac
    prev_data['extra_budget'] = r * prev_data.annual_cost.sum() * prev_data.extra_budget_frac
    prev_data['extra_staff'] = prev_data.extra_budget / prev_data.Annual_Salary_USD
    prev_data['scale_up_factor'] = (prev_data.Staff_Count + prev_data.extra_staff) / prev_data.Staff_Count

    # store the updated data for the year yr
    new_data = prev_data[['Annual_Salary_USD', 'scale_up_factor']].copy()
    new_data['Staff_Count'] = prev_data.Staff_Count + prev_data.extra_staff
    new_data['annual_cost'] = prev_data.annual_cost + prev_data.extra_budget
    new_data['increase_rate'] = new_data['scale_up_factor'] - 1.0

    return new_data


# calculate scale up factors for all defined scenarios and years
staff_cost['scale_up_factor'] = 1
staff_cost['increase_rate'] = 0.0
# keyed by scenario, then by year (the inner dicts are filled below, so no pre-initialisation needed)
scale_up_factor_dict = {s: {} for s in extra_budget_fracs.columns}
for s in extra_budget_fracs.columns:
    # for the initial/current year of 2024
    scale_up_factor_dict[s][2024] = staff_cost.drop(columns='cost_frac').copy()
    # for the years with scaled up hr
    for y in range(2025, 2035):
        scale_up_factor_dict[s][y] = calculate_hr_scale_up_factor(list(extra_budget_fracs[s]), y, s)

# get the total cost and staff count for each year between 2024-2034 and each scenario
total_cost = pd.DataFrame(index=range(2024, 2035), columns=extra_budget_fracs.columns)
total_staff = pd.DataFrame(index=range(2024, 2035), columns=extra_budget_fracs.columns)
for y in total_cost.index:
    for s in extra_budget_fracs.columns:
        total_cost.loc[y, s] = scale_up_factor_dict[s][y].annual_cost.sum()
        total_staff.loc[y, s] = scale_up_factor_dict[s][y].Staff_Count.sum()

# check the total cost after 10 years is increased as expected, i.e. compounded yearly at rate R
assert (
    abs(total_cost.loc[2034, total_cost.columns[1:]] - (1 + R) ** 10 * total_cost.loc[2024, 's_0']) < 1e-6
).all()

# get the integrated scale up factors (product of the yearly factors) by the end of year 2034, per scenario
integrated_scale_up_factor = pd.DataFrame(index=cadre_all, columns=total_cost.columns).fillna(1.0)
for s in total_cost.columns[1:]:
    for yr in range(2025, 2035):
        integrated_scale_up_factor.loc[:, s] = np.multiply(
            integrated_scale_up_factor.loc[:, s].values,
            scale_up_factor_dict[s][yr].loc[:, 'scale_up_factor'].values
        )

# get normal (arithmetic) average increase rate over all years
sum_increase_rate = pd.DataFrame(index=cadre_all, columns=total_cost.columns).fillna(0.0)
for s in total_cost.columns[1:]:
    for yr in range(2025, 2035):
        sum_increase_rate.loc[:, s] = (
            sum_increase_rate.loc[:, s].values
            + scale_up_factor_dict[s][yr].loc[:, 'increase_rate'].values
        )
avg_increase_rate = pd.DataFrame(sum_increase_rate / 10)

# get the staff increase rate: 2034 vs 2024
increase_rate_2034 = pd.DataFrame(integrated_scale_up_factor - 1.0)
# geometric-mean yearly increase rate over the 10 years
avg_increase_rate_exp = pd.DataFrame(integrated_scale_up_factor**(1/10) - 1.0)

# get the linear regression prediction
# main analysis 10 runs
# -0.0699 + 1.0046 * x_clinical + 0.4170 * x_dcsa + 1.0309 * x_nursing + 0.2691 * x_pharmacy + 0.1965 * x_other,
# const = -0.0699
# coefs = [1.0046, 0.4170, 1.0309, 0.2691, 0.1965]
# predict_dalys_averted_percent = avg_increase_rate_exp.loc[
#     ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Dental'],
#     :].mul(coefs, axis=0).sum() + const
# extra_budget_fracs_sample = extra_budget_fracs_sample.T
# extra_budget_fracs_sample.loc[:, 'DALYs averted %'] = predict_dalys_averted_percent.values * 100

# save the sample (fixing DCSA = 2%, Other = 4%) to plot 3D-plot
# SAMPLE = extra_budget_fracs_sample[
#     ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DALYs averted %']
# ].drop(index='s_0', axis=0)

# extra_budget_fracs_sample.drop(
#     index=extra_budget_fracs_sample[extra_budget_fracs_sample['DALYs averted %'] < 8.0].index, inplace=True)
# extra_budget_fracs_sample['C + P'] = extra_budget_fracs_sample['Clinical'] + extra_budget_fracs_sample['Pharmacy']
# extra_budget_fracs_sample['C + NM'] = (extra_budget_fracs_sample['Clinical']
#                                        + extra_budget_fracs_sample['Nursing_and_Midwifery'])
# extra_budget_fracs_sample['NM + P'] = (extra_budget_fracs_sample['Nursing_and_Midwifery']
#                                        + extra_budget_fracs_sample['Pharmacy'])
# extra_budget_fracs_sample['C + NM + P'] = (extra_budget_fracs_sample['Nursing_and_Midwifery']
#                                            + extra_budget_fracs_sample['Pharmacy']
#                                            + extra_budget_fracs_sample['Clinical'])
# min_row = pd.DataFrame(extra_budget_fracs_sample.min(axis=0)).T.rename(index={0: 'Min'})
# max_row = pd.DataFrame(extra_budget_fracs_sample.max(axis=0)).T.rename(index={0: 'Max'})
# extra_budget_fracs_sample = pd.concat([extra_budget_fracs_sample, min_row, max_row])
# extra_budget_fracs_sample['Other'] = extra_budget_fracs_sample[other_group].sum(axis=1)
# extra_budget_fracs_sample.drop(columns=other_group, inplace=True)


def func_of_avg_increase_rate(cadre, scenario='s_2', r=R):
    """Return the average yearly growth rate of the staff of `cadre` over 2025-2034.

    The total HRH cost grows at rate `r` per year; the cadre receives its
    `extra_budget_fracs` share of the compounded 10-year extra budget, and the
    resulting overall staff scale-up is converted to a geometric-mean yearly rate.

    NOTE(review): this reads the module-level `staff_cost`, which the code below
    appends 'Other'/'Total' rows to and casts `annual_cost` to str — confirm this
    function is only called before those mutations (or on an earlier copy).
    """
    overall_scale_up = 1 + (staff_cost.annual_cost.sum()
                            * extra_budget_fracs.loc[cadre, scenario]
                            / staff_cost.loc[cadre, 'annual_cost']
                            * ((1 + r) ** 10 - 1)
                            )

    return overall_scale_up ** (1 / 10) - 1.0


# prepare 2024 cost info for the aggregated "Other" cadre group and a "Total" row
extra_rows = pd.DataFrame(columns=staff_cost.columns, index=['Other', 'Total'])
staff_cost = pd.concat([staff_cost, extra_rows], axis=0)
# the all-cadre total annual cost; the new 'Other'/'Total' rows are not in cadre_all,
# so this denominator is unaffected by the assignments below
total_annual_cost_all = staff_cost.loc[staff_cost.index.isin(cadre_all), 'annual_cost'].sum()
staff_cost.loc['Other', 'annual_cost'] = staff_cost.loc[staff_cost.index.isin(other_group), 'annual_cost'].sum()
staff_cost.loc['Total', 'annual_cost'] = total_annual_cost_all
staff_cost.loc['Other', 'Staff_Count'] = staff_cost.loc[staff_cost.index.isin(other_group), 'Staff_Count'].sum()
staff_cost.loc['Total', 'Staff_Count'] = staff_cost.loc[staff_cost.index.isin(cadre_all), 'Staff_Count'].sum()
staff_cost.loc['Other', 'cost_frac'] = staff_cost.loc['Other', 'annual_cost'] / total_annual_cost_all
staff_cost.loc['Total', 'cost_frac'] = staff_cost.loc['Total', 'annual_cost'] / total_annual_cost_all
# cast to str (presumably for reporting); note this makes the columns unusable for further arithmetic
staff_cost.annual_cost = staff_cost.annual_cost.astype(str)
staff_cost.cost_frac = staff_cost.cost_frac.astype(str)

# # save and read pickle file
# pickle_file_path = Path(resourcefilepath / 'healthsystem' / 'human_resources' / 'scaling_capabilities' /
#                         'ResourceFile_HR_expansion_by_officer_type_yearly_scale_up_factors.pickle')
#
# with open(pickle_file_path, 'wb') as f:
#     pickle.dump(scale_up_factor_dict, f)
#
# with open(pickle_file_path, 'rb') as f:
#     x = pickle.load(f)
"""
This file defines a batch run of a large population for a long time with all disease modules and full use of HSIs
It's used for analysis of impact of expanding funded hcw, assuming all other setting as default.

Run on the batch system using:
```
tlo batch-submit src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
```

or locally using:
```
tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
```
"""

from pathlib import Path
from typing import Dict

from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import (
    extra_budget_fracs,
)
from tlo import Date, logging
from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios
from tlo.methods.fullmodel import fullmodel
from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher
from tlo.scenario import BaseScenario


class HRHExpansionByCadreWithExtraBudget(BaseScenario):
    """Batch scenario that expands current HCW capabilities by officer type, allocating the
    yearly extra HRH budget according to the fractions defined in `extra_budget_fracs`."""

    def __init__(self):
        super().__init__()
        self.seed = 0  # change seed to 1 if to do another 5 runs per draw
        self.start_date = Date(2010, 1, 1)
        self.end_date = Date(2035, 1, 1)
        self.pop_size = 100_000
        self._scenarios = self._get_scenarios()
        self.number_of_draws = len(self._scenarios)
        self.runs_per_draw = 10

    def log_configuration(self):
        """Log at WARNING by default; demography, healthburden and the healthsystem summary at INFO."""
        return {
            'filename': 'scenario_run_for_hcw_expansion_analysis',
            'directory': Path('./outputs'),  # <- (specified only for local running)
            'custom_levels': {
                '*': logging.WARNING,
                'tlo.methods.demography': logging.INFO,
                'tlo.methods.demography.detail': logging.WARNING,
                'tlo.methods.healthburden': logging.INFO,
                'tlo.methods.healthsystem.summary': logging.INFO,
            }
        }

    def modules(self):
        """Use the full model plus the switcher that changes health-system/care-seeking settings mid-run."""
        return (fullmodel(resourcefilepath=self.resources) +
                [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)])

    def draw_parameters(self, draw_number, rng):
        """Return the parameter overrides for this draw (None once all scenarios are exhausted)."""
        if draw_number < len(self._scenarios):
            return list(self._scenarios.values())[draw_number]

    def _get_scenarios(self) -> Dict[str, Dict]:
        """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""

        self.YEAR_OF_MODE_CHANGE = 2020
        # HCW capabilities from data source are for year 2019,
        # and we want to rescale to effective capabilities in the end of 2019 considering model calibration
        self.YEAR_OF_HRH_EXPANSION = 2025
        # The start year to expand HRH by cadre given the extra budget, which is after the historical HRH scaling

        self.scenarios = extra_budget_fracs['s_*'].to_frame()
        # Run 'optimal' scenario for main analysis

        # Baseline settings for change
        self.cons_availability = ['all', 'default']
        self.hr_budget = [0.042, 0.058, 0.026]
        self.hs_function = [[False, False], [False, True]]

        self.baselines = {
            'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
            # 'default_cons': self._baseline_default_cons(),
            # 'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
            # 'less_budget': self._baseline_less_budget(),  # turn off when run baseline scenarios with no expansion
            # 'max_hs_function': self._baseline_max_hs_function(),
        }

        # one draw per (baseline, extra-budget-allocation column) pair, named "<baseline> <scenario>"
        return {
            b + ' ' + self.scenarios.columns[i]:
                mix_scenarios(
                    self.baselines[b],
                    {'HealthSystem': {
                        'HR_expansion_by_officer_type': self.scenarios.iloc[:, i].to_dict()
                    }
                    }
                ) for b in self.baselines.keys() for i in range(len(self.scenarios.columns))
        }

    def _baseline_of_baseline(self) -> Dict:
        """Shared baseline: mode 2 from 2020 with effective-capability rescaling, historical HR
        scaling over 2020-2024, and consumables switch plus HR expansion from 2025."""
        return mix_scenarios(
            get_parameters_for_status_quo(),
            {
                'HealthSystem': {
                    'mode_appt_constraints': 1,
                    'mode_appt_constraints_postSwitch': 2,
                    "scale_to_effective_capabilities": True,
                    # This happens in the year before mode change, as the model calibration is done by that year
                    "year_mode_switch": self.YEAR_OF_MODE_CHANGE,
                    'cons_availability': 'default',
                    'cons_availability_postSwitch': self.cons_availability[0],
                    'year_cons_availability_switch': self.YEAR_OF_HRH_EXPANSION,
                    'HR_budget_growth_rate': self.hr_budget[0],
                    'yearly_HR_scaling_mode': 'historical_scaling',  # for 5 years of 2020-2024; source data year 2019
                    'start_year_HR_expansion_by_officer_type': self.YEAR_OF_HRH_EXPANSION,
                    'end_year_HR_expansion_by_officer_type': self.end_date.year,
                    "policy_name": 'Naive',
                    "tclose_overwrite": 1,
                    "tclose_days_offset_overwrite": 7,
                },
                'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': {
                    'max_healthcare_seeking': [False, False],
                    'max_healthsystem_function': self.hs_function[0],
                    'year_of_switch': self.YEAR_OF_HRH_EXPANSION,
                }
            },
        )

    def _baseline_default_cons(self) -> Dict:
        """Baseline variant: 'default' (rather than 'all') consumables availability after the switch."""
        return mix_scenarios(
            self._baseline_of_baseline(),
            {
                'HealthSystem': {
                    'cons_availability_postSwitch': self.cons_availability[1],
                },
            },
        )

    def _baseline_more_budget(self) -> Dict:
        """Baseline variant: higher HRH budget growth rate (0.058)."""
        return mix_scenarios(
            self._baseline_of_baseline(),
            {
                'HealthSystem': {
                    'HR_budget_growth_rate': self.hr_budget[1],
                },
            },
        )

    def _baseline_less_budget(self) -> Dict:
        """Baseline variant: lower HRH budget growth rate (0.026)."""
        return mix_scenarios(
            self._baseline_of_baseline(),
            {
                'HealthSystem': {
                    'HR_budget_growth_rate': self.hr_budget[2],
                },
            },
        )

    def _baseline_max_hs_function(self) -> Dict:
        """Baseline variant: maximal health-system function after the switch."""
        return mix_scenarios(
            self._baseline_of_baseline(),
            {
                'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': {
                    'max_healthsystem_function': self.hs_function[1],
                }
            },
        )


if __name__ == '__main__':
    from tlo.cli import scenario_run

    scenario_run([__file__])
a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -307,6 +307,35 @@ class HealthSystem(Module): "a worksheet of the file `ResourceFile_dynamic_HR_scaling.xlsx`." ), + 'HR_expansion_by_officer_type': Parameter( + Types.DICT, "This DICT has keys of nine officer types, each with a float value that " + "specifies the proportion of extra budget allocated to that officer type." + "The extra budget for this year is (100 * HR_budget_growth_rate) percent of the total salary " + "of these officers in last year. Given the allocated extra budget and annual salary, " + "we calculate the extra minutes for these staff of this year. The expansion is done " + "on 1 Jan of every year from start_year_HR_expansion_by_officer_type." + ), + "HR_budget_growth_rate": Parameter( + Types.REAL, "This number is the annual growth rate of HR budget. " + "The default value is 0.042 (4.2%), assuming the annual GDP growth rate is 4.2% and " + "the proportion of GDP expenditure on paying salaries of these staff is fixed " + ), + + 'start_year_HR_expansion_by_officer_type': Parameter( + Types.INT, "Year from which the HR expansion by officer type will take place. The change happens " + "on 1 Jan of every year onwards." + ), + + 'end_year_HR_expansion_by_officer_type': Parameter( + Types.INT, "Year in which the HR expansion by officer type will stop. This happens on 1 Jan of " + "that year. When submit the scenario to run, this should be the same year of the end year of " + "the run." + ), + + 'minute_salary': Parameter( + Types.DATA_FRAME, "This specifies the minute salary in USD per officer type per facility id." + ), + 'tclose_overwrite': Parameter( Types.INT, "Decide whether to overwrite tclose variables assigned by disease modules"), @@ -627,6 +656,20 @@ def read_parameters(self, data_folder): # Ensure that a value for the year at the start of the simulation is provided. 
assert all(2010 in sheet['year'].values for sheet in self.parameters['yearly_HR_scaling'].values()) + # Read in ResourceFile_Annual_Salary_Per_Cadre.csv + self.parameters['minute_salary'] = pd.read_csv( + Path(self.resourcefilepath) / 'costing' / 'Minute_Salary_HR.csv') + + # Set default values for HR_expansion_by_officer_type, start_year_HR_expansion_by_officer_type, + # end_year_HR_expansion_by_officer_type + self.parameters['HR_expansion_by_officer_type'] = { + 'Clinical': 0, 'DCSA': 0, 'Nursing_and_Midwifery': 0, 'Pharmacy': 0, + 'Dental': 0, 'Laboratory': 0, 'Mental': 0, 'Nutrition': 0, 'Radiography': 0 + } + self.parameters['HR_budget_growth_rate'] = 0.042 + self.parameters['start_year_HR_expansion_by_officer_type'] = 2025 + self.parameters['end_year_HR_expansion_by_officer_type'] = 2035 + def pre_initialise_population(self): """Generate the accessory classes used by the HealthSystem and pass to them the data that has been read.""" @@ -775,6 +818,12 @@ def initialise_simulation(self, sim): # whilst the actual scaling will only take effect from 2011 onwards. sim.schedule_event(DynamicRescalingHRCapabilities(self), Date(sim.date)) + # Schedule recurring event that expands HR by officer type + # from the start_year_HR_expansion_by_officer_type to the end_year_HR_expansion_by_officer_type. 
+ for yr in range(self.parameters["start_year_HR_expansion_by_officer_type"], + self.parameters["end_year_HR_expansion_by_officer_type"]): + sim.schedule_event(HRExpansionByOfficerType(self), Date(yr, 1, 1)) + # Schedule the logger to occur at the start of every year sim.schedule_event(HealthSystemLogger(self), Date(sim.date.year, 1, 1)) @@ -961,7 +1010,7 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple self.parameters[f'Daily_Capabilities_{use_funded_or_actual_staffing}'] ) capabilities = capabilities.rename(columns={'Officer_Category': 'Officer_Type_Code'}) # neaten - + # Create new column where capabilities per staff are computed capabilities['Mins_Per_Day_Per_Staff'] = capabilities['Total_Mins_Per_Day']/capabilities['Staff_Count'] @@ -984,10 +1033,10 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple # Merge in information about facility from Master Facilities List mfl = self.parameters['Master_Facilities_List'] capabilities_ex = capabilities_ex.merge(mfl, on='Facility_ID', how='left') - + # Create a copy of this to store staff counts capabilities_per_staff_ex = capabilities_ex.copy() - + # Merge in information about officers # officer_types = self.parameters['Officer_Types_Table'][['Officer_Type_Code', 'Officer_Type']] # capabilities_ex = capabilities_ex.merge(officer_types, on='Officer_Type_Code', how='left') @@ -1000,7 +1049,7 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple how='left', ) capabilities_ex = capabilities_ex.fillna(0) - + capabilities_per_staff_ex = capabilities_per_staff_ex.merge( capabilities[['Facility_ID', 'Officer_Type_Code', 'Mins_Per_Day_Per_Staff']], on=['Facility_ID', 'Officer_Type_Code'], @@ -1015,7 +1064,7 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple + '_Officer_' + capabilities_ex['Officer_Type_Code'] ) - + # Give the standard index: capabilities_per_staff_ex = 
capabilities_per_staff_ex.set_index( 'FacilityID_' @@ -1041,6 +1090,11 @@ def _rescale_capabilities_to_capture_effective_capability(self): # Note: Currently relying on module variable rather than parameter for # scale_to_effective_capabilities, in order to facilitate testing. However # this may eventually come into conflict with the Switcher functions. + + # In addition, for Class HRExpansionByOfficerType, + # for the purpose of keep cost not scaled, need to scale down minute salary when capabilities are scaled up + + minute_salary = self.parameters['minute_salary'] pattern = r"FacilityID_(\w+)_Officer_(\w+)" for officer in self._daily_capabilities.keys(): matches = re.match(pattern, officer) @@ -1050,16 +1104,22 @@ def _rescale_capabilities_to_capture_effective_capability(self): level = self._facility_by_facility_id[facility_id].level # Only rescale if rescaling factor is greater than 1 (i.e. don't reduce # available capabilities if these were under-used the previous year). + # Later, we might want to rescale capabilities by rescaling factor of officer type and facility id + # (i.e., officer type, district and level specific), + # which will need fraction of time used by officer type and facility id. rescaling_factor = self._summary_counter.frac_time_used_by_officer_type_and_level( officer_type=officer_type, level=level ) if rescaling_factor > 1 and rescaling_factor != float("inf"): self._daily_capabilities[officer] *= rescaling_factor - + # We assume that increased daily capabilities is a result of each staff performing more # daily patient facing time per day than contracted (or equivalently performing appts more # efficiently). 
self._daily_capabilities_per_staff[officer] *= rescaling_factor + minute_salary.loc[(minute_salary.Facility_ID == facility_id) + & (minute_salary.Officer_Type_Code == officer_type), + 'Minute_Salary_USD'] /= rescaling_factor def update_consumables_availability_to_represent_merging_of_levels_1b_and_2(self, df_original): """To represent that facility levels '1b' and '2' are merged together under the label '2', we replace the @@ -1766,6 +1826,7 @@ def write_to_hsi_log( squeeze_factor=squeeze_factor, appt_footprint=event_details.appt_footprint, level=event_details.facility_level, + fac_id=facility_id if facility_id is not None else -99, ) def call_and_record_never_ran_hsi_event(self, hsi_event, priority=None): @@ -1824,6 +1885,7 @@ def write_to_never_ran_hsi_log( hsi_event_name=event_details.event_name, appt_footprint=event_details.appt_footprint, level=event_details.facility_level, + fac_id=facility_id if facility_id is not None else -99, ) def log_current_capabilities_and_usage(self): @@ -1847,23 +1909,41 @@ def log_current_capabilities_and_usage(self): comparison['Minutes_Used'].sum() / total_available if total_available > 0 else 0 ) - # Compute Fraction of Time Used In Each Facility - facility_id = [_f.split('_')[1] for _f in comparison.index] - summary_by_fac_id = comparison.groupby(by=facility_id)[['Total_Minutes_Per_Day', 'Minutes_Used']].sum() - summary_by_fac_id['Fraction_Time_Used'] = ( - summary_by_fac_id['Minutes_Used'] / summary_by_fac_id['Total_Minutes_Per_Day'] - ).replace([np.inf, -np.inf, np.nan], 0.0) + def compute_fraction_of_time_used(groups): + """ + This will take in the groups for the groupby and calculate the fraction of time used for each group. 
+ :param groups: list of groups + :return: dataframe with groups as the index and time measures as the columns + """ + _summary = comparison.groupby(by=groups)[['Total_Minutes_Per_Day', 'Minutes_Used']].sum() + _summary['Fraction_Time_Used'] = ( + _summary['Minutes_Used'] / _summary['Total_Minutes_Per_Day'] + ).replace([np.inf, -np.inf, np.nan], 0.0) - # Compute Fraction of Time For Each Officer and level + return _summary + + # Get facility id, officer, level, district groups + facility_id = [_f.split('_')[1] for _f in comparison.index] officer = [_f.rsplit('Officer_')[1] for _f in comparison.index] level = [self._facility_by_facility_id[int(_fac_id)].level for _fac_id in facility_id] level = list(map(lambda x: x.replace('1b', '2'), level)) - summary_by_officer = comparison.groupby(by=[officer, level])[['Total_Minutes_Per_Day', 'Minutes_Used']].sum() - summary_by_officer['Fraction_Time_Used'] = ( - summary_by_officer['Minutes_Used'] / summary_by_officer['Total_Minutes_Per_Day'] - ).replace([np.inf, -np.inf, np.nan], 0.0) + district = [self._facility_by_facility_id[int(_fac_id)].name.split('_')[-1] for _fac_id in facility_id] + + # Compute Fraction of Time Used In Each Facility + summary_by_fac_id = compute_fraction_of_time_used(facility_id) + + # Compute Fraction of Time For Each Officer and Level + summary_by_officer = compute_fraction_of_time_used([officer, level]) summary_by_officer.index.names = ['Officer_Type', 'Facility_Level'] + # Compute raction of Time For Each Officer and District + summary_by_officer_district = compute_fraction_of_time_used([officer, district]) + summary_by_officer_district.index.names = ['Officer_Type', 'District'] + + # Compute Fraction of Time by Officer, Level and District + summary_by_officer_level_district = compute_fraction_of_time_used([officer, level, district]) + summary_by_officer_level_district.index.names = ['Officer_Type', 'Facility_Level', 'District'] + logger.info(key='Capacity', data={ 'Frac_Time_Used_Overall': 
fraction_time_used_overall, @@ -1876,7 +1956,10 @@ def log_current_capabilities_and_usage(self): self._summary_counter.record_hs_status( fraction_time_used_across_all_facilities=fraction_time_used_overall, - fraction_time_used_by_officer_type_and_level=summary_by_officer["Fraction_Time_Used"].to_dict() + fraction_time_used_by_officer_type_and_level=summary_by_officer["Fraction_Time_Used"].to_dict(), + fraction_time_used_by_officer_district=summary_by_officer_district["Fraction_Time_Used"].to_dict(), + fraction_time_used_by_officer_level_district=summary_by_officer_level_district[ + 'Fraction_Time_Used'].to_dict(), ) def remove_beddays_footprint(self, person_id): @@ -2635,14 +2718,19 @@ def _reset_internal_stores(self) -> None: self._no_blank_appt_treatment_ids = defaultdict(int) # As above, but for `HSI_Event`s with non-blank footprint self._no_blank_appt_appts = defaultdict(int) # As above, but for `HSI_Event`s that with non-blank footprint self._no_blank_appt_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')} + fac_ids = list(range(133)) + [-1, -99] # 133 "real" facilities + 2 dummy facilities + self._no_blank_appt_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in fac_ids} # Log HSI_Events that never ran to monitor shortcoming of Health System self._never_ran_treatment_ids = defaultdict(int) # As above, but for `HSI_Event`s that never ran self._never_ran_appts = defaultdict(int) # As above, but for `HSI_Event`s that have never ran self._never_ran_appts_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')} + self._never_ran_appts_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in fac_ids} self._frac_time_used_overall = [] # Running record of the usage of the healthcare system self._sum_of_daily_frac_time_used_by_officer_type_and_level = Counter() + self._sum_of_daily_frac_time_used_by_officer_district = Counter() + self._sum_of_daily_frac_time_used_by_officer_level_district = Counter() 
self._squeeze_factor_by_hsi_event_name = defaultdict(list) # Running record the squeeze-factor applying to each # treatment_id. Key is of the form: # ":" @@ -2652,7 +2740,8 @@ def record_hsi_event(self, hsi_event_name: str, squeeze_factor: float, appt_footprint: Counter, - level: str + level: str, + fac_id: int, ) -> None: """Add information about an `HSI_Event` to the running summaries.""" @@ -2675,12 +2764,14 @@ def record_hsi_event(self, for appt_type, number in appt_footprint: self._no_blank_appt_appts[appt_type] += number self._no_blank_appt_by_level[level][appt_type] += number + self._no_blank_appt_by_fac_id[fac_id][appt_type] += number def record_never_ran_hsi_event(self, treatment_id: str, hsi_event_name: str, appt_footprint: Counter, - level: str + level: str, + fac_id: int, ) -> None: """Add information about a never-ran `HSI_Event` to the running summaries.""" @@ -2691,17 +2782,24 @@ def record_never_ran_hsi_event(self, for appt_type, number in appt_footprint: self._never_ran_appts[appt_type] += number self._never_ran_appts_by_level[level][appt_type] += number + self._never_ran_appts_by_fac_id[fac_id][appt_type] += number def record_hs_status( self, fraction_time_used_across_all_facilities: float, fraction_time_used_by_officer_type_and_level: Dict[Tuple[str, int], float], + fraction_time_used_by_officer_district: Dict[Tuple[str, str], float], + fraction_time_used_by_officer_level_district: Dict[Tuple[str, str, str], float], ) -> None: """Record a current status metric of the HealthSystem.""" # The fraction of all healthcare worker time that is used: self._frac_time_used_overall.append(fraction_time_used_across_all_facilities) for officer_type_facility_level, fraction_time in fraction_time_used_by_officer_type_and_level.items(): self._sum_of_daily_frac_time_used_by_officer_type_and_level[officer_type_facility_level] += fraction_time + for officer_district, fraction_time in fraction_time_used_by_officer_district.items(): + 
self._sum_of_daily_frac_time_used_by_officer_district[officer_district] += fraction_time + for officer_level_district, fraction_time in fraction_time_used_by_officer_level_district.items(): + self._sum_of_daily_frac_time_used_by_officer_level_district[officer_level_district] += fraction_time def write_to_log_and_reset_counters(self): """Log summary statistics reset the data structures. This usually occurs at the end of the year.""" @@ -2724,9 +2822,10 @@ def write_to_log_and_reset_counters(self): key="HSI_Event_non_blank_appt_footprint", description="Same as for key 'HSI_Event' but limited to HSI_Event that have non-blank footprints", data={ - "TREATMENT_ID": self._no_blank_appt_treatment_ids, - "Number_By_Appt_Type_Code": self._no_blank_appt_appts, - "Number_By_Appt_Type_Code_And_Level": self._no_blank_appt_by_level, + "TREATMENT_ID": self._no_blank_appt_treatment_ids, + "Number_By_Appt_Type_Code": self._no_blank_appt_appts, + "Number_By_Appt_Type_Code_And_Level": self._no_blank_appt_by_level, + "Number_By_Appt_Type_Code_And_FacilityID": self._no_blank_appt_by_fac_id, }, ) @@ -2739,6 +2838,7 @@ def write_to_log_and_reset_counters(self): "TREATMENT_ID": self._never_ran_treatment_ids, "Number_By_Appt_Type_Code": self._never_ran_appts, "Number_By_Appt_Type_Code_And_Level": self._never_ran_appts_by_level, + "Number_By_Appt_Type_Code_And_FacilityID": self._never_ran_appts_by_fac_id, }, ) @@ -2762,6 +2862,26 @@ def write_to_log_and_reset_counters(self): self.frac_time_used_by_officer_type_and_level()), ) + # Log mean of 'fraction time used by officer type and district' from daily entries from the previous + # year. 
+ logger_summary.info( + key="Capacity_By_OfficerType_And_District", + description="The fraction of healthcare worker time that is used each day, averaged over this " + "calendar year, for each officer type in each district.", + data=flatten_multi_index_series_into_dict_for_logging( + self.frac_time_used_by_officer_district()), + ) + + # Log mean of 'fraction time used by officer type and facility level and district' from daily entries from the + # previous year. + logger_summary.info( + key="Capacity_By_OfficerType_And_FacilityLevel_And_District", + description="The fraction of healthcare worker time that is used each day, averaged over this " + "calendar year, for each officer type at each facility level in each district.", + data=flatten_multi_index_series_into_dict_for_logging( + self.frac_time_used_by_officer_level_district()), + ) + self._reset_internal_stores() def frac_time_used_by_officer_type_and_level( @@ -2794,6 +2914,70 @@ def frac_time_used_by_officer_type_and_level( data=mean_frac_time_used.values() ).sort_index() + def frac_time_used_by_officer_district( + self, + officer_type: Optional[str]=None, + district: Optional[str]=None, + ) -> Union[float, pd.Series]: + """Average fraction of time used by officer type and district since last reset. + If `officer_type` and/or `district` is not provided (left to default to `None`) then a pd.Series with a multi-index + is returned giving the result for all officer_types/levels.""" + + if (officer_type is not None) and (district is not None): + return ( + self._sum_of_daily_frac_time_used_by_officer_district[officer_type, district] + / len(self._frac_time_used_overall) + # Use len(self._frac_time_used_overall) as proxy for number of days in past year. 
+ ) + else: + # Return multiple in the form of a pd.Series with multiindex + mean_frac_time_used = { + (_officer_type, _district): v / len(self._frac_time_used_overall) + for (_officer_type, _district), v in self._sum_of_daily_frac_time_used_by_officer_district.items() + if (_officer_type == officer_type or officer_type is None) and ( + _district == district or district is None) + } + return pd.Series( + index=pd.MultiIndex.from_tuples( + mean_frac_time_used.keys(), + names=['OfficerType', 'District'] + ), + data=mean_frac_time_used.values() + ).sort_index() + + def frac_time_used_by_officer_level_district( + self, + officer_type: Optional[str]=None, + level: Optional[str]=None, + district: Optional[str]=None, + ) -> Union[float, pd.Series]: + """Average fraction of time used by officer, level and district since last reset. + If `officer_type` and/or `level` and/or 'district' is not provided (left to default to `None`), + then a pd.Series with a multi-index is returned giving the result for all officer_types/levels/districts.""" + + if (officer_type is not None) and (level is not None) and (district is not None): + return ( + self._sum_of_daily_frac_time_used_by_officer_level_district[officer_type, level, district] + / len(self._frac_time_used_overall) + # Use len(self._frac_time_used_overall) as proxy for number of days in past year. 
+ ) + else: + # Return multiple in the form of a pd.Series with multiindex + mean_frac_time_used = { + (_officer_type, _level, _district): v / len(self._frac_time_used_overall) + for (_officer_type, _level, _district), v in self._sum_of_daily_frac_time_used_by_officer_level_district.items() + if (_officer_type == officer_type or officer_type is None) and (_level == level or level is None) and ( + _district == district or district is None) + } + return pd.Series( + index=pd.MultiIndex.from_tuples( + mean_frac_time_used.keys(), + names=['OfficerType', 'FacilityLevel', 'District'] + ), + data=mean_frac_time_used.values() + ).sort_index() + + class HealthSystemChangeParameters(Event, PopulationScopeEventMixin): """Event that causes certain internal parameters of the HealthSystem to be changed; specifically: * `mode_appt_constraints` @@ -2917,6 +3101,8 @@ def apply(self, population): HR_scaling_factor_by_district = self.module.parameters['HR_scaling_by_district_table'][ self.module.parameters['HR_scaling_by_district_mode'] ].set_index('District').to_dict() + # todo: add entries for facilities at and beyond level 3, + # so that the district list would match the facility IDs fully. pattern = r"FacilityID_(\w+)_Officer_(\w+)" @@ -2925,10 +3111,79 @@ def apply(self, population): # Extract ID and officer type from facility_id = int(matches.group(1)) district = self.module._facility_by_facility_id[facility_id].district + # todo: check if district callable; a fix might be + # district = self.module._facility_by_facility_id[facility_id].name.split('_')[-1] if district in HR_scaling_factor_by_district: self.module._daily_capabilities[officer] *= HR_scaling_factor_by_district[district] +class HRExpansionByOfficerType(Event, PopulationScopeEventMixin): + """ This event exists to expand the HR by officer type (Clinical, DCSA, Nursing_and_Midwifery, Pharmacy) + given an extra budget. 
This is done for daily capabilities, as a year consists of 365.25 equal days.""" + def __init__(self, module): + super().__init__(module) + + def apply(self, population): + + # get minute salary + minute_salary_by_officer_facility_id = self.module.parameters['minute_salary'] + + # get current daily minutes and format it to be consistent with minute salary + daily_minutes = pd.DataFrame(self.module._daily_capabilities).reset_index().rename( + columns={'index': 'facilityid_officer'}) + daily_minutes[['Facility_ID', 'Officer_Type_Code']] = daily_minutes.facilityid_officer.str.split( + pat='_', n=3, expand=True)[[1, 3]] + daily_minutes['Facility_ID'] = daily_minutes['Facility_ID'].astype(int) + + # get daily cost per officer type per facility id + daily_cost = minute_salary_by_officer_facility_id.merge( + daily_minutes, on=['Facility_ID', 'Officer_Type_Code'], how='outer') + daily_cost['Total_Cost_Per_Day'] = daily_cost['Minute_Salary_USD'] * daily_cost['Total_Minutes_Per_Day'] + + # get daily cost per officer type + daily_cost = daily_cost.groupby('Officer_Type_Code').agg({'Total_Cost_Per_Day': 'sum'}) + + # get daily extra budget for this year + daily_extra_budget = (self.module.parameters['HR_budget_growth_rate'] + * daily_cost.Total_Cost_Per_Day.sum()) + + # get proportional daily extra budget for each officer type + extra_budget_fraction = pd.Series(self.module.parameters['HR_expansion_by_officer_type']) + assert set(extra_budget_fraction.index) == set(daily_cost.index), \ + "Input officer types do not match the defined officer types" + daily_cost = daily_cost.reindex(index=extra_budget_fraction.index) + daily_cost['extra_budget_per_day'] = daily_extra_budget * extra_budget_fraction + + # get the scale up factor for each officer type, assumed to be the same for each facility id of that + # officer type (note "cost = available minutes * minute salary", thus we could directly calculate + # scale up factor using cost) + daily_cost['scale_up_factor'] = ( + 
(daily_cost.extra_budget_per_day + daily_cost.Total_Cost_Per_Day) / daily_cost.Total_Cost_Per_Day + ) + + # scale up the daily minutes per cadre per facility id + pattern = r"FacilityID_(\w+)_Officer_(\w+)" + for officer in self.module._daily_capabilities.keys(): + matches = re.match(pattern, officer) + # Extract officer type + officer_type = matches.group(2) + self.module._daily_capabilities[officer] *= daily_cost.loc[officer_type, 'scale_up_factor'] + + # save the scale up factor, updated cost and updated capabilities into logger + # note that cost and capabilities are on the actual scale, + # not normalised by the self.capabilities_coefficient parameter + total_cost_this_year = 365.25 * (daily_cost.Total_Cost_Per_Day + daily_cost.extra_budget_per_day) + total_capabilities_this_year = (365.25 * self.module._daily_capabilities) + logger_summary.info(key='HRScaling', + description='The HR scale up factor by office type given fractions of an extra budget', + data={ + 'scale_up_factor': daily_cost.scale_up_factor.to_dict(), + 'total_hr_salary': total_cost_this_year.to_dict(), + 'total_hr_capabilities': total_capabilities_this_year.to_dict() + } + ) + + class HealthSystemChangeMode(RegularEvent, PopulationScopeEventMixin): """ This event exists to change the priority policy adopted by the HealthSystem at a given year. 
""" diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py index 875e3e03d4..f9db70dd06 100644 --- a/tests/test_healthsystem.py +++ b/tests/test_healthsystem.py @@ -2573,6 +2573,106 @@ def get_capabilities(yearly_scaling: bool, scaling_by_level: bool, rescaling: bo assert caps_scaling_by_both_with_rescaling > caps_only_scaling_by_year_with_rescaling +def test_HR_expansion_by_officer_type(seed, tmpdir): + """Check that we can use the parameter `HR_expansion_by_officer_type` to update the minutes of time available + for healthcare workers.""" + + def get_initial_capabilities() -> pd.DataFrame: + sim = Simulation(start_date=start_date, seed=seed) + sim.register( + demography.Demography(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath) + ) + popsize=100 + sim.make_initial_population(n=popsize) + sim.simulate(end_date=start_date + pd.DateOffset(days=0)) + + caps = pd.DataFrame(sim.modules['HealthSystem'].capabilities_today) + caps = caps[caps != 0] + + return caps + + def get_capabilities_after_update(end_year, HR_expansion_by_officer_type) -> pd.Series: + sim = Simulation(start_date=start_date, seed=seed) + sim.register( + demography.Demography(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath), + simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + + ) + params = sim.modules['HealthSystem'].parameters + params['start_year_HR_expansion_by_officer_type'] = 2011 # first update happens on 1 Jan 2011 + params['end_year_HR_expansion_by_officer_type'] = end_year # last update happens on 1 Jan (end_year - 1) + params['HR_expansion_by_officer_type'] = HR_expansion_by_officer_type.to_dict() + + # for testing _rescale_capabilities_to_capture_effective_capability + params['year_mode_switch'] = 2011 + params['scale_to_effective_capabilities'] = True + + popsize = 100 + sim.make_initial_population(n=popsize) + + sim.simulate(end_date=Date(end_year, 1, 
1)) + + caps = pd.DataFrame(sim.modules['HealthSystem'].capabilities_today) + caps = caps[caps != 0] + + return caps + + initial_caps = get_initial_capabilities() + test_fracs = pd.DataFrame( + index=['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'], + data={'no_update': [0, 0, 0, 0, 0, 0, 0, 0, 0], + 'clinical_one_update': [1, 0, 0, 0, 0, 0, 0, 0, 0], + 'clinical_dcsa_one_update': [0.5, 0.5, 0, 0, 0, 0, 0, 0, 0], + 'clinical_two_updates': [1, 0, 0, 0, 0, 0, 0, 0, 0]} + ) + caps_clinical_no_update = get_capabilities_after_update(2012, test_fracs.no_update) + caps_clinical_one_update = get_capabilities_after_update(2012, test_fracs.clinical_one_update) + caps_clinical_dcsa_one_update = get_capabilities_after_update(2012, + test_fracs.clinical_dcsa_one_update) + caps_clinical_two_updates = get_capabilities_after_update(2013, test_fracs.clinical_two_updates) + + # check that the cadres are expanded as expected + def compare(cadre, caps_1, caps_2) -> tuple: + + assert (caps_1.index == caps_2.index).all() + comp_caps_0 = caps_1.merge(caps_2, left_index=True, right_index=True) + comp_caps_0 = comp_caps_0[comp_caps_0.index.str.contains(cadre, regex=True)] + ratio = (comp_caps_0.iloc[:, 1] / comp_caps_0.iloc[:, 0]).dropna() + + return (ratio > 1).all(), (abs(ratio - ratio.unique()[0]) < 1e-6).all() + + # initial_caps vs caps_clinical_no_update + # check if the clinical cadre of each facility id is not expanded + assert not compare('Clinical', initial_caps, caps_clinical_no_update)[0] + + # initial_caps vs caps_clinical_one_update + # check if the clinical cadre of each facility id is expanded + assert compare('Clinical', initial_caps, caps_clinical_one_update)[0] + # check if the cadre is expanded by the same ratio of each facilty id + assert compare('Clinical', initial_caps, caps_clinical_one_update)[1] + + # caps_clinical_one_update vs caps_clinical_two_updates + # check if the clinical cadre of each 
facility id is expanded more in the latter scenario with two updates + assert compare('Clinical', caps_clinical_one_update, caps_clinical_two_updates)[0] + # check if the cadre is expanded by the same ratio of each facilty id + assert compare('Clinical', caps_clinical_one_update, caps_clinical_two_updates)[1] + + # initial_caps vs caps_clinical_dcsa_one_update + # check if the DCSA cadre of each facility id is expanded + assert compare('DCSA', initial_caps, caps_clinical_dcsa_one_update)[0] + # check if the cadre is expanded by the same ratio of each facilty id + assert compare('DCSA', initial_caps, caps_clinical_dcsa_one_update)[1] + + # caps_clinical_one_update vs caps_clinical_dcsa_one_update + # check if the cadre of each facility id is expanded less in the latter scenario with a smaller frac of extra budget + assert compare('Clinical', caps_clinical_dcsa_one_update, caps_clinical_one_update)[0] + # check if the cadre is expanded by the same ratio of each facilty id + assert compare('Clinical', caps_clinical_dcsa_one_update, caps_clinical_one_update)[1] + + def test_logging_of_only_hsi_events_with_non_blank_footprints(tmpdir): """Run the simulation with an HSI_Event that may have a blank_footprint and examine the healthsystem.summary logger. * If the footprint is blank, the HSI event should be recorded in the usual loggers but not the 'no_blank' logger