diff --git a/resources/costing/Minute_Salary_HR.csv b/resources/costing/Minute_Salary_HR.csv new file mode 100644 index 0000000000..64fec2c8f1 --- /dev/null +++ b/resources/costing/Minute_Salary_HR.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1731535fc81a7918dcaf6eceda21452999828515bb1b781c433361af6acd00e2 +size 35276 diff --git a/resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv b/resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv new file mode 100644 index 0000000000..ae50af04f5 --- /dev/null +++ b/resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2457b9b914a1b356ba64168790f99467a86f459760268a729a6ddaf719b45b7 +size 245 diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py new file mode 100644 index 0000000000..fddfd2eddd --- /dev/null +++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py @@ -0,0 +1,2444 @@ +""" +This file analyses and plots the services, DALYs, Deaths within different scenarios of expanding current hr by officer +type given some extra budget. Return on investment and marginal productivity of each officer type will be examined. + +The scenarios are defined in scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py. 
+""" + +import argparse +from collections import Counter +from pathlib import Path +from typing import Tuple + +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt + +from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import ( + Minute_Salary_by_Cadre_Level, + extra_budget_fracs, +) +from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import ( + HRHExpansionByCadreWithExtraBudget, +) +from tlo import Date +from tlo.analysis.utils import ( + APPT_TYPE_TO_COARSE_APPT_TYPE_MAP, + CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP, + SHORT_TREATMENT_ID_TO_COLOR_MAP, + bin_hsi_event_details, + compute_mean_across_runs, + extract_results, + summarize, +) + +# rename scenarios +substitute_labels = { + 's_0': 'no_extra_budget_allocation', + 's_1': 'all_cadres_current_allocation', + 's_2': 'all_cadres_gap_allocation', + 's_3': 'all_cadres_equal_allocation', + 's_4': 'Clinical (C)', 's_5': 'DCSA (D)', 's_6': 'Nursing_and_Midwifery (N&M)', 's_7': 'Pharmacy (P)', + 's_8': 'Other (O)', + 's_9': 'C + D', 's_10': 'C + N&M', 's_11': 'C + P', 's_12': 'C + O', 's_13': 'D + N&M', + 's_14': 'D + P', 's_15': 'D + O', 's_16': 'N&M + P', 's_17': 'N&M + O', 's_18': 'P + O', + 's_19': 'C + D + N&M', 's_20': 'C + D + P', 's_21': 'C + D + O', 's_22': 'C + N&M + P', 's_23': 'C + N&M + O', + 's_24': 'C + P + O', 's_25': 'D + N&M + P', 's_26': 'D + N&M + O', 's_27': 'D + P + O', 's_28': 'N&M + P + O', + 's_29': 'C + D + N&M + P', 's_30': 'C + D + N&M + O', 's_31': 'C + D + P + O', 's_32': 'C + N&M + P + O', + 's_33': 'D + N&M + P + O', +} + + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, + the_target_period: Tuple[Date, Date] = None): + """ + Extract results of number of services by appt type, number of DALYs, number of Deaths in the target period. 
+ (To see whether to extract these results by short treatment id and/or disease.) + Calculate the extra budget allocated, extra staff by cadre, return on investment and marginal productivity by cadre. + """ + TARGET_PERIOD = the_target_period + + # Definitions of general helper functions + make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + e = HRHExpansionByCadreWithExtraBudget() + return tuple(e._scenarios.keys()) + + def get_num_appts(_df): + """Return the number of services by appt type (total within the TARGET_PERIOD)""" + return (_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code'] + .apply(pd.Series) + .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP) + .groupby(level=0, axis=1).sum() + .sum()) + + def get_num_appts_by_level(_df): + """Return the number of services by appt type and facility level (total within the TARGET_PERIOD)""" + def unpack_nested_dict_in_series(_raw: pd.Series): + return pd.concat( + { + idx: pd.DataFrame.from_dict(mydict) for idx, mydict in _raw.items() + } + ).unstack().fillna(0.0).astype(int) + + return _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code_And_Level'] \ + .pipe(unpack_nested_dict_in_series) \ + .sum(axis=0) + + def get_num_services(_df): + """Return the number of services in total of all appt types (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code'] + .apply(pd.Series).sum().sum() + ) + + def get_num_treatments(_df): + """Return the number of treatments by short treatment id (total within 
the TARGET_PERIOD)""" + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum() + _df.index = _df.index.map(lambda x: x.split('_')[0] + "*") + _df = _df.groupby(level=0).sum() + return _df + + def get_num_treatments_total(_df): + """Return the number of treatments in total of all treatments (total within the TARGET_PERIOD)""" + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum() + _df.index = _df.index.map(lambda x: x.split('_')[0] + "*") + _df = _df.groupby(level=0).sum().sum() + return pd.Series(_df) + + def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD)""" + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + + def get_num_dalys(_df): + """Return total number of DALYS (Stacked) (total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + def get_num_dalys_yearly(_df): + """Return total number of DALYS (Stacked) for every year in the TARGET_PERIOD. + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." 
+ _df = (_df.loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range']) + .groupby('year').sum() + .sum(axis=1) + ) + return _df + + def get_num_dalys_by_cause(_df): + """Return total number of DALYS by cause (Stacked) (total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + return (_df + .loc[_df.year.between(*years_needed)].drop(columns=['date', 'year', 'li_wealth']) + .sum(axis=0) + ) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def find_difference_relative_to_comparison_series( + _ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. 
+ The comparison is `X - COMPARISON`.""" + return (_ser + .unstack(level=0) + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) + .drop(columns=([comparison] if drop_comparison else [])) + .stack() + ) + + def find_difference_relative_to_comparison_dataframe(_df: pd.DataFrame, **kwargs): + """Apply `find_difference_relative_to_comparison_series` to each row in a dataframe""" + return pd.concat({ + _idx: find_difference_relative_to_comparison_series(row, **kwargs) + for _idx, row in _df.iterrows() + }, axis=1).T + + # group scenarios for presentation + def scenario_grouping_coloring(by='effect'): + if by == 'effect': # based on DALYs averted/whether to expand Clinical + Pharmacy + grouping = { + 'C + P + D/N&M/O/None': {'s_1', 's_2', 's_3', 's_11', 's_20', 's_22', 's_24', 's_29', 's_31', 's_32'}, + 'C + D/N&M/O/None': {'s_4', 's_9', 's_10', 's_12', 's_19', 's_21', 's_23', 's_30'}, + 'P + D/N&M/O/None': {'s_7', 's_14', 's_16', 's_18', 's_25', 's_27', 's_28', 's_33'}, + 'D/N&M/O/None': {'s_5', 's_6', 's_8', 's_13', 's_15', 's_17', 's_26', 's_0'} + } + grouping_color = { + 'D/N&M/O/None': 'lightpink', + 'P + D/N&M/O/None': 'violet', + 'C + D/N&M/O/None': 'darkorchid', + 'C + P + D/N&M/O/None': 'darkturquoise', + } + elif by == 'expansion': # based on how many cadres are expanded + grouping = { + 'no_expansion': {'s_0'}, + 'all_cadres_equal_expansion': {'s_3'}, + 'all_cadres_gap_expansion': {'s_2'}, + 'all_cadres_current_expansion': {'s_1'}, + 'one_cadre_expansion': {'s_4', 's_5', 's_6', 's_7', 's_8'}, + 'two_cadres_equal_expansion': {'s_9', 's_10', 's_11', 's_12', 's_13', + 's_14', 's_15', 's_16', 's_17', 's_18'}, + 'three_cadres_equal_expansion': {'s_19', 's_20', 's_21', 's_22', 's_23', + 's_24', 's_25', 's_26', 's_27', 's_28'}, + 'four_cadres_equal_expansion': {'s_29', 's_30', 's_31', 's_32', 's_33'} + + } + grouping_color = { + 'no_expansion': 'gray', + 'one_cadre_expansion': 'lightpink', + 'two_cadres_equal_expansion': 'violet', + 
'three_cadres_equal_expansion': 'darkorchid', + 'four_cadres_equal_expansion': 'paleturquoise', + 'all_cadres_equal_expansion': 'darkturquoise', + 'all_cadres_current_expansion': 'deepskyblue', + 'all_cadres_gap_expansion': 'royalblue', + } + return grouping, grouping_color + + def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar. + Annotated with percent statistics from _df_percent, if annotation=True and _df_percent not None.""" + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} + + colors = [scenario_color[s] for s in _df.index] + + fig, ax = plt.subplots(figsize=(18, 6)) + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + alpha=0.8, + ecolor='black', + color=colors, + capsize=10, + label=xticks.values(), + zorder=100, + ) + + if annotation: + assert (_df.index == _df_percent.index).all() + for xpos, ypos, text1, text2, text3 in zip(xticks.keys(), _df['upper'].values, + _df_percent['mean'].values, + _df_percent['lower'].values, + _df_percent['upper'].values): + text = f"{int(round(text1 * 100, 2))}%\n{[round(text2, 2),round(text3, 2)]}" + ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize='xx-small') + + ax.set_xticks(list(xticks.keys())) + + xtick_label_detail = [substitute_labels[v] for v in xticks.values()] + ax.set_xticklabels(xtick_label_detail, rotation=90) + + legend_labels = list(scenario_groups[1].keys()) + legend_handles = [plt.Rectangle((0, 0), 1, 1, + color=scenario_groups[1][label]) for label in legend_labels] + ax.legend(legend_handles, legend_labels, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5), + title='Scenario groups') + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + 
fig.tight_layout() + + return fig, ax + + def get_scale_up_factor(_df): + """ + Return a series of yearly scale up factors for all cadres, + with index of year and value of list of scale up factors. + """ + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year_of_scale_up', 'scale_up_factor'] + ].set_index('year_of_scale_up') + _df = _df['scale_up_factor'].apply(pd.Series) + assert (_df.columns == cadres).all() + _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index} + _df_1 = pd.DataFrame(data=_dict).T + return pd.Series( + _df_1.loc[:, 0], index=_df_1.index + ) + + def get_total_cost(_df): + """ + Return a series of yearly total cost for all cadres, + with index of year and values of list of total cost. + """ + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year_of_scale_up', 'total_hr_salary'] + ].set_index('year_of_scale_up') + _df = _df['total_hr_salary'].apply(pd.Series) + assert (_df.columns == cadres).all() + _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index} + _df_1 = pd.DataFrame(data=_dict).T + return pd.Series( + _df_1.loc[:, 0], index=_df_1.index + ) + + def get_current_hr(cadres): + """ + Return current (year of 2018/2019) staff counts and capabilities for the cadres specified. 
+ """ + curr_hr_path = Path(resourcefilepath + / 'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv') + curr_hr = pd.read_csv(curr_hr_path).groupby('Officer_Category').agg( + {'Staff_Count': 'sum', 'Total_Mins_Per_Day': 'sum'}).reset_index() + curr_hr['Total_Minutes_Per_Year'] = curr_hr['Total_Mins_Per_Day'] * 365.25 + curr_hr.drop(['Total_Mins_Per_Day'], axis=1, inplace=True) + curr_hr_counts = curr_hr.loc[ + curr_hr['Officer_Category'].isin(cadres), ['Officer_Category', 'Staff_Count'] + ].set_index('Officer_Category').T + curr_hr_capabilities = curr_hr.loc[ + curr_hr['Officer_Category'].isin(cadres), ['Officer_Category', 'Total_Minutes_Per_Year'] + ].set_index('Officer_Category').T + + return curr_hr_counts[cadres], curr_hr_capabilities[cadres] + + def get_hr_salary(cadres): + """ + Return annual salary for the cadres specified. + """ + salary_path = Path(resourcefilepath + / 'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv') + salary = pd.read_csv(salary_path, index_col=False) + salary = salary.loc[ + salary['Officer_Category'].isin(cadres), ['Officer_Category', 'Annual_Salary_USD'] + ].set_index('Officer_Category').T + return salary[cadres] + + def format_appt_time_and_cost(): + """ + Return the formatted appointment time requirements and costs per cadre + """ + file_path = Path(resourcefilepath + / 'healthsystem' / 'human_resources' / 'definitions' / 'ResourceFile_Appt_Time_Table.csv') + _df = pd.read_csv(file_path, index_col=False) + + time = _df.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category', + values='Time_Taken_Mins').fillna(0.0).T + minute_salary = Minute_Salary_by_Cadre_Level + cost = _df.merge(minute_salary, on=['Facility_Level', 'Officer_Category'], how='left') + cost['cost_USD'] = cost['Time_Taken_Mins'] * cost['Minute_Salary_USD'] + cost = cost.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category', + values='cost_USD').fillna(0.0).T + + return time, cost + + 
def get_frac_of_hcw_time_used(_df): + """Return the fraction of time used by cadre and facility level""" + # CNP_cols = ['date'] + # for col in _df.columns[1:]: + # if ('Clinical' in col) | ('Nursing_and_Midwifery' in col) | ('Pharmacy' in col): + # CNP_cols.append(col) + # + # _df = _df[CNP_cols].copy() + _df = _df.loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), :] + _df = _df.set_index('date').mean(axis=0) # average over years + + return _df + + def get_hcw_time_by_treatment(): + appointment_time_table = pd.read_csv( + resourcefilepath + / 'healthsystem' + / 'human_resources' + / 'definitions' + / 'ResourceFile_Appt_Time_Table.csv', + index_col=["Appt_Type_Code", "Facility_Level", "Officer_Category"] + ) + + appt_type_facility_level_officer_category_to_appt_time = ( + appointment_time_table.Time_Taken_Mins.to_dict() + ) + + officer_categories = appointment_time_table.index.levels[ + appointment_time_table.index.names.index("Officer_Category") + ].to_list() + + times_by_officer_category_treatment_id_per_draw_run = bin_hsi_event_details( + results_folder, + lambda event_details, count: sum( + [ + Counter({ + ( + officer_category, + event_details["treatment_id"].split("_")[0] + ): + count + * appt_number + * appt_type_facility_level_officer_category_to_appt_time.get( + ( + appt_type, + event_details["facility_level"], + officer_category + ), + 0 + ) + for officer_category in officer_categories + }) + for appt_type, appt_number in event_details["appt_footprint"] + ], + Counter() + ), + *TARGET_PERIOD, + True + ) + + time_by_cadre_treatment_per_draw = compute_mean_across_runs(times_by_officer_category_treatment_id_per_draw_run) + + # transform counter to dataframe + def format_time_by_cadre_treatment(_df): + _df.reset_index(drop=False, inplace=True) + for idx in _df.index: + _df.loc[idx, 'Cadre'] = _df.loc[idx, 'index'][0] + _df.loc[idx, 'Treatment'] = _df.loc[idx, 'index'][1] + _df = _df.drop('index', axis=1).rename(columns={0: 'value'}).pivot( + 
index='Treatment', columns='Cadre', values='value').fillna(0.0) + + _series = _df.sum(axis=1) # sum up cadres + + return _df, _series + + # time_by_cadre_treatment_all_scenarios = { + # f's_{key}': format_time_by_cadre_treatment( + # pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index') + # )[0] for key in range(len(param_names)) + # } + # + # time_increased_by_cadre_treatment = { + # key: time_by_cadre_treatment_all_scenarios[key] - time_by_cadre_treatment_all_scenarios['s_2'] + # for key in time_by_cadre_treatment_all_scenarios.keys() + # } + + time_by_treatment_all_scenarios = { + f's_{key}': format_time_by_cadre_treatment( + pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index') + )[1] for key in range(len(param_names)) + + } + time_by_treatment_all_scenarios = pd.DataFrame(time_by_treatment_all_scenarios).T + + # rename index of scenario to match with real draw number + time_by_treatment_all_scenarios.rename( + index={'s_1': 's_10', 's_2': 's_11', 's_3': 's_16', 's_4': 's_22'}, + inplace=True) + + time_increased_by_treatment = time_by_treatment_all_scenarios.subtract( + time_by_treatment_all_scenarios.loc['s_0', :], axis=1).drop('s_0', axis=0).add_suffix('*') + + return time_increased_by_treatment + + # Get parameter/scenario names + param_names = ('s_0', 's_10', 's_11', 's_16', 's_22') + + # Define cadres in order + cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'] + + # Get appointment time and cost requirement + appt_time, appt_cost = format_appt_time_and_cost() + + # Get current (year of 2018/2019) hr counts + # curr_hr = get_current_hr(cadres)[0] + curr_hr_cap = get_current_hr(cadres)[1] + + # Get scale up factors for all scenarios + scale_up_factors = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HRScaling', + custom_generate_series=get_scale_up_factor, + do_scaling=False + 
).pipe(set_param_names_as_column_index_level_0).stack(level=0) + # check that the scale up factors are all most the same between each run within each draw + # assert scale_up_factors.eq(scale_up_factors.iloc[:, 0], axis=0).all().all() + # keep scale up factors of only one run within each draw + scale_up_factors = scale_up_factors.iloc[:, 0].unstack().reset_index().melt(id_vars='index') + scale_up_factors[cadres] = scale_up_factors.value.tolist() + scale_up_factors.drop(columns='value', inplace=True) + + # get total capabilities by cadre in the target period + hcw_time_capabilities = scale_up_factors.copy() + assert (hcw_time_capabilities.columns[2:] == curr_hr_cap.columns).all() + hcw_time_capabilities[hcw_time_capabilities.columns[2:]] = ( + hcw_time_capabilities[hcw_time_capabilities.columns[2:]].mul(curr_hr_cap.values, axis=1)) + hcw_time_capabilities = hcw_time_capabilities.groupby(by=['draw']).sum().drop(columns=['index']) # sum up years + + # # Get salary + # salary = get_hr_salary(cadres) + # + # # Get total cost for all scenarios + # total_cost = extract_results( + # results_folder, + # module='tlo.methods.healthsystem.summary', + # key='HRScaling', + # custom_generate_series=get_total_cost, + # do_scaling=False + # ).pipe(set_param_names_as_column_index_level_0).stack(level=0) + # total_cost = total_cost.iloc[:, 0].unstack().reset_index().melt(id_vars='index') + # total_cost[cadres] = total_cost.value.tolist() + # total_cost.drop(columns='value', inplace=True) + # total_cost['all_cadres'] = total_cost[[c for c in total_cost.columns if c in cadres]].sum(axis=1) + # total_cost.rename(columns={'index': 'year'}, inplace=True) + # + # # total cost of all expansion years + # total_cost_all_yrs = total_cost.groupby('draw').sum().drop(columns='year') + # + # # total extra cost of all expansion years + # extra_cost_all_yrs = total_cost_all_yrs.copy() + # for s in param_names[1:]: + # extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - 
total_cost_all_yrs.loc['s_0', :] + # extra_cost_all_yrs.drop(index='s_0', inplace=True) + # + # # get staff count = total cost / salary + # staff_count = total_cost.copy() + # for c in cadres: + # staff_count.loc[:, c] = total_cost.loc[:, c] / salary[c].values[0] + # staff_count.loc[:, 'all_cadres'] = staff_count[[c for c in staff_count.columns if c in cadres]].sum(axis=1) + # + # # get extra count = staff count - staff count of no expansion s_1 + # # note that annual staff increase rate = scale up factor - 1 + # extra_staff = staff_count.copy() + # for i in staff_count.index: + # extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:] + # + # # extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop( + # # index='s_1' + # # ) + # # staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw') + # + # # check total cost calculated is increased as expected + # years = range(2019, the_target_period[1].year + 1) + # for s in param_names[1:]: + # assert (abs( + # total_cost.loc[(total_cost.year == 2029) & (total_cost.draw == s), 'all_cadres'].values[0] - + # (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2019) & (total_cost.draw == 's_0'), + # 'all_cadres'].values[0] + # ) < 1e6).all() + + # Absolute Number of Deaths and DALYs and Services + num_deaths = extract_results( + results_folder, + module='tlo.methods.demography', + key='death', + custom_generate_series=get_num_deaths, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_dalys = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + # num_dalys_yearly = extract_results( + # results_folder, + # module='tlo.methods.healthburden', + # key='dalys_stacked', + # custom_generate_series=get_num_dalys_yearly, + # 
do_scaling=True + # ).pipe(set_param_names_as_column_index_level_0) + + num_dalys_by_cause = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_by_wealth_stacked_by_age_and_time", + custom_generate_series=get_num_dalys_by_cause, + do_scaling=True, + ).pipe(set_param_names_as_column_index_level_0) + + num_appts = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_appts, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_appts_by_level = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_appts_by_level, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_services = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_services, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_treatments = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_treatments, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_treatments_total = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_treatments_total, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_never_ran_appts = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_num_appts, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_never_ran_appts_by_level = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + 
key='Never_ran_HSI_Event', + custom_generate_series=get_num_appts_by_level, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_never_ran_services = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_num_services, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + # num_never_ran_treatments_total = extract_results( + # results_folder, + # module='tlo.methods.healthsystem.summary', + # key='Never_ran_HSI_Event', + # custom_generate_series=get_num_treatments_total, + # do_scaling=True + # ).pipe(set_param_names_as_column_index_level_0) + + # num_never_ran_treatments = extract_results( + # results_folder, + # module='tlo.methods.healthsystem.summary', + # key='Never_ran_HSI_Event', + # custom_generate_series=get_num_treatments, + # do_scaling=True + # ).pipe(set_param_names_as_column_index_level_0) + + # get total service demand + assert len(num_services) == len(num_never_ran_services) == 1 + assert (num_services.columns == num_never_ran_services.columns).all() + # num_services_demand = num_services + num_never_ran_services + # ratio_services = num_services / num_services_demand + + assert (num_appts.columns == num_never_ran_appts.columns).all() + num_never_ran_appts.loc['Lab / Diagnostics', :] = 0 + num_never_ran_appts = num_never_ran_appts.reindex(num_appts.index).fillna(0.0) + assert (num_appts.index == num_never_ran_appts.index).all() + # num_appts_demand = num_appts + num_never_ran_appts + + hcw_time_usage = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Capacity_By_OfficerType_And_FacilityLevel',#'Capacity',#'Capacity_By_OfficerType_And_FacilityLevel', + custom_generate_series=get_frac_of_hcw_time_used, + do_scaling=False + ).pipe(set_param_names_as_column_index_level_0) + + # get absolute numbers for scenarios + # sort the scenarios according to their DALYs values, in ascending order + 
num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names).sort_values(by='mean') + num_dalys_by_cause_summarized = summarize(num_dalys_by_cause, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + + # num_dalys_yearly_summarized = (summarize(num_dalys_yearly) + # .stack([0, 1]) + # .rename_axis(['year', 'scenario', 'stat']) + # .reset_index(name='count')) + # + # num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + + num_services_summarized = summarize(num_services).loc[0].unstack().reindex(param_names).reindex( + num_dalys_summarized.index + ) + # num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + num_appts_by_level_summarized = summarize(num_appts_by_level, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index).fillna(0.0) + num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex( + param_names).reindex(num_dalys_summarized.index).fillna(0.0) + # num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + # num_treatments_total_summarized = summarize(num_treatments_total).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + + # num_never_ran_services_summarized = summarize(num_never_ran_services).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + # num_never_ran_appts_summarized = summarize(num_never_ran_appts, only_mean=True).T.reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + # num_never_ran_treatments_summarized = summarize(num_never_ran_treatments, 
only_mean=True).T.reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + # num_never_ran_treatments_total_summarized = summarize(num_never_ran_treatments_total).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + # num_services_demand_summarized = summarize(num_services_demand).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + # ratio_service_summarized = summarize(ratio_services).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + hcw_time_usage_summarized = summarize(hcw_time_usage, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + hcw_time_usage_summarized.columns = [col.replace('OfficerType=', '').replace('FacilityLevel=', '') + for col in hcw_time_usage_summarized.columns] + hcw_time_usage_summarized.columns = hcw_time_usage_summarized.columns.str.split(pat='|', expand=True) + + # get relative numbers for scenarios, compared to no_expansion scenario: s_0 + num_services_increased = summarize( + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_services.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + hcw_time_increased_by_treatment_type = get_hcw_time_by_treatment().reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_services_increased_percent = summarize( + # pd.DataFrame( + # find_difference_relative_to_comparison_series( + # num_services.loc[0], + # comparison='s_1', + # scaled=True) + # ).T + # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1']) + + num_deaths_averted = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_deaths.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_deaths_averted_percent = summarize( + -1.0 * + pd.DataFrame( + 
find_difference_relative_to_comparison_series( + num_deaths.loc[0], + comparison='s_0', + scaled=True) + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_dalys_averted = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_dalys.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_dalys_averted_percent = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_dalys.loc[0], + comparison='s_0', + scaled=True + ) + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_dalys_by_cause_averted = summarize( + -1.0 * find_difference_relative_to_comparison_dataframe( + num_dalys_by_cause, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_dalys_by_cause_averted_percent = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_dalys_by_cause, + # comparison='s_0', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_2', :].sort_values(ascending=False) + # # num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False) + # num_dalys_by_cause_averted_percent_CNP = num_dalys_by_cause_averted_percent.loc['s_2', :].sort_values( + # ascending=False) + # # num_dalys_by_cause_averted_percent_CP = num_dalys_by_cause_averted_percent.loc['s_11', :].sort_values( + # # ascending=False) + + # num_dalys_by_cause_averted_percent = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_dalys_by_cause, + # comparison='s_1', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + num_appts_increased = summarize( + 
find_difference_relative_to_comparison_dataframe( + num_appts, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_never_ran_appts_reduced = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_never_ran_appts, + # comparison='s_1', + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + # num_never_ran_treatments_reduced = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_never_ran_treatments, + # comparison='s_1', + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + # num_appts_increased_percent = summarize( + # find_difference_relative_to_comparison_dataframe( + # num_appts, + # comparison='s_1', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + num_treatments_increased = summarize( + find_difference_relative_to_comparison_dataframe( + num_treatments, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_treatments_increased_percent = summarize( + # find_difference_relative_to_comparison_dataframe( + # num_treatments, + # comparison='s_1', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + num_treatments_total_increased = summarize( + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_treatments_total.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_treatments_total_increased_percent = summarize( + # pd.DataFrame( + # find_difference_relative_to_comparison_series( + # num_treatments_total.loc[0], + # comparison='s_1', + # scaled=True) + # ).T + # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1']) + + # service_ratio_increased = summarize( + # pd.DataFrame( + # 
find_difference_relative_to_comparison_series( + # ratio_services.loc[0], + # comparison='s_1') + # ).T + # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1']) + + # service_ratio_increased_percent = summarize( + # pd.DataFrame( + # find_difference_relative_to_comparison_series( + # ratio_services.loc[0], + # comparison='s_1', + # scaled=True) + # ).T + # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1']) + + # Check that when we sum across the causes/appt types, + # we get the same total as calculated when we didn't split by cause/appt type. + assert ( + (num_appts_increased.sum(axis=1).sort_index() + - num_services_increased['mean'].sort_index() + ) < 1e-6 + ).all() + + assert ( + (num_dalys_by_cause_averted.sum(axis=1).sort_index() + - num_dalys_averted['mean'].sort_index() + ) < 1e-6 + ).all() + + assert ( + (num_treatments_increased.sum(axis=1).sort_index() + - num_treatments_total_increased['mean'].sort_index() + ) < 1e-6 + ).all() + + # get time used by services delivered + def hcw_time_or_cost_used(time_cost_df=appt_time, count_df=num_appts_by_level_summarized): + cols_1 = count_df.columns + cols_2 = time_cost_df.columns + # check that appts (at a level) not in appt_time (as defined) have count 0 and drop them + # assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() -> ('2', 'Tomography') + # replace Tomography from level 2 to level 3 + count_df.loc[:, ('3', 'Tomography')] += count_df.loc[:, ('2', 'Tomography')] + count_df.loc[:, ('2', 'Tomography')] = 0 + assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() + if len(list(set(cols_1) - set(cols_2))) > 0: + _count_df = count_df.drop(columns=list(set(cols_1) - set(cols_2))) + else: + _count_df = count_df.copy() + assert set(_count_df.columns).issubset(set(cols_2)) + # calculate hcw time used + use = pd.DataFrame(index=_count_df.index, + columns=time_cost_df.index) + for i in use.index: + for j in 
use.columns: + use.loc[i, j] = _count_df.loc[i, :].mul( + time_cost_df.loc[j, _count_df.columns] + ).sum() + # reorder columns to be consistent with cadres + use = use[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Radiography']] + # reorder index to be consistent with descending order of DALYs averted + use = use.reindex(num_dalys_summarized.index) + + # calculate time used by cadre and level + used_by_cadre_level = { + key: time_cost_df[_count_df.columns].mul(_count_df.loc[key, :].values, axis=1).rename( + columns={'1b': '2'}, level=0).groupby( + level=0, axis=1).sum().T.unstack().T for key in _count_df.index + } + used_by_cadre_level = pd.DataFrame.from_dict(used_by_cadre_level, orient='index') + + return use, used_by_cadre_level + + hcw_time_used = hcw_time_or_cost_used(time_cost_df=appt_time)[0] + hcw_time_used_increased = pd.DataFrame( + hcw_time_used.subtract(hcw_time_used.loc['s_0', :], axis=1).drop('s_0', axis=0) + ) + hcw_time_used_by_cadre_level = hcw_time_or_cost_used(time_cost_df=appt_time)[1] + + # get hcw capabilities rescaled + assert set(hcw_time_used_by_cadre_level.columns).issubset(set(hcw_time_usage_summarized.columns)) + assert (hcw_time_usage_summarized[ + list(set(hcw_time_usage_summarized.columns) - set(hcw_time_used_by_cadre_level.columns)) + ] == 0.0).all().all() + hcw_time_capabilities_rescaled = (hcw_time_used_by_cadre_level / + hcw_time_usage_summarized[hcw_time_used_by_cadre_level.columns]) + hcw_time_capabilities_rescaled = hcw_time_capabilities_rescaled.groupby(level=0, axis=1).sum() + hcw_time_capabilities_rescaled = hcw_time_capabilities_rescaled[hcw_time_used.columns] + hcw_time_capabilities_increased = pd.DataFrame( + hcw_time_capabilities_rescaled.subtract( + hcw_time_capabilities_rescaled.loc['s_0', :], axis=1).drop('s_0', axis=0) + ) + + # get hcw capabilities not rescaled + hcw_time_capabilities = 
hcw_time_capabilities.reindex(num_dalys_summarized.index).drop(columns='Nutrition') + + # get HCW time and cost needed to run the never run appts + def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by_level_summarized): + cols_1 = count_df.columns + cols_2 = time_cost_df.columns + # check that never ran appts (at a level) not in appt_time (as defined) have count 0 and drop them + assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() + if len(list(set(cols_1) - set(cols_2))) > 0: + _count_df = count_df.drop(columns=list(set(cols_1) - set(cols_2))) + else: + _count_df = count_df.copy() + assert set(_count_df.columns).issubset(set(cols_2)) + # calculate hcw time gap + gap = pd.DataFrame(index=_count_df.index, + columns=time_cost_df.index) + for i in gap.index: + for j in gap.columns: + gap.loc[i, j] = _count_df.loc[i, :].mul( + time_cost_df.loc[j, _count_df.columns] + ).sum() + # reorder columns to be consistent with cadres + gap = gap[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Radiography']] + # reorder index to be consistent with + gap = gap.reindex(num_dalys_summarized.index) + + return gap + + hcw_time_gap = hcw_time_or_cost_gap(appt_time) + hcw_cost_gap = hcw_time_or_cost_gap(appt_cost) + + # hcw time demand to meet ran + never ran services + assert (hcw_time_used.index == hcw_time_gap.index).all() + assert (hcw_time_used.columns == hcw_time_gap.columns).all() + hcw_time_demand = hcw_time_used + hcw_time_gap + # hcw_time_demand_increased = pd.DataFrame( + # hcw_time_demand.subtract(hcw_time_demand.loc['s_0', :], axis=1).drop('s_0', axis=0) + # ) + + # cost gap proportions of cadres within each scenario + hcw_cost_gap_percent = pd.DataFrame(index=hcw_cost_gap.index, columns=hcw_cost_gap.columns) + for i in hcw_cost_gap_percent.index: + hcw_cost_gap_percent.loc[i, :] = hcw_cost_gap.loc[i, :] / hcw_cost_gap.loc[i, :].sum() + # add a column of 'other' to sum up other cadres 
+ hcw_cost_gap_percent['Other'] = hcw_cost_gap_percent[ + ['Dental', 'Laboratory', 'Mental', 'Radiography'] + ].sum(axis=1) + + # # store the proportions of no expansion scenario as the "best" scenario that is to be tested + # hcw_cost_gap_percent_no_expansion = hcw_cost_gap_percent.loc[ + # 's_1', ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other'] + # ].copy() # [0.4586, 0.0272, 0.3502, 0.1476, 0.0164] + + # find appts that need Clinical + Pharmacy (+ Nursing_and_Midwifery) + # then calculate hcw time needed for these appts (or treatments, need treatment and their appt footprint) + # in never run set + # so we can explain that expand C+P is reducing the never run appts and bring health benefits across scenarios + # then the next question is what proportion for C and P and any indication for better extra budget allocation + # so that never ran appts will be reduced and DALYs could be averted further? + def get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Pharmacy'], appts_count_all=num_never_ran_appts_by_level_summarized + ): + # find the appts that need all cadres in cadres_to_find + def find_never_ran_appts_that_need_specific_cadres(): + appts_to_find = [] + _common_cols = appt_time.columns.intersection(appts_count_all.columns) + # already checked above that columns in the latter that are not in the former have 0 count + for col in _common_cols: + if ((appt_time.loc[cadres_to_find, col] > 0).all() + and (appt_time.loc[~appt_time.index.isin(cadres_to_find), col] == 0).all()): + appts_to_find.append(col) + + return appts_to_find + + # counts and count proportions of all never ran + _appts = find_never_ran_appts_that_need_specific_cadres() + _counts = (appts_count_all[_appts].groupby(level=1, axis=1).sum() + .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP).groupby(level=0, axis=1).sum() + .reindex(num_dalys_summarized.index)) + _counts_all = (appts_count_all.groupby(level=1, axis=1).sum() + 
.rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP).groupby(level=0, axis=1).sum() + .reindex(num_dalys_summarized.index)) + assert (_counts.index == _counts_all.index).all() + _proportions = _counts / _counts_all[_counts.columns] + + # hcw time gap and proportions + _time_gap = hcw_time_or_cost_gap(appt_time, appts_count_all[_appts]) + assert (_time_gap.index == hcw_time_gap.index).all() + _time_gap_proportions = _time_gap / hcw_time_gap[_time_gap.columns] + + # hcw cost gap and proportions + _cost_gap = hcw_time_or_cost_gap(appt_cost, appts_count_all[_appts]) + assert (_cost_gap.index == hcw_cost_gap.index).all() + _cost_gap_proportions = _cost_gap / hcw_cost_gap[_cost_gap.columns] + # cost gap distribution among cadres + _cost_gap_percent = pd.DataFrame(index=_cost_gap.index, columns=_cost_gap.columns) + for i in _cost_gap_percent.index: + _cost_gap_percent.loc[i, :] = _cost_gap.loc[i, :] / _cost_gap.loc[i, :].sum() + + # if sum up all appt types/cadres + _proportions_total = _counts.sum(axis=1) / _counts_all.sum(axis=1) + _cost_gap_proportions_total = _cost_gap.sum(axis=1) / hcw_cost_gap.sum(axis=1) + _time_gap_proportions_total = _time_gap.sum(axis=1) / hcw_time_gap.sum(axis=1) + + return (_proportions_total, _cost_gap_proportions_total, _cost_gap, _cost_gap_percent, + _time_gap_proportions_total, _time_gap) + + never_ran_appts_info_that_need_CNP = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Nursing_and_Midwifery', 'Pharmacy']) + never_ran_appts_info_that_need_CP = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Pharmacy']) + never_ran_appts_info_that_need_CN = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Nursing_and_Midwifery']) + never_ran_appts_info_that_need_NP = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Nursing_and_Midwifery', 'Pharmacy']) + never_ran_appts_info_that_need_C = 
get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical']) + never_ran_appts_info_that_need_N = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Nursing_and_Midwifery']) + never_ran_appts_info_that_need_P = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Pharmacy']) + + # cost/time proportions within never ran appts, in total of all cadres + p_cost = pd.DataFrame(index=num_services_summarized.index) + p_cost['C and P and N&M'] = never_ran_appts_info_that_need_CNP[1] + p_cost['C and P'] = never_ran_appts_info_that_need_CP[1] + p_cost['C and N&M'] = never_ran_appts_info_that_need_CN[1] + p_cost['N&M and P'] = never_ran_appts_info_that_need_NP[1] + p_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[1] + p_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[1] + p_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[1] + p_cost['Other cases'] = 1 - p_cost[p_cost.columns[0:7]].sum(axis=1) + + p_time = pd.DataFrame(index=num_services_summarized.index) + p_time['C and P and N&M'] = never_ran_appts_info_that_need_CNP[4] + p_time['C and P'] = never_ran_appts_info_that_need_CP[4] + p_time['C and N&M'] = never_ran_appts_info_that_need_CN[4] + p_time['N&M and P'] = never_ran_appts_info_that_need_NP[4] + p_time['Clinical (C)'] = never_ran_appts_info_that_need_C[4] + p_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[4] + p_time['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[4] + p_time['Other cases'] = 1 - p_time[p_time.columns[0:7]].sum(axis=1) + + # absolute cost/time gap within never ran appts + a_cost = pd.DataFrame(index=num_services_summarized.index) + a_cost['C and P and N&M'] = never_ran_appts_info_that_need_CNP[2].sum(axis=1) + a_cost['C and P'] = never_ran_appts_info_that_need_CP[2].sum(axis=1) + a_cost['C and N&M'] = never_ran_appts_info_that_need_CN[2].sum(axis=1) + a_cost['N&M and P'] = never_ran_appts_info_that_need_NP[2].sum(axis=1) + 
a_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[2].sum(axis=1) + a_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[2].sum(axis=1) + a_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[2].sum(axis=1) + a_cost['Other cases'] = hcw_cost_gap.sum(axis=1) - a_cost.sum(axis=1) + + a_time = pd.DataFrame(index=num_services_summarized.index) + a_time['C and P and N&M'] = never_ran_appts_info_that_need_CNP[5].sum(axis=1) + a_time['C and P'] = never_ran_appts_info_that_need_CP[5].sum(axis=1) + a_time['C and N&M'] = never_ran_appts_info_that_need_CN[5].sum(axis=1) + a_time['N&M and P'] = never_ran_appts_info_that_need_NP[5].sum(axis=1) + a_time['Clinical (C)'] = never_ran_appts_info_that_need_C[5].sum(axis=1) + a_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[5].sum(axis=1) + a_time['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[5].sum(axis=1) + a_time['Other cases'] = hcw_time_gap.sum(axis=1) - a_time.sum(axis=1) + + # appts count proportions within never ran appts, in total of all cadres + p_count = pd.DataFrame(index=num_services_summarized.index) + p_count['C and P and N&M'] = never_ran_appts_info_that_need_CNP[0] + p_count['C and P'] = never_ran_appts_info_that_need_CP[0] + p_count['C and N&M'] = never_ran_appts_info_that_need_CN[0] + p_count['N&M and P'] = never_ran_appts_info_that_need_NP[0] + p_count['Clinical (C)'] = never_ran_appts_info_that_need_C[0] + p_count['Pharmacy (P)'] = never_ran_appts_info_that_need_P[0] + p_count['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[0] + p_count['Other cases'] = 1 - p_count[p_count.columns[0:7]].sum(axis=1) + + # define color for the cadres combinations above + # cadre_comb_color = { + # 'C and P and N&M': 'royalblue', + # 'C and P': 'turquoise', + # 'C and N&M': 'gold', + # 'N&M and P': 'yellowgreen', + # 'Clinical (C)': 'mediumpurple', + # 'Pharmacy (P)': 'limegreen', + # 'Nursing_and_Midwifery (N&M)': 'pink', + # 'Other cases': 'gray', + # } + 
+ # Checked that Number_By_Appt_Type_Code and Number_By_Appt_Type_Code_And_Level have not exactly same results + + # hcw time by cadre and treatment: draw = 22: C + N + P vs no expansion, draw = 11, C + P vs no expansion + # time_increased_by_cadre_treatment_CNP = get_hcw_time_by_treatment(21) + # time_increased_by_cadre_treatment_CP = get_hcw_time_by_treatment(10) + + # # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1 + # # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios + # ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns) + # # todo: for the bad scenarios (s_5, s_8, s_15), the dalys averted are negative + # # (maybe only due to statistical variation; relative difference to s_1 are close to 0%), + # # thus CE does not make sense. + # # CE = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns) + # for i in ROI.index: + # ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost_all_yrs.loc[i, 'all_cadres'] + # # CE.loc[i, 'mean'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'mean'] + # # CE.loc[i, 'lower'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'upper'] + # # CE.loc[i, 'upper'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower'] + + # prepare colors for plots + # appt_color = { + # appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized.columns + # } + treatment_color = { + treatment: SHORT_TREATMENT_ID_TO_COLOR_MAP.get(treatment, np.nan) + for treatment in num_treatments_summarized.columns + } + cause_color = { + cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan) + for cause in num_dalys_by_cause_summarized.columns + } + officer_category_color = { + 'Clinical': 'blue', + 'DCSA': 'orange', + 'Nursing_and_Midwifery': 'red', + 'Pharmacy': 'green', + 'Dental': 'purple', + 
'Laboratory': 'orchid', + 'Mental': 'plum', + 'Nutrition': 'thistle', + 'Radiography': 'lightgray', + 'Other': 'gray' + } + # get scenario color + # scenario_groups = scenario_grouping_coloring(by='effect') + scenario_groups = scenario_grouping_coloring(by='expansion') + scenario_color = {} + for s in param_names: + for k in scenario_groups[1].keys(): + if s in scenario_groups[0][k]: + scenario_color[s] = scenario_groups[1][k] + + # representative_scenarios_color = {} + # cmap_list = list(map(plt.get_cmap("Set3"), range(len(param_names)))) + # for i in range(len(param_names)): + # representative_scenarios_color[num_dalys_summarized.index[i]] = cmap_list[i] + + # plot 4D data: relative increases of Clinical, Pharmacy, and Nursing_and_Midwifery as three coordinates,\ + # percentage of DALYs averted decides the color of that scatter point + extra_budget_allocation = extra_budget_fracs.T.reindex(num_dalys_summarized.index) + extra_budget_allocation['Other'] = extra_budget_allocation[ + ['Dental', 'Laboratory', 'Mental', 'Radiography'] + ].sum(axis=1) + name_of_plot = f'3D DALYs averted (%) vs no extra budget allocation, {target_period()}' + heat_data = pd.merge(num_dalys_averted_percent['mean'], + extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']], + left_index=True, right_index=True, how='inner') + # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + colors = [scenario_color[s] for s in heat_data.index] + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + ax.scatter(heat_data['Clinical'], heat_data['Pharmacy'], heat_data['Nursing_and_Midwifery'], + alpha=0.8, marker='o', s=heat_data['mean'] * 2000, + #c=heat_data['mean'] * 100, cmap='viridis', + c=colors) + # plot lines from the best point to three axes panes + # ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]], + # [heat_data['Pharmacy'][0], 
heat_data['Pharmacy'][0]], + # [0, heat_data['Nursing_and_Midwifery'][0]], + # linestyle='--', color='gray', alpha=0.8) + # ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]], + # [0, heat_data['Pharmacy'][0]], + # [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]], + # linestyle='--', color='gray', alpha=0.8) + # ax.plot3D([0, heat_data['Clinical'][0]], + # [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]], + # [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]], + # linestyle='--', color='gray', alpha=0.8) + ax.set_xlabel('Fraction of extra budget allocated to \nClinical cadre (C)') + ax.set_ylabel('Pharmacy cadre (P)') + #ax.invert_xaxis() + ax.invert_yaxis() + ax.set_zlabel('Nursing and Midwifery (N&M)') + ax.plot3D([0, 1], [0, 1], [0, 1], linestyle='-', color='orange', alpha=1.0, linewidth=2) + legend_labels = list(scenario_groups[1].keys()) + ['line of C = P = N&M'] + legend_handles = [plt.Line2D([0, 0], [0, 0], + linestyle='none', marker='o', color=scenario_groups[1][label] + ) for label in legend_labels[0:len(legend_labels) - 1] + ] + [plt.Line2D([0, 1], [0, 0], linestyle='-', color='orange')] + plt.legend(legend_handles, legend_labels, + loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + title='Scenario groups') + # plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.25) + plt.title(name_of_plot) + plt.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'3D DALYs averted, Services increased and Treatment increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_0, 's_22'] + # # heat_data = 
heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig = plt.figure() + # ax = fig.add_subplot(111, projection='3d') + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', + # c=colors) + # ax.set_xlabel('Services increased %') + # ax.set_ylabel('Treatments increased %') + # ax.set_zlabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'2D DALYs averted, Services increased and Treatment increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2], + # alpha=0.8, marker='o', s=2000 * heat_data.iloc[:, 0], + # c=colors) + # ax.set_xlabel('Services increased %') + # ax.set_ylabel('Treatments increased %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper 
center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs averted and Services increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', c=colors) + # ax.set_xlabel('Services increased %') + # ax.set_ylabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs averted and Treatments increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * 
heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', c=colors) + # ax.set_xlabel('Treatments increased %') + # ax.set_ylabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2) + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs averted and Services ratio increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], service_ratio_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', c=colors) + # ax.set_xlabel('Service delivery ratio increased %') + # ax.set_ylabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2) + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # # do some linear regression to see the marginal effects of individual cadres and combined effects of C, N, P cadres + # outcome_data = num_dalys_averted_percent['mean'] + # # 
outcome = num_services_increased_percent['mean'] + # # outcome = num_treatments_total_increased_percent['mean'] + # regression_data = pd.merge(outcome_data, + # extra_budget_allocation, + # left_index=True, right_index=True, how='inner') + # regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy'] + # regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery'] + # regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery'] + # regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy'] + # * regression_data['Nursing_and_Midwifery']) + # cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography', 'Other'] + # regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True) + # predictor = regression_data[regression_data.columns[1:]] + # outcome = regression_data['mean'] + # predictor = sm.add_constant(predictor) + # est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit() + # print(est.summary()) + + # todo: could do regression analysis of DALYs averted and Services increased + + # # do anova analysis to test the difference of scenario groups + # def anova_oneway(df=num_dalys_averted_percent): + # best = df.loc[list(scenario_groups['C + P + D/N&M/O/None']), 'mean'] + # middle_C = df.loc[list(scenario_groups['C + D/N&M/O/None']), 'mean'] + # middle_P = df.loc[list(scenario_groups['P + D/N&M/O/None']), 'mean'] + # worst = df.loc[df.index.isin(scenario_groups['D/N&M/O/None']), 'mean'] + # + # return ss.oneway.anova_oneway((best, middle_C, middle_P, worst), + # groups=None, use_var='unequal', welch_correction=True, trim_frac=0) + + # anova_dalys = anova_oneway() + # anova_services = anova_oneway(num_services_increased_percent) + # anova_treatments = anova_oneway(num_treatments_total_increased_percent) + + # plot absolute numbers for scenarios + + # name_of_plot = f'Deaths, 
{target_period()}' + # fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs, {target_period()}' + # fig, ax = do_bar_plot_with_ci(num_dalys_summarized / 1e6) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Service demand, {target_period()}' + # fig, ax = do_bar_plot_with_ci(num_service_demand_summarized / 1e6) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Service delivery ratio, {target_period()}' + # fig, ax = do_bar_plot_with_ci(ratio_service_summarized) + # ax.set_title(name_of_plot) + # ax.set_ylabel('services delivered / demand') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # # plot yearly DALYs for best 9 scenarios + # name_of_plot = f'Yearly DALYs, {target_period()}' + # fig, ax = plt.subplots(figsize=(9, 6)) + # best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1'] + # for s in best_scenarios: + # data = (num_dalys_yearly_summarized.loc[num_dalys_yearly_summarized.scenario == s, :] + # .drop(columns='scenario') + # .pivot(index='year', columns='stat') + # .droplevel(0, axis=1)) + # ax.plot(data.index, data['mean'] / 1e6, label=substitute_labels[s], color=best_scenarios_color[s], linewidth=2) + # # ax.fill_between(data.index.to_numpy(), + # # (data['lower'] / 1e6).to_numpy(), + # # (data['upper'] / 1e6).to_numpy(), + # # 
color=best_scenarios_color[s], + # # alpha=0.2) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # ax.set_xticks(data.index) + # legend_labels = [substitute_labels[v] for v in best_scenarios] + # legend_handles = [plt.Rectangle((0, 0), 1, 1, + # color=best_scenarios_color[v]) for v in best_scenarios] + # ax.legend(legend_handles, legend_labels, + # loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5), + # title='Best scenario group') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # # plot yearly staff count (Clinical/Pharmacy/Nursing and Midwifery) for best 9 scenarios + # best_cadres = ['Clinical', 'Pharmacy', 'Nursing_and_Midwifery'] + # name_of_plot = f'Yearly staff count for C+P+N total, {target_period()}' + # fig, ax = plt.subplots(figsize=(9, 6)) + # best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1'] + # for s in best_scenarios: + # data = staff_count.loc[staff_count.draw == s].set_index('year').drop(columns='draw').loc[:, best_cadres].sum( + # axis=1) + # ax.plot(data.index, data.values / 1e3, label=substitute_labels[s], color=best_scenarios_color[s]) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Thousands)') + # ax.set_xticks(data.index) + # legend_labels = [substitute_labels[v] for v in best_scenarios] + # legend_handles = [plt.Rectangle((0, 0), 1, 1, + # color=best_scenarios_color[v]) for v in best_scenarios] + # ax.legend(legend_handles, legend_labels, + # loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5), + # title='Best scenario group') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Services by appointment type, {target_period()}' + # num_appts_summarized_in_millions = num_appts_summarized / 1e6 + # yerr_services = np.array([ + # (num_services_summarized['mean'] - 
num_services_summarized['lower']).values, + # (num_services_summarized['upper'] - num_services_summarized['mean']).values, + # ])/1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_services_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_appts_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Services demand by appointment type, {target_period()}' + # num_appts_demand_to_plot = num_appts_demand_summarized / 1e6 + # yerr_services = np.array([ + # (num_service_demand_summarized['mean'] - num_service_demand_summarized['lower']).values, + # (num_service_demand_summarized['upper'] - num_service_demand_summarized['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_appts_demand_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_service_demand_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_appts_demand_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', 
reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Never ran services by appointment type, {target_period()}' + # num_never_ran_appts_summarized_in_millions = num_never_ran_appts_summarized / 1e6 + # yerr_services = np.array([ + # (num_never_ran_services_summarized['mean'] - num_never_ran_services_summarized['lower']).values, + # (num_never_ran_services_summarized['upper'] - num_never_ran_services_summarized['mean']).values, + # ])/1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_never_ran_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_never_ran_services_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Total services demand by appointment type, {target_period()}' + # data_to_plot = num_appts_demand_summarized / 1e6 + # yerr_services = np.array([ + # (num_services_demand_summarized['mean'] - num_services_demand_summarized['lower']).values, + # (num_services_demand_summarized['upper'] - num_services_demand_summarized['mean']).values, + # ])/1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # data_to_plot.plot(kind='bar', stacked=True, color=appt_color, 
rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_services_demand_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Services by treatment type, {target_period()}' + # num_treatments_summarized_in_millions = num_treatments_summarized / 1e6 + # yerr_services = np.array([ + # (num_treatments_total_summarized['mean'] - num_treatments_total_summarized['lower']).values, + # (num_treatments_total_summarized['upper'] - num_treatments_total_summarized['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(10, 6)) + # num_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_treatments_total_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_treatments_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # 
name_of_plot = f'Never ran services by treatment type, {target_period()}' + # num_never_ran_treatments_summarized_in_millions = num_never_ran_treatments_summarized / 1e6 + # yerr_services = np.array([ + # (num_never_ran_treatments_total_summarized['mean'] - num_never_ran_treatments_total_summarized['lower']).values, + # (num_never_ran_treatments_total_summarized['upper'] - num_never_ran_treatments_total_summarized['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(10, 6)) + # num_never_ran_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_never_ran_treatments_total_summarized['mean'].values / 1e6, + # yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Number of staff by cadre, {TARGET_PERIOD[1].year}' + # total_staff_to_plot = (staff_count_2029 / 1000).drop(columns='all_cadres').reindex(num_dalys_summarized.index) + # column_dcsa = total_staff_to_plot.pop('DCSA') + # total_staff_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Thousands', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in total_staff_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + 
# plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'HCW time used by cadre in delivering services , {target_period()}' + # data_to_plot = (hcw_time_used / 1e6).reindex(num_dalys_summarized.index) + # column_dcsa = data_to_plot.pop('DCSA') + # data_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Minutes in Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'HCW time needed to deliver never ran appointments, {target_period()}' + # hcw_time_gap_to_plot = (hcw_time_gap / 1e6).reindex(num_dalys_summarized.index) + # column_dcsa = hcw_time_gap_to_plot.pop('DCSA') + # hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Minutes in Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', 
title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'HCW time needed to deliver ran + never ran appointments, {target_period()}' + hcw_time_gap_to_plot = (hcw_time_demand / 1e9).reindex(num_dalys_summarized.index) + column_dcsa = hcw_time_gap_to_plot.pop('DCSA') + hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + ax.set_ylabel('Billion minutes', fontsize='small') + ax.set(xlabel=None) + xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'HCW cost needed by cadre to deliver never ran appointments, {target_period()}' + # hcw_cost_gap_to_plot = (hcw_cost_gap / 1e6).reindex(num_dalys_summarized.index) + # column_dcsa = hcw_cost_gap_to_plot.pop('DCSA') + # hcw_cost_gap_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # hcw_cost_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('USD in Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + 
# fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + # + # name_of_plot = f'Count proportions of never ran appointments that require specific cadres only, {target_period()}' + # data_to_plot = p_count * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + # ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # # plot the average proportions of all scenarios + # # for c in data_to_plot.columns: + # # plt.axhline(y=data_to_plot[c].mean(), + # # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + # + # name_of_plot = f'Cost proportions of never ran appointments that require specific cadres only, {target_period()}' + # data_to_plot = p_cost * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + # ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # # plot the average proportions of all scenarios + # # for c in data_to_plot.columns: + # # plt.axhline(y=data_to_plot[c].mean(), + # # linestyle='--', 
color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Time proportions of never ran appointments that require specific cadres only, {target_period()}' + # data_to_plot = p_time * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + # ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # # plot the average proportions of all scenarios + # # for c in data_to_plot.columns: + # # plt.axhline(y=data_to_plot[c].mean(), + # # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + # + # name_of_plot = f'Cost distribution of never ran appointments that require specific cadres only, {target_period()}' + # data_to_plot = a_cost / 1e6 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + # ax.set_ylabel('USD in millions') + # ax.set_xlabel('Extra budget allocation scenario') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # # plot the average cost of all scenarios + # # for c in data_to_plot.columns: + # # plt.axhline(y=data_to_plot[c].mean(), + # # linestyle='--', 
color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Time distribution of never ran appointments that require specific cadres only, {target_period()}' + # data_to_plot = a_time / 1e6 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + # ax.set_ylabel('minutes in millions') + # ax.set_xlabel('Extra budget allocation scenario') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # # plot the average cost of all scenarios + # # for c in data_to_plot.columns: + # # plt.axhline(y=data_to_plot[c].mean(), + # # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + # + # name_of_plot = f'HCW cost gap by cadre distribution of never ran appointments, {target_period()}' + # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other'] + # hcw_cost_gap_percent_to_plot = hcw_cost_gap_percent[cadres_to_plot] * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # hcw_cost_gap_percent_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + # #ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_percent_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer 
category') + # # plot the average proportions of all scenarios + # for c in cadres_to_plot: + # plt.axhline(y=hcw_cost_gap_percent_to_plot[c].mean(), + # linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2, + # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'HCW cost gap distribution of never ran appointments that require CNP only, {target_period()}' + # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy'] + # data_to_plot = never_ran_appts_info_that_need_CNP[3][cadres_to_plot] * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + # #ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + # # plot the average proportions of all scenarios + # for c in cadres_to_plot: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2, + # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'Average fractions of HCW time used (CNP, level 1a), {target_period()}' + data_to_plot = hcw_time_usage_summarized.xs('1a', axis=1, level=1, drop_level=True) * 100 + fig, ax = plt.subplots(figsize=(12, 8)) + data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + #ax.set_ylim(0, 100) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = 
[substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90) + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Average fractions of HCW time used (CNP, level 2), {target_period()}' + data_to_plot = hcw_time_usage_summarized.xs('2', axis=1, level=1, drop_level=True) * 100 + fig, ax = plt.subplots(figsize=(12, 8)) + data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + # ax.set_ylim(0, 100) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90) + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'Extra budget allocation among cadres, {target_period()}' + # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other'] + # extra_budget_allocation_to_plot = extra_budget_allocation[cadres_to_plot] * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # extra_budget_allocation_to_plot.plot(kind='bar', color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in extra_budget_allocation_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', 
'_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}' + # total_cost_to_plot = (total_cost_all_yrs / 1e6).drop(columns='all_cadres').reindex(num_dalys_summarized.index) + # column_dcsa = total_cost_to_plot.pop('DCSA') + # total_cost_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in total_cost_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs by cause, {target_period()}' + # num_dalys_by_cause_summarized_in_millions = num_dalys_by_cause_summarized / 1e6 + # yerr_dalys = np.array([ + # (num_dalys_summarized['mean'] - num_dalys_summarized['lower']).values, + # (num_dalys_summarized['upper'] - num_dalys_summarized['mean']).values, + # ])/1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_dalys_by_cause_summarized_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # fig.subplots_adjust(right=0.7) + # ax.legend( + # loc="center left", + # bbox_to_anchor=(0.750, 
0.6), + # bbox_transform=fig.transFigure, + # title='Cause of death or injury', + # title_fontsize='x-small', + # fontsize='x-small', + # reverse=True, + # ncol=1 + # ) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # plot relative numbers for scenarios + name_of_plot = f'DALYs averted vs no extra budget allocation, {target_period()}' + fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, num_dalys_averted_percent, annotation=True) + ax.set_title(name_of_plot) + ax.set_ylabel('Millions') + ax.set_xlabel('Extra budget allocation scenario') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Deaths averted vs no extra budget allocation, {target_period()}' + fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, num_deaths_averted_percent, annotation=True) + ax.set_title(name_of_plot) + ax.set_ylabel('Millions') + ax.set_xlabel('Extra budget allocation scenario') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # todo: plot Deaths averted by cause + + # name_of_plot = f'Service delivery ratio against no expansion, {target_period()}' + # fig, ax = do_bar_plot_with_ci(service_ratio_increased * 100, service_ratio_increased_percent, annotation=True) + # ax.set_title(name_of_plot) + # ax.set_ylabel('Percentage') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Extra staff by cadre against no expansion, {TARGET_PERIOD[1].year}' + # extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex( + # num_dalys_summarized.index).drop(['s_1']) / 1e3 + # column_dcsa = extra_staff_by_cadre_to_plot.pop('DCSA') + # 
extra_staff_by_cadre_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Thousands', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Extra budget by cadre vs no extra budget allocation, {target_period()}' + # extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex( + # num_dalys_summarized.index).drop(index='s_0') / 1e6 + # column_dcsa = extra_cost_by_cadre_to_plot.pop('DCSA') + # extra_cost_by_cadre_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # # name_of_plot = f'Time used increased by cadre and treatment: C + N&M + P vs no 
expansion, {target_period()}' + # # data_to_plot = time_increased_by_cadre_treatment_CNP / 1e6 + # name_of_plot = f'Time used increased by cadre and treatment: C + P vs no expansion, {target_period()}' + # data_to_plot = time_increased_by_cadre_treatment_CP / 1e6 + # data_to_plot['total'] = data_to_plot.sum(axis=1) + # data_to_plot.sort_values(by='total', inplace=True, ascending=False) + # data_to_plot.drop('total', axis=1, inplace=True) + # data_to_plot = data_to_plot[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery', + # 'DCSA', 'Laboratory', 'Mental', 'Radiography']] + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Millions Minutes') + # ax.set_xlabel('Treatment') + # ax.set_xticklabels(data_to_plot.index, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + # ':', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Time used increased by treatment and cadre: C + N&M + P vs no expansion, {target_period()}' + # # name_of_plot = f'Time used increased by treatment and cadre: C + P vs no expansion, {target_period()}' + # data_to_plot = data_to_plot.T + # data_to_plot = data_to_plot.add_suffix('*') + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # ax.set_ylabel('Millions Minutes') + # ax.set_xlabel('Treatment') + # ax.set_xticklabels(data_to_plot.index, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + # ':', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot 
= f'DALYs by cause averted: \nall cadres gap allocation vs no extra budget allocation, {target_period()}' + # data_to_plot = num_dalys_by_cause_averted_CNP / 1e6 + # # name_of_plot = f'DALYs by cause averted: C + P vs no expansion, {target_period()}' + # # data_to_plot = num_dalys_by_cause_averted_CP / 1e6 + # fig, ax = plt.subplots() + # data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values) + # ax.set_ylabel('Millions') + # ax.set_xlabel('Treatment') + # ax.set_xticklabels(data_to_plot.index, rotation=90) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + # ':', '').replace('\n', ''))) + # fig.show() + # plt.close(fig) + # + # name_of_plot = f'DALYs by cause averted %: \nall cadres gap allocation vs no extra budget allocation, {target_period()}' + # data_to_plot = num_dalys_by_cause_averted_percent_CNP * 100 + # fig, ax = plt.subplots() + # data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Treatment') + # ax.set_xticklabels(data_to_plot.index, rotation=90) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + # ':', '').replace('\n', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Services increased by appointment type \nagainst no expansion, {target_period()}' + # num_appts_increased_in_millions = num_appts_increased / 1e6 + # yerr_services = np.array([ + # (num_services_increased['mean'] - num_services_increased['lower']).values, + # (num_services_increased['upper'] - num_services_increased['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_appts_increased_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)-1), num_services_increased['mean'].values / 1e6, yerr=yerr_services, + 
# fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_appts_increased_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name( + # name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + # ) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Never ran services reduced by appointment type \nagainst no expansion, {target_period()}' + # num_never_ran_appts_reduced_to_plot = num_never_ran_appts_reduced / 1e6 + # # yerr_services = np.array([ + # # (num_services_increased['mean'] - num_services_increased['lower']).values, + # # (num_services_increased['upper'] - num_services_increased['mean']).values, + # # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_never_ran_appts_reduced_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services, + # # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_reduced_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name( + # name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + # ) + # fig.show() + # plt.close(fig) + + # 
name_of_plot = f'Never ran services reduced by treatment type \nagainst no expansion, {target_period()}' + # num_never_ran_treatments_reduced_to_plot = num_never_ran_treatments_reduced / 1e6 + # # yerr_services = np.array([ + # # (num_services_increased['mean'] - num_services_increased['lower']).values, + # # (num_services_increased['upper'] - num_services_increased['mean']).values, + # # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_never_ran_treatments_reduced_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services, + # # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_reduced_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name( + # name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + # ) + # fig.show() + # plt.close(fig) + + name_of_plot = f'Services increased by treatment type \nvs no extra budget allocation, {target_period()}' + data_to_plot = num_treatments_increased / 1e6 + yerr_services = np.array([ + (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values, + (num_treatments_total_increased['upper'] - num_treatments_total_increased['mean']).values, + ]) / 1e6 + fig, ax = plt.subplots(figsize=(10, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services, + fmt=".", color="black", 
zorder=100) + ax.set_ylabel('Millions', fontsize='small') + ax.set(xlabel=None) + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW time-used increased by treatment type \nvs no extra budget allocation, {target_period()}' + data_to_plot = hcw_time_increased_by_treatment_type / 1e9 + fig, ax = plt.subplots(figsize=(10, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + ax.set_ylabel('Billion minutes', fontsize='small') + ax.set(xlabel=None) + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW time-used increased by cadre \nvs no extra budget allocation, {target_period()}' + data_to_plot = hcw_time_used_increased / 1e9 + column_dcsa = data_to_plot.pop('DCSA') + data_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + ax.set_ylabel('Billion minutes', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, 
fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW capabilities increased by cadre \nvs no extra budget allocation, {target_period()}' + data_to_plot = hcw_time_capabilities_increased / 1e9 + column_dcsa = data_to_plot.pop('DCSA') + data_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + ax.set_ylabel('Billion minutes', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW time - used, needed, capabilities rescaled, capabilities - by cadre \nvs no extra budget allocation, {target_period()}' + # name_of_plot = f'HCW time - used, needed - by cadre \nvs no extra budget allocation, {target_period()}' + assert (hcw_time_used.index == hcw_time_capabilities_rescaled.index).all().all() + assert (hcw_time_used.index == hcw_time_demand.index).all().all() + assert (hcw_time_used.index == hcw_time_capabilities.index).all().all() + assert (hcw_time_used.columns == hcw_time_capabilities_rescaled.columns).all().all() + assert (hcw_time_used.columns == hcw_time_demand.columns).all().all() + assert (hcw_time_used.columns == 
hcw_time_capabilities.columns).all().all() + use_to_plot = hcw_time_used / 1e9 + cap_to_plot = hcw_time_capabilities / 1e9 + cap_rescaled_to_plot = hcw_time_capabilities_rescaled / 1e9 + demand_to_plot = hcw_time_demand / 1e9 + fig, ax = plt.subplots(figsize=(8, 5)) + use_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, alpha=1.0, position=3, + width=0.15, edgecolor='dimgrey', rot=0, ax=ax) + cap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, alpha=0.3, position=0, + width=0.15, edgecolor='dimgrey', rot=0, ax=ax) + cap_rescaled_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, alpha=0.6, position=1, + width=0.15, edgecolor='dimgrey', rot=0, ax=ax) + demand_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, alpha=0.8, position=2, + width=0.15, edgecolor='dimgrey', rot=0, ax=ax) + ax.set_xlim(right=len(use_to_plot) - 0.45) + ax.set_ylabel('Billion minutes', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in use_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + legend_1 = plt.legend(use_to_plot.columns, loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize='small', + title='Officer category', title_fontsize='small', reverse=True) + fig.add_artist(legend_1) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'DALYs by cause averted vs no extra budget allocation, {target_period()}' + num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6 + yerr_dalys = np.array([ + (num_dalys_averted['mean'] - num_dalys_averted['lower']).values, + (num_dalys_averted['upper'] - num_dalys_averted['mean']).values, + ]) / 1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + num_dalys_by_cause_averted_in_millions.plot(kind='bar', 
stacked=True, color=cause_color, rot=0, ax=ax) + ax.errorbar(range(len(param_names)-1), num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys, + fmt=".", color="black", zorder=100) + ax.set_ylabel('Millions', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_averted.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + fig.subplots_adjust(right=0.7) + ax.legend( + loc="center left", + bbox_to_anchor=(0.750, 0.6), + bbox_transform=fig.transFigure, + title='Cause of death or injury', + title_fontsize='x-small', + fontsize='x-small', + ncol=1, + reverse=True + ) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # plot ROI and CE for all expansion scenarios + + # name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}' + # fig, ax = do_bar_plot_with_ci(ROI) + # ax.set_title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Cost per DALY averted, {target_period()}' + # fig, ax = do_bar_plot_with_ci(CE) + # ax.set_title(name_of_plot) + # ax.set_ylabel('USD dollars') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # todo + # To vary the HRH budget growth rate (default: 4.2%) and do sensitivity analysis \ + # (around the best possible extra budget allocation scenario)? + # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary? 
The \ + # inflation rate of GDP and health workforce budget and the increase rate of salary could be assumed to be \ + # the same, thus no need to consider the increase rate of salary if GDP inflation is not considered. + # To plot time series of staff and budget in the target period to show \ + # how many staff and how much budget to increase yearly (choose the best scenario to illustrate)? + # Before submit a run, merge in the remote master. + # Think about a measure of Universal Health Service Coverage for the scenarios? + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("results_folder", type=Path) # outputs/bshe@ic.ac.uk/scenario_run_for_hcw_expansion_analysis-2024-08-16T160132Z + args = parser.parse_args() + + # Produce results for short-term analysis: 5 years + + # # 2015-2019, before change, incl. mode, hr expansion, etc. + # apply( + # results_folder=args.results_folder, + # output_folder=args.results_folder, + # resourcefilepath=Path('./resources'), + # the_target_period=(Date(2015, 1, 1), Date(2019, 12, 31)) + # ) + # + # # 2020-2024 + # apply( + # results_folder=args.results_folder, + # output_folder=args.results_folder, + # resourcefilepath=Path('./resources'), + # the_target_period=(Date(2020, 1, 1), Date(2024, 12, 31)) + # ) + + # Produce results for long-term analysis: 10 years + # 2020-2029 + apply( + results_folder=args.results_folder, + output_folder=args.results_folder, + resourcefilepath=Path('./resources'), + the_target_period=(Date(2019, 1, 1), Date(2029, 12, 31)) + ) diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py new file mode 100644 index 0000000000..1685a3dcaa --- /dev/null +++ 
b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py @@ -0,0 +1,2926 @@ +""" +This file analyses and plots the services, DALYs, Deaths within different scenarios of expanding current hr by officer +type given some extra budget. Return on investment and marginal productivity of each officer type will be examined. + +The scenarios are defined in scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py. +""" + +import argparse +from collections import Counter +from pathlib import Path +from typing import Tuple + +import numpy as np +import pandas as pd +import statsmodels.api as sm +# import statsmodels.stats as ss +from matplotlib import pyplot as plt + +from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import ( + Minute_Salary_by_Cadre_Level, + avg_increase_rate_exp, + extra_budget_fracs, +) +from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import ( + HRHExpansionByCadreWithExtraBudget, +) +from tlo import Date +from tlo.analysis.utils import ( + APPT_TYPE_TO_COARSE_APPT_TYPE_MAP, + CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP, + COARSE_APPT_TYPE_TO_COLOR_MAP, + SHORT_TREATMENT_ID_TO_COLOR_MAP, + bin_hsi_event_details, + compute_mean_across_runs, + extract_results, + summarize, +) + +# rename scenarios +substitute_labels = { + 's_0': 'no_allocation', + 's_1': 'current_allocation', + 's_2': 'gap_allocation', + 's_3': 'C = P = NM = D = O', + 's_4': 'Clinical (C)', 's_5': 'DCSA (D)', 's_6': 'Nursing_and_Midwifery (NM)', 's_7': 'Pharmacy (P)', + 's_8': 'Other (O)', + 's_9': 'C = D', 's_10': 'C = NM', 's_11': 'C = P', 's_12': 'C = O', 's_13': 'NM = D', + 's_14': 'P = D', 's_15': 'D = O', 's_16': 'P = NM', 's_17': 'NM = O', 's_18': 'P = O', + 's_19': 'C = NM = D', 's_20': 'C = P = D', 's_21': 'C = D = O', 's_22': 'C = P = NM', 's_23': 'C = NM = O', + 
's_24': 'C = P = O', 's_25': 'P = NM = D', 's_26': 'NM = D = O', 's_27': 'P = D = O', 's_28': 'P = NM = O', + 's_29': 'C = P = NM = D', 's_30': 'C = NM = D = O', 's_31': 'C = P = D = O', 's_32': 'C = P = NM = O', + 's_33': 'P = NM = D = O', + 's_*': 'optimal_allocation' +} + +# grouping causes of DALYs and types of treatments +cause_group = { + 'AIDS': 'HIV/AIDS', + 'TB (non-AIDS)': 'TB (non-AIDS)', + 'Malaria': 'Malaria', + 'Childhood Diarrhoea': 'RMNCH', + 'Congenital birth defects': 'RMNCH', + 'Lower respiratory infections': 'RMNCH', + 'Maternal Disorders': 'RMNCH', + 'Measles': 'RMNCH', + 'Neonatal Disorders': 'RMNCH', + 'Schistosomiasis': 'RMNCH', + 'COPD': 'NCDs', + 'Cancer (Bladder)': 'NCDs', + 'Cancer (Breast)': 'NCDs', + 'Cancer (Oesophagus)': 'NCDs', + 'Cancer (Other)': 'NCDs', + 'Cancer (Prostate)': 'NCDs', + 'Depression / Self-harm': 'NCDs', + 'Diabetes': 'NCDs', + 'Epilepsy': 'NCDs', + 'Heart Disease': 'NCDs', + 'Kidney Disease': 'NCDs', + 'Lower Back Pain': 'NCDs', + 'Stroke': 'NCDs', + 'Transport Injuries': 'Transport Injuries', + 'Other': 'Other', +} +cause_group_color = { + 'HIV/AIDS': 'deepskyblue', + 'TB (non-AIDS)': 'mediumslateblue', + 'Malaria': 'khaki', + 'RMNCH': 'mediumaquamarine', + 'NCDs': 'violet', + 'Transport Injuries': 'lightsalmon', + 'Other': 'dimgrey', +} + +treatment_group = { + 'Alri*': 'RMNCH', + 'AntenatalCare*': 'RMNCH', + 'BladderCancer*': 'NCDs', + 'BreastCancer*': 'NCDs', + 'CardioMetabolicDisorders*': 'NCDs', + 'Contraception*': 'RMNCH', + 'Copd*': 'NCDs', + 'DeliveryCare*': 'RMNCH', + 'Depression*': 'NCDs', + 'Diarrhoea*': 'RMNCH', + 'Epi*': 'RMNCH', + 'Epilepsy*': 'NCDs', + 'FirstAttendance*': 'First Attendance', + 'Hiv*': 'HIV/AIDS', + 'Inpatient*': 'Inpatient', + 'Malaria*': 'Malaria', + 'Measles*': 'RMNCH', + 'OesophagealCancer*': 'NCDs', + 'OtherAdultCancer*': 'NCDs', + 'PostnatalCare*': 'RMNCH', + 'ProstateCancer*': 'NCDs', + 'Rti*': 'Transport Injuries', + 'Schisto*': 'RMNCH', + 'Tb*': 'TB (non-AIDS)', + 
'Undernutrition*': 'RMNCH', +} +treatment_group_color = { + 'HIV/AIDS': 'deepskyblue', + 'TB (non-AIDS)': 'mediumslateblue', + 'Malaria': 'khaki', + 'RMNCH': 'mediumaquamarine', + 'NCDs': 'violet', + 'Transport Injuries': 'lightsalmon', + 'First Attendance': 'darkgrey', + 'Inpatient': 'lightgrey', +} + + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, + the_target_period: Tuple[Date, Date] = None): + """ + Extract results of number of services by appt type, number of DALYs, number of Deaths in the target period. + (To see whether to extract these results by short treatment id and/or disease.) + Calculate the extra budget allocated, extra staff by cadre, return on investment and marginal productivity by cadre. + """ + TARGET_PERIOD = the_target_period + the_cause = 'TB (non-AIDS)' # the cause to investigate for yearly DALYs + # TB (non-AIDS), Transport Injuries, Lower respiratory infections, Transport Injuries + + # Definitions of general helper functions + make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + e = HRHExpansionByCadreWithExtraBudget() + return tuple(e._scenarios.keys()) + + def get_num_appts(_df): + """Return the number of services by appt type (total within the TARGET_PERIOD)""" + return (_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code'] + .apply(pd.Series) + .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP) + .groupby(level=0, axis=1).sum() + .sum()) + + def get_num_appts_by_level(_df): + """Return the number of services by appt type and facility level (total within the TARGET_PERIOD)""" + def 
unpack_nested_dict_in_series(_raw: pd.Series): + return pd.concat( + { + idx: pd.DataFrame.from_dict(mydict) for idx, mydict in _raw.items() + } + ).unstack().fillna(0.0).astype(int) + + return _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code_And_Level'] \ + .pipe(unpack_nested_dict_in_series) \ + .sum(axis=0) + + def get_num_services(_df): + """Return the number of services in total of all appt types (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code'] + .apply(pd.Series).sum().sum() + ) + + def get_num_treatments(_df): + """Return the number of treatments by short treatment id (total within the TARGET_PERIOD)""" + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum() + _df.index = _df.index.map(lambda x: x.split('_')[0] + "*") + _df = _df.groupby(level=0).sum() + return _df + + def get_num_treatments_group(_df): + """Return the number of treatments by short treatment id (total within the TARGET_PERIOD)""" + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum() + _df.index = _df.index.map(lambda x: x.split('_')[0] + "*") + _df = _df.rename(index=treatment_group) + _df = _df.groupby(level=0).sum() + return _df + + def get_num_treatments_total(_df): + """Return the number of treatments in total of all treatments (total within the TARGET_PERIOD)""" + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum() + _df.index = _df.index.map(lambda x: x.split('_')[0] + "*") + _df = _df.groupby(level=0).sum().sum() + return pd.Series(_df) + + def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD)""" + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + + def get_num_dalys(_df): + """Return total number of DALYS (Stacked) 
(total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + return pd.Series( + data=_df + .loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + def get_num_dalys_yearly(_df): + """Return total number of DALYS (Stacked) for every year in the TARGET_PERIOD. + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + period = (Date(2010, 1, 1), Date(2034, 12, 31)) + years_needed = [i.year for i in period] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + _df = (_df.loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range']) + .groupby('year').sum() + .sum(axis=1) + ) + return _df + + def get_num_dalys_by_one_cause_yearly(_df, one_cause=the_cause): + """Return total number of DALYS by TB (Stacked) for every year in simulation period 2010-2034. + Throw error if not a record for every year in the period (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + period = (Date(2010, 1, 1), Date(2034, 12, 31)) + years_needed = [i.year for i in period] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + _df = (_df.loc[_df.year.between(*years_needed)] + .drop(columns=['date', 'sex', 'age_range']) + .groupby('year').sum() + ) + _df = _df[one_cause] + return _df + + def get_num_dalys_by_cause(_df): + """Return total number of DALYS by cause (Stacked) (total within the TARGET_PERIOD). 
+ Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + return (_df + .loc[_df.year.between(*years_needed)].drop(columns=['date', 'year', 'li_wealth']) + .sum(axis=0) + ) + + def get_num_dalys_by_cause_group(_df): + """Return total number of DALYS by cause group (Stacked) (total within the TARGET_PERIOD). + Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using + results from runs that crashed mid-way through the simulation). + """ + years_needed = [i.year for i in TARGET_PERIOD] + assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded." + _df = _df.rename(columns=cause_group) # rename cause as cause group + _df = _df.groupby(_df.columns, axis=1).sum() # group up causes in each cause group + return (_df + .loc[_df.year.between(*years_needed)].drop(columns=['date', 'year', 'li_wealth']) + .sum(axis=0) + ) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def find_difference_relative_to_comparison_series( + _ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. 
+ The comparison is `X - COMPARISON`.""" + return (_ser + .unstack(level=0) + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) + .drop(columns=([comparison] if drop_comparison else [])) + .stack() + ) + + def find_difference_relative_to_comparison_dataframe(_df: pd.DataFrame, **kwargs): + """Apply `find_difference_relative_to_comparison_series` to each row in a dataframe""" + return pd.concat({ + _idx: find_difference_relative_to_comparison_series(row, **kwargs) + for _idx, row in _df.iterrows() + }, axis=1).T + + # group scenarios for presentation + def scenario_grouping_coloring(by='effect'): + if by == 'effect': # based on DALYs averted/whether to expand Clinical + Pharmacy + grouping = { + 'C & P & D/NM/O/None': {'s_1', 's_2', 's_3', 's_11', 's_20', 's_22', 's_24', 's_29', 's_31', 's_32', + 's_*'}, + 'C & D/NM/O/None': {'s_4', 's_9', 's_10', 's_12', 's_19', 's_21', 's_23', 's_30'}, + 'P & D/NM/O/None': {'s_7', 's_14', 's_16', 's_18', 's_25', 's_27', 's_28', 's_33'}, + 'D/O/None': {'s_5', 's_8', 's_15', 's_0'}, + 'NM & D/O/None': {'s_6', 's_13', 's_17', 's_26'}, + } + grouping_color = { + 'D/O/None': 'silver', + 'NM & D/O/None': 'lightpink', + 'P & D/NM/O/None': 'violet', + 'C & D/NM/O/None': 'darkorchid', + 'C & P & D/NM/O/None': 'darkturquoise', + } + elif by == 'allocation': + grouping = { + 'D/O': {'s_5', 's_8', 's_15'}, + 'C & D/O/None': {'s_4', 's_9', 's_12', 's_21'}, + 'P & D/O/None': {'s_7', 's_14', 's_18', 's_27'}, + 'NM & D/O/None': {'s_6', 's_13', 's_17', 's_26'}, + 'C & P & D/O/None': {'s_11', 's_20', 's_24', 's_31'}, + 'C & NM & D/O/None': {'s_10', 's_19', 's_23', 's_30'}, + 'P & NM & D/O/None': {'s_16', 's_25', 's_28', 's_33'}, + 'C & P & NM & D/O/None': {'s_3', 's_22', 's_29', 's_32'}, + 'gap_allocation': {'s_2'}, + 'current_allocation': {'s_1'}, + 'optimal_allocation': {'s_*'}, + 'no_allocation': {'s_0'}, + } + keys = ['gap_allocation', 'C & P & NM & D/O/None', 'C & P & D/O/None', + 'current_allocation', 'C & NM 
& D/O/None', 'C & D/O/None', 'P & NM & D/O/None', + 'P & D/O/None', 'NM & D/O/None', 'D/O', 'optimal_allocation', 'no_allocation', + ] + cmap_list = list(map(plt.get_cmap("Set3"), range(len(keys)))) + grouping_color = {keys[idx]: cmap_list[idx] for idx in range(len(keys))} + # grouping_color = { + # 'D/O': 'silver', + # 'C & D/O/None': 'lightskyblue', + # 'P & D/O/None': 'lightgreen', + # 'NM & D/O/None': 'lightpink', + # 'C & P & D/O/None': 'khaki', + # 'C & NM & D/O/None': 'violet', + # 'P & NM & D/O/None': 'burlywood', + # 'C & P & NM & D/O/None': 'darkturquoise', + # 'gap_allocation': 'yellowgreen', + # 'current_allocation': 'thistle', + # 'optimal_allocation': 'gold', + # 'no_allocation': 'lavender', + # } + elif by == 'allocation_alt': # based on how many cadres are expanded + grouping = { + 'no_allocation': {'s_0'}, + '5_cadres_equal_allocation': {'s_3'}, + 'gap_allocation': {'s_2'}, + 'current_allocation': {'s_1'}, + 'optimal_allocation': {'s_*'}, + '1_cadre_allocation': {'s_4', 's_5', 's_6', 's_7', 's_8'}, + '2_cadres_equal_allocation': {'s_9', 's_10', 's_11', 's_12', 's_13', + 's_14', 's_15', 's_16', 's_17', 's_18'}, + '3_cadres_equal_allocation': {'s_19', 's_20', 's_21', 's_22', 's_23', + 's_24', 's_25', 's_26', 's_27', 's_28'}, + '4_cadres_equal_allocation': {'s_29', 's_30', 's_31', 's_32', 's_33'} + + } + # grouping_color = { + # 'no_allocation': 'gray', + # '1_cadre_allocation': 'lightpink', + # '2_cadres_equal_allocation': 'violet', + # '3_cadres_equal_allocation': 'darkorchid', + # '4_cadres_equal_allocation': 'paleturquoise', + # '5_cadres_equal_allocation': 'darkturquoise', + # '5_cadres_current_allocation': 'deepskyblue', + # '5_cadres_gap_allocation': 'royalblue', + # '5_cadres_optimal_allocation': 'khaki', + # } + keys = ['gap_allocation', 'current_allocation', + '5_cadres_equal_allocation', '4_cadres_equal_allocation', '3_cadres_equal_allocation', + '2_cadres_equal_allocation', '1_cadre_allocation', 'optimal_allocation', 'no_allocation'] + 
cmap_list = list(map(plt.get_cmap("Set3"), range(len(keys)))) + grouping_color = {keys[idx]: cmap_list[idx] for idx in range(len(keys))} + return grouping, grouping_color + + def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar. + Annotated with percent statistics from _df_percent, if annotation=True and _df_percent not None.""" + + yerr = np.array([ + (_df['mean'] - _df['lower']).values, + (_df['upper'] - _df['mean']).values, + ]) + + xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} + + colors = [scenario_color[s] for s in _df.index] + + fig, ax = plt.subplots(figsize=(9, 6)) + ax.bar( + xticks.keys(), + _df['mean'].values, + yerr=yerr, + alpha=0.8, + ecolor='dimgrey', + color=colors, + capsize=6, + label=xticks.values(), + zorder=100, + ) + + if annotation: + assert (_df.index == _df_percent.index).all() + for xpos, ypos, text1, text2, text3 in zip(xticks.keys(), _df['upper'].values, + _df_percent['mean'].values, + _df_percent['lower'].values, + _df_percent['upper'].values): + text = f"{int(round(text1 * 100, 2))}%" # \n{[round(text2, 2),round(text3, 2)]}" + ax.text(xpos, ypos + 0.2, text, horizontalalignment='center', fontsize='x-small') + + ax.set_xticks(list(xticks.keys())) + + xtick_label_detail = [substitute_labels[v] for v in xticks.values()] + ax.set_xticklabels(xtick_label_detail, rotation=90, fontsize='medium') + + legend_labels = list(scenario_groups[1].keys())[:-2] + legend_handles = [plt.Rectangle((0, 0), 1, 1, + color=scenario_groups[1][label]) for label in legend_labels] + ax.legend(legend_handles, legend_labels, ncol=2, # loc='center left', bbox_to_anchor=(1, 0.5), + title='Scenario groups') + + ax.grid(axis="y") + ax.set_ylim((None, 12)) + # ax.spines['top'].set_visible(False) + # ax.spines['right'].set_visible(False) + fig.tight_layout() + + return fig, ax + + # def get_scale_up_factor(_df): 
+ # """ + # Return a series of yearly scale up factors for all cadres, + # with index of year and value of list of scale up factors. + # """ + # _df['year'] = _df['date'].dt.year + # _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year', 'scale_up_factor'] + # ].set_index('year') + # _df = _df['scale_up_factor'].apply(pd.Series) + # assert (_df.columns == cadres).all() + # _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index} + # _df_1 = pd.DataFrame(data=_dict).T + # return pd.Series( + # _df_1.loc[:, 0], index=_df_1.index + # ) + + def get_total_cost(_df): + """ + Return a series of yearly total cost for all cadres, + with index of year and values of list of total cost. + """ + _df['year'] = _df['date'].dt.year + _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year', 'total_hr_salary']].set_index('year') + _df = _df['total_hr_salary'].apply(pd.Series) + assert (_df.columns == cadres).all() + _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index} + _df_1 = pd.DataFrame(data=_dict).T + return pd.Series( + _df_1.loc[:, 0], index=_df_1.index + ) + + def get_yearly_hr_count(_df): + """ + Return a series of yearly total cost for all cadres, + with index of year and values of list of total cost. 
+ """ + # format + _df['year'] = _df['date'].dt.year + _df = _df.drop(columns='date').set_index('year').fillna(0) + _df.columns = _df.columns.map(lambda x: x.split('_')[-1]) + _df.rename(columns={'Midwifery': 'Nursing_and_Midwifery'}, inplace=True) + _df = _df.groupby(level=0, axis=1).sum() + assert set(_df.columns) == set(cadres) + _df = _df[cadres] + # get multiplier for popsize=100,000: 145.39609000000002 + _df = _df * 145.39609000000002 + # reformat as a series + _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index} + _df_1 = pd.DataFrame(data=_dict).T + return pd.Series( + _df_1.loc[:, 0], index=_df_1.index + ) + + def get_current_hr(cadres): + """ + Return current (year of 2018/2019) staff counts and capabilities for the cadres specified. + """ + curr_hr_path = Path(resourcefilepath + / 'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv') + curr_hr = pd.read_csv(curr_hr_path).groupby('Officer_Category').agg( + {'Staff_Count': 'sum', 'Total_Mins_Per_Day': 'sum'}).reset_index() + curr_hr['Total_Minutes_Per_Year'] = curr_hr['Total_Mins_Per_Day'] * 365.25 + curr_hr.drop(['Total_Mins_Per_Day'], axis=1, inplace=True) + curr_hr = curr_hr.loc[ + curr_hr['Officer_Category'].isin(cadres), ['Officer_Category', 'Staff_Count'] + ].set_index('Officer_Category').T + return curr_hr[cadres] + + def get_hr_salary(cadres): + """ + Return annual salary for the cadres specified. 
+ """ + salary_path = Path(resourcefilepath + / 'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv') + salary = pd.read_csv(salary_path, index_col=False) + salary = salary.loc[ + salary['Officer_Category'].isin(cadres), ['Officer_Category', 'Annual_Salary_USD'] + ].set_index('Officer_Category').T + return salary[cadres] + + def format_appt_time_and_cost(): + """ + Return the formatted appointment time requirements and costs per cadre + """ + file_path = Path(resourcefilepath + / 'healthsystem' / 'human_resources' / 'definitions' / 'ResourceFile_Appt_Time_Table.csv') + _df = pd.read_csv(file_path, index_col=False) + + time = _df.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category', + values='Time_Taken_Mins').fillna(0.0).T + minute_salary = Minute_Salary_by_Cadre_Level + cost = _df.merge(minute_salary, on=['Facility_Level', 'Officer_Category'], how='left') + cost['cost_USD'] = cost['Time_Taken_Mins'] * cost['Minute_Salary_USD'] + cost = cost.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category', + values='cost_USD').fillna(0.0).T + + return time, cost + + def get_frac_of_hcw_time_used(_df): + """Return the fraction of time used by cadre and facility level""" + CNP_cols = ['date'] + for col in _df.columns[1:]: + if ('Clinical' in col) | ('Nursing_and_Midwifery' in col) | ('Pharmacy' in col): + CNP_cols.append(col) + + _df = _df[CNP_cols].copy() + _df = _df.loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), :] + _df = _df.set_index('date').mean(axis=0) # average over years + + return _df + + def get_hcw_time_by_treatment(): + appointment_time_table = pd.read_csv( + resourcefilepath + / 'healthsystem' + / 'human_resources' + / 'definitions' + / 'ResourceFile_Appt_Time_Table.csv', + index_col=["Appt_Type_Code", "Facility_Level", "Officer_Category"] + ) + + appt_type_facility_level_officer_category_to_appt_time = ( + appointment_time_table.Time_Taken_Mins.to_dict() + ) + + officer_categories = 
appointment_time_table.index.levels[ + appointment_time_table.index.names.index("Officer_Category") + ].to_list() + + times_by_officer_category_treatment_id_per_draw_run = bin_hsi_event_details( + results_folder, + lambda event_details, count: sum( + [ + Counter({ + ( + officer_category, + event_details["treatment_id"].split("_")[0] + ): + count + * appt_number + * appt_type_facility_level_officer_category_to_appt_time.get( + ( + appt_type, + event_details["facility_level"], + officer_category + ), + 0 + ) + for officer_category in officer_categories + }) + for appt_type, appt_number in event_details["appt_footprint"] + ], + Counter() + ), + *TARGET_PERIOD, + True + ) + + time_by_cadre_treatment_per_draw = compute_mean_across_runs(times_by_officer_category_treatment_id_per_draw_run) + + # transform counter to dataframe + def format_time_by_cadre_treatment(_df): + _df.reset_index(drop=False, inplace=True) + for idx in _df.index: + _df.loc[idx, 'Cadre'] = _df.loc[idx, 'index'][0] + _df.loc[idx, 'Treatment'] = _df.loc[idx, 'index'][1] + _df = _df.drop('index', axis=1).rename(columns={0: 'value'}).pivot( + index='Treatment', columns='Cadre', values='value').fillna(0.0) + + _series = _df.sum(axis=1) # sum up cadres + + return _df, _series + + # time_by_cadre_treatment_all_scenarios = { + # f's_{key}': format_time_by_cadre_treatment( + # pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index') + # )[0] for key in range(len(param_names)) + # } + + time_by_treatment_all_scenarios = { + f's_{key}': format_time_by_cadre_treatment( + pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index') + )[1] for key in range(len(param_names)) + + } + time_by_treatment_all_scenarios = pd.DataFrame(time_by_treatment_all_scenarios).T + + # rename scenarios according to param_names + time_by_treatment_all_scenarios.rename( + index={time_by_treatment_all_scenarios.index[i]: param_names[i] + for i in range(len(time_by_treatment_all_scenarios.index))}, 
inplace=True) + + time_increased_by_treatment = time_by_treatment_all_scenarios.subtract( + time_by_treatment_all_scenarios.loc['s_0', :], axis=1).drop('s_0', axis=0).add_suffix('*') + + return time_increased_by_treatment + + # Get parameter/scenario names + param_names = tuple(extra_budget_fracs.drop(columns='s_*')) + # param_names = get_parameter_names_from_scenario_file() + + # Define cadres in order + cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'] + + # Get appointment time and cost requirement + appt_time, appt_cost = format_appt_time_and_cost() + + # # Get scale up factors for all scenarios + # scale_up_factors = extract_results( + # results_folder, + # module='tlo.methods.healthsystem.summary', + # key='HRScaling', + # custom_generate_series=get_scale_up_factor, + # do_scaling=False + # ).pipe(set_param_names_as_column_index_level_0).stack(level=0) + # # check that the scale up factors are all most the same between each run within each draw + # # assert scale_up_factors.eq(scale_up_factors.iloc[:, 0], axis=0).all().all() + # # keep scale up factors of only one run within each draw + # scale_up_factors = scale_up_factors.iloc[:, 0].unstack().reset_index().melt(id_vars='index') + # scale_up_factors[cadres] = scale_up_factors.value.tolist() + # scale_up_factors.drop(columns='value', inplace=True) + + # Get total cost for all scenarios + total_cost = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HRScaling', + custom_generate_series=get_total_cost, + do_scaling=False + ).pipe(set_param_names_as_column_index_level_0).stack(level=0) + total_cost = total_cost.iloc[:, 0].unstack().reset_index().melt(id_vars='index') + total_cost[cadres] = total_cost.value.tolist() + total_cost.drop(columns='value', inplace=True) + total_cost['all_cadres'] = total_cost[[c for c in total_cost.columns if c in cadres]].sum(axis=1) + 
total_cost.rename(columns={'index': 'year'}, inplace=True) + + # total cost of all expansion years + total_cost_all_yrs = total_cost.groupby('draw').sum().drop(columns='year') + + # total extra cost of all expansion years + extra_cost_all_yrs = total_cost_all_yrs.copy() + for s in param_names[1:]: + extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - total_cost_all_yrs.loc['s_0', :] + extra_cost_all_yrs.drop(index='s_0', inplace=True) + + # get yearly hr count + yearly_hr_count = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='number_of_hcw_staff', + custom_generate_series=get_yearly_hr_count, + do_scaling=False + ).pipe(set_param_names_as_column_index_level_0).stack(level=0) + # check that the staff counts are the same between each run within each draw + for i in range(len(yearly_hr_count.index)): + for j in yearly_hr_count.columns[1:]: + for k in range(len(cadres)): + assert abs(yearly_hr_count.iloc[i, j][k] - yearly_hr_count.iloc[i, 0][k]) < 1/1e8 + # store results for only one run per draw + yearly_hr_count = yearly_hr_count.iloc[:, 0].unstack().reset_index().melt(id_vars='index') + yearly_hr_count[cadres] = yearly_hr_count.value.tolist() + yearly_hr_count.drop(columns='value', inplace=True) + yearly_hr_count['all_cadres'] = yearly_hr_count[[c for c in yearly_hr_count.columns if c in cadres]].sum(axis=1) + yearly_hr_count.rename(columns={'index': 'year'}, inplace=True) + + # get extra count = staff count - staff count of no expansion s_1 + # note that annual staff increase rate = scale up factor - 1 + extra_staff = yearly_hr_count.drop( + yearly_hr_count[yearly_hr_count.year.isin(range(2010, 2024))].index, axis=0 + ).reset_index(drop=True) + staff_increase_rate = extra_staff.copy() + staff_2024 = pd.DataFrame(extra_staff.loc[(extra_staff.year == 2024) + & (extra_staff.draw == 's_0'), :]) + for i in extra_staff.index: + extra_staff.iloc[i, 2:] = extra_staff.iloc[i, 2:] - staff_2024.iloc[0, 2:] + 
staff_increase_rate.iloc[i, 2:] = (extra_staff.iloc[i, 2:] / staff_2024.iloc[0, 2:]) + # checked that this is slightly different with hr_increase_rates from preparation script, due the calculation + # process are not the same + + # check total cost calculated is increased as expected + # also checked (in excel) that the yearly_hr_count (s_0 and s_1) are expanded as expected + years = range(2025, the_target_period[1].year + 1) + budget_growth_rate = 0.042 # 0.042, 0.058, 0.026 + for s in param_names[1:]: + assert (abs( + total_cost.loc[(total_cost.year == 2034) & (total_cost.draw == s), 'all_cadres'].values[0] - + (1 + budget_growth_rate) ** len(years) * total_cost.loc[ + (total_cost.year == 2025) & (total_cost.draw == 's_0'), 'all_cadres'].values[0] + ) < 1e-6).all() + + # Absolute Number of Deaths and DALYs and Services + num_deaths = extract_results( + results_folder, + module='tlo.methods.demography', + key='death', + custom_generate_series=get_num_deaths, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_dalys = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_dalys_yearly = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_yearly, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_dalys_by_one_cause_yearly = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_one_cause_yearly, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_dalys_by_cause = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_by_wealth_stacked_by_age_and_time", + custom_generate_series=get_num_dalys_by_cause, + do_scaling=True, + 
).pipe(set_param_names_as_column_index_level_0) + + num_dalys_by_cause_group = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_by_wealth_stacked_by_age_and_time", + custom_generate_series=get_num_dalys_by_cause_group, + do_scaling=True, + ).pipe(set_param_names_as_column_index_level_0) + + num_appts = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_appts, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_appts_by_level = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_appts_by_level, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_services = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_services, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_treatments = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_treatments, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_treatments_group = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_treatments_group, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_treatments_total = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event_non_blank_appt_footprint', + custom_generate_series=get_num_treatments_total, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_never_ran_appts = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + 
key='Never_ran_HSI_Event', + custom_generate_series=get_num_appts, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_never_ran_appts_by_level = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_num_appts_by_level, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + num_never_ran_services = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_num_services, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + # num_never_ran_treatments_total = extract_results( + # results_folder, + # module='tlo.methods.healthsystem.summary', + # key='Never_ran_HSI_Event', + # custom_generate_series=get_num_treatments_total, + # do_scaling=True + # ).pipe(set_param_names_as_column_index_level_0) + + # num_never_ran_treatments = extract_results( + # results_folder, + # module='tlo.methods.healthsystem.summary', + # key='Never_ran_HSI_Event', + # custom_generate_series=get_num_treatments, + # do_scaling=True + # ).pipe(set_param_names_as_column_index_level_0) + + # get total service demand + assert len(num_services) == len(num_never_ran_services) == 1 + assert (num_services.columns == num_never_ran_services.columns).all() + num_services_demand = num_services + num_never_ran_services + # ratio_services = num_services / num_services_demand + + assert (num_appts.columns == num_never_ran_appts.columns).all() + num_never_ran_appts.loc['Lab / Diagnostics', :] = 0 + num_never_ran_appts = num_never_ran_appts.reindex(num_appts.index).fillna(0.0) + assert (num_appts.index == num_never_ran_appts.index).all() + num_appts_demand = num_appts + num_never_ran_appts + + hcw_time_usage = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + 
key='Capacity_By_OfficerType_And_FacilityLevel',#'Capacity',#'Capacity_By_OfficerType_And_FacilityLevel', + custom_generate_series=get_frac_of_hcw_time_used, + do_scaling=False + ).pipe(set_param_names_as_column_index_level_0) + + # get absolute numbers for scenarios + # sort the scenarios according to their DALYs values, in ascending order + num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names).sort_values(by='mean') + num_dalys_summarized.to_csv(output_folder / 'num_dalys_summarized.csv') + num_dalys_by_cause_summarized = summarize(num_dalys_by_cause, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_dalys_by_cause_summarized.to_csv(output_folder / 'num_dalys_by_cause_summarized.csv') + num_dalys_by_cause_group_summarized = summarize(num_dalys_by_cause_group, only_mean=True + ).T.reindex(param_names).reindex(num_dalys_summarized.index) + num_dalys_by_cause_group_summarized.to_csv(output_folder / 'num_dalys_by_cause_group_summarized.csv') + + num_dalys_yearly_summarized = (summarize(num_dalys_yearly) + .stack([0, 1]) + .rename_axis(['year', 'scenario', 'stat']) + .reset_index(name='count')) + num_dalys_yearly_summarized.to_csv(output_folder / 'num_dalys_yearly_summarized.csv') + + num_dalys_by_one_cause_yearly_summarized = (summarize(num_dalys_by_one_cause_yearly) + .stack([0, 1]) + .rename_axis(['year', 'scenario', 'stat']) + .reset_index(name='count')) + name_of_data = f'num_dalys_by_{the_cause}_yearly_summarized.csv' + num_dalys_by_one_cause_yearly_summarized.to_csv(output_folder / name_of_data) + + # num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + + num_services_summarized = summarize(num_services).loc[0].unstack().reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_services_summarized.to_csv(output_folder / 'num_services_summarized.csv') + num_appts_summarized = summarize(num_appts, 
only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_appts_by_level_summarized = summarize(num_appts_by_level, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index).fillna(0.0) + num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex( + param_names).reindex(num_dalys_summarized.index).fillna(0.0) + num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_treatments_group_summarized = summarize(num_treatments_group, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_treatments_group_summarized.to_csv(output_folder / 'num_treatments_area_summarized.csv') + # num_treatments_total_summarized = summarize(num_treatments_total).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + + num_never_ran_services_summarized = summarize(num_never_ran_services).loc[0].unstack().reindex(param_names).reindex( + num_dalys_summarized.index + ) + num_never_ran_appts_summarized = summarize(num_never_ran_appts, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + # num_never_ran_treatments_summarized = summarize(num_never_ran_treatments, only_mean=True).T.reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + # num_never_ran_treatments_total_summarized = summarize(num_never_ran_treatments_total).loc[0].unstack().reindex(param_names).reindex( + # num_dalys_summarized.index + # ) + num_services_demand_summarized = summarize(num_services_demand).loc[0].unstack().reindex(param_names).reindex( + num_dalys_summarized.index + ) + # ratio_service_summarized = summarize(ratio_services).loc[0].unstack().reindex(param_names).reindex( + # 
num_dalys_summarized.index + # ) + hcw_time_usage_summarized = summarize(hcw_time_usage, only_mean=True).T.reindex(param_names).reindex( + num_dalys_summarized.index + ) + hcw_time_usage_summarized.columns = [col.replace('OfficerType=', '').replace('FacilityLevel=', '') + for col in hcw_time_usage_summarized.columns] + hcw_time_usage_summarized.columns = hcw_time_usage_summarized.columns.str.split(pat='|', expand=True) + + # get relative numbers for scenarios, compared to no_expansion scenario: s_0 + num_services_increased = summarize( + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_services.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + hcw_time_increased_by_treatment_type = get_hcw_time_by_treatment().reindex(num_dalys_summarized.index).drop(['s_0']) + + num_services_increased_percent = summarize( + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_services.loc[0], + comparison='s_0', + scaled=True) + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + num_services_increased_percent.to_csv(output_folder / 'num_services_increased_percent.csv') + + num_deaths_averted = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_deaths.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_deaths_averted_percent = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_deaths.loc[0], + comparison='s_0', + scaled=True) + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_dalys_averted = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_dalys.loc[0], + comparison='s_0') + ).T + 
).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + num_dalys_averted.to_csv(output_folder / 'num_dalys_averted.csv') + + num_dalys_averted_percent = summarize( + -1.0 * + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_dalys.loc[0], + comparison='s_0', + scaled=True + ) + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + num_dalys_averted_percent.to_csv(output_folder / 'num_dalys_averted_percent.csv') + + num_dalys_by_cause_averted = summarize( + -1.0 * find_difference_relative_to_comparison_dataframe( + num_dalys_by_cause, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + num_dalys_by_cause_group_averted = summarize( + -1.0 * find_difference_relative_to_comparison_dataframe( + num_dalys_by_cause_group, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + num_dalys_by_cause_group_averted.to_csv(output_folder / 'num_dalys_by_cause_area_averted.csv') + + num_dalys_by_cause_averted_percent = summarize( + -1.0 * find_difference_relative_to_comparison_dataframe( + num_dalys_by_cause, + comparison='s_0', + scaled=True + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_dalys_by_cause_group_averted_percent = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_dalys_by_cause_group, + # comparison='s_0', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_22', :].sort_values(ascending=False) + # num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False) + num_dalys_by_cause_averted_percent_CNP = num_dalys_by_cause_averted_percent.loc['s_22', :].sort_values( + ascending=False) + # num_dalys_by_cause_averted_percent_CP = 
num_dalys_by_cause_averted_percent.loc['s_11', :].sort_values( + # ascending=False) + + # num_dalys_by_cause_averted_percent = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_dalys_by_cause, + # comparison='s_1', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + num_appts_increased = summarize( + find_difference_relative_to_comparison_dataframe( + num_appts, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + + # num_never_ran_appts_reduced = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_never_ran_appts, + # comparison='s_1', + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + # num_never_ran_treatments_reduced = summarize( + # -1.0 * find_difference_relative_to_comparison_dataframe( + # num_never_ran_treatments, + # comparison='s_1', + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + # num_appts_increased_percent = summarize( + # find_difference_relative_to_comparison_dataframe( + # num_appts, + # comparison='s_1', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + num_treatments_increased = summarize( + find_difference_relative_to_comparison_dataframe( + num_treatments, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + num_treatments_increased.to_csv(output_folder / 'num_treatments_type_increased.csv') + + num_treatments_group_increased = summarize( + find_difference_relative_to_comparison_dataframe( + num_treatments_group, + comparison='s_0', + ), + only_mean=True + ).T.reindex(num_dalys_summarized.index).drop(['s_0']) + num_treatments_group_increased.to_csv(output_folder / 'num_treatments_area_increased.csv') + + # num_treatments_increased_percent = summarize( + # find_difference_relative_to_comparison_dataframe( + # 
num_treatments, + # comparison='s_1', + # scaled=True + # ), + # only_mean=True + # ).T.reindex(num_dalys_summarized.index).drop(['s_1']) + + num_treatments_total_increased = summarize( + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_treatments_total.loc[0], + comparison='s_0') + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + + num_treatments_total_increased_percent = summarize( + pd.DataFrame( + find_difference_relative_to_comparison_series( + num_treatments_total.loc[0], + comparison='s_0', + scaled=True) + ).T + ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0']) + num_treatments_total_increased_percent.to_csv(output_folder / 'num_treatments_total_increased_%.csv') + + # service_ratio_increased = summarize( + # pd.DataFrame( + # find_difference_relative_to_comparison_series( + # ratio_services.loc[0], + # comparison='s_1') + # ).T + # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1']) + + # service_ratio_increased_percent = summarize( + # pd.DataFrame( + # find_difference_relative_to_comparison_series( + # ratio_services.loc[0], + # comparison='s_1', + # scaled=True) + # ).T + # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1']) + + # Check that when we sum across the causes/appt types, + # we get the same total as calculated when we didn't split by cause/appt type. 
+ assert ( + (num_appts_increased.sum(axis=1).sort_index() + - num_services_increased['mean'].sort_index() + ) < 1e-6 + ).all() + + assert ( + (num_dalys_by_cause_averted.sum(axis=1).sort_index() + - num_dalys_averted['mean'].sort_index() + ) < 1e-6 + ).all() + + assert ( + (num_dalys_by_cause_group_averted.sum(axis=1).sort_index() + - num_dalys_averted['mean'].sort_index() + ) < 1e-6 + ).all() + + assert ( + (num_treatments_increased.sum(axis=1).sort_index() + - num_treatments_total_increased['mean'].sort_index() + ) < 1e-6 + ).all() + + assert ( + (num_treatments_group_increased.sum(axis=1).sort_index() + - num_treatments_total_increased['mean'].sort_index() + ) < 1e-6 + ).all() + + # get time used by services delivered + def hcw_time_or_cost_used(time_cost_df=appt_time, count_df=num_appts_by_level_summarized): + cols_1 = count_df.columns + cols_2 = time_cost_df.columns + # check that appts (at a level) not in appt_time (as defined) have count 0 and drop them + # assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() -> ('2', 'Tomography') + # replace Tomography from level 2 to level 3 + count_df.loc[:, ('3', 'Tomography')] += count_df.loc[:, ('2', 'Tomography')] + count_df.loc[:, ('2', 'Tomography')] = 0 + assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() + if len(list(set(cols_1) - set(cols_2))) > 0: + _count_df = count_df.drop(columns=list(set(cols_1) - set(cols_2))) + else: + _count_df = count_df.copy() + assert set(_count_df.columns).issubset(set(cols_2)) + # calculate hcw time gap + use = pd.DataFrame(index=_count_df.index, + columns=time_cost_df.index) + for i in use.index: + for j in use.columns: + use.loc[i, j] = _count_df.loc[i, :].mul( + time_cost_df.loc[j, _count_df.columns] + ).sum() + # reorder columns to be consistent with cadres + use = use[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Radiography']] + # reorder index to be consistent with descending order of DALYs 
averted + use = use.reindex(num_dalys_summarized.index) + + # add columns 'total' and 'other' + use['all'] = use.sum(axis=1) + use['Other'] = use[['Dental', 'Laboratory', 'Mental', 'Radiography']].sum(axis=1) + use.drop(columns=['Dental', 'Laboratory', 'Mental', 'Radiography'], inplace=True) + + use_increased = use.subtract(use.loc['s_0', :], axis=1).drop('s_0', axis=0) + + use_increase_percent = use.subtract(use.loc['s_0', :], axis=1 + ).divide(use.loc['s_0', :], axis=1).drop('s_0', axis=0) + + return use, use_increased, use_increase_percent + + hcw_time_used = hcw_time_or_cost_used(time_cost_df=appt_time)[0] + hcw_time_increased_by_cadre = hcw_time_or_cost_used(time_cost_df=appt_time)[1] + hcw_time_increased_by_cadre_percent = hcw_time_or_cost_used(time_cost_df=appt_time)[2] + hcw_time_increased_by_cadre.to_csv(output_folder / 'hcw_time_increased_by_cadre.csv') + hcw_time_increased_by_cadre_percent.to_csv(output_folder / 'hcw_time_increased_by_cadre_percent.csv') + + # get HCW time and cost needed to run the never run appts + def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by_level_summarized): + cols_1 = count_df.columns + cols_2 = time_cost_df.columns + # check that never ran appts (at a level) not in appt_time (as defined) have count 0 and drop them + assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() + if len(list(set(cols_1) - set(cols_2))) > 0: + _count_df = count_df.drop(columns=list(set(cols_1) - set(cols_2))) + else: + _count_df = count_df.copy() + assert set(_count_df.columns).issubset(set(cols_2)) + # calculate hcw time gap + gap = pd.DataFrame(index=_count_df.index, + columns=time_cost_df.index) + for i in gap.index: + for j in gap.columns: + gap.loc[i, j] = _count_df.loc[i, :].mul( + time_cost_df.loc[j, _count_df.columns] + ).sum() + # reorder columns to be consistent with cadres + gap = gap[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Radiography']] + # 
reorder index to be consistent with descending order of DALYs averted
+ def get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Pharmacy'], appts_count_all=num_never_ran_appts_by_level_summarized + ): + # find the appts that need all cadres in cadres_to_find + def find_never_ran_appts_that_need_specific_cadres(): + appts_to_find = [] + _common_cols = appt_time.columns.intersection(appts_count_all.columns) + # already checked above that columns in the latter that are not in the former have 0 count + for col in _common_cols: + if ((appt_time.loc[cadres_to_find, col] > 0).all() + and (appt_time.loc[~appt_time.index.isin(cadres_to_find), col] == 0).all()): + appts_to_find.append(col) + + return appts_to_find + + # counts and count proportions of all never ran + _appts = find_never_ran_appts_that_need_specific_cadres() + _counts = (appts_count_all[_appts].groupby(level=1, axis=1).sum() + .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP).groupby(level=0, axis=1).sum() + .reindex(num_dalys_summarized.index)) + _counts_all = (appts_count_all.groupby(level=1, axis=1).sum() + .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP).groupby(level=0, axis=1).sum() + .reindex(num_dalys_summarized.index)) + assert (_counts.index == _counts_all.index).all() + _proportions = _counts / _counts_all[_counts.columns] + + # hcw time gap and proportions + _time_gap = hcw_time_or_cost_gap(appt_time, appts_count_all[_appts]) + assert (_time_gap.index == hcw_time_gap.index).all() + _time_gap_proportions = _time_gap / hcw_time_gap[_time_gap.columns] + + # hcw cost gap and proportions + _cost_gap = hcw_time_or_cost_gap(appt_cost, appts_count_all[_appts]) + assert (_cost_gap.index == hcw_cost_gap.index).all() + _cost_gap_proportions = _cost_gap / hcw_cost_gap[_cost_gap.columns] + # cost gap distribution among cadres + _cost_gap_percent = pd.DataFrame(index=_cost_gap.index, columns=_cost_gap.columns) + for i in _cost_gap_percent.index: + _cost_gap_percent.loc[i, :] = _cost_gap.loc[i, :] / _cost_gap.loc[i, :].sum() + + # if sum up all 
appt types/cadres + _proportions_total = _counts.sum(axis=1) / _counts_all.sum(axis=1) + _cost_gap_proportions_total = _cost_gap.sum(axis=1) / hcw_cost_gap.sum(axis=1) + _time_gap_proportions_total = _time_gap.sum(axis=1) / hcw_time_gap.sum(axis=1) + + return (_proportions_total, _cost_gap_proportions_total, _cost_gap, _cost_gap_percent, + _time_gap_proportions_total, _time_gap) + + never_ran_appts_info_that_need_CNP = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Nursing_and_Midwifery', 'Pharmacy']) + never_ran_appts_info_that_need_CP = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Pharmacy']) + never_ran_appts_info_that_need_CN = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical', 'Nursing_and_Midwifery']) + never_ran_appts_info_that_need_NP = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Nursing_and_Midwifery', 'Pharmacy']) + never_ran_appts_info_that_need_C = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Clinical']) + never_ran_appts_info_that_need_N = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Nursing_and_Midwifery']) + never_ran_appts_info_that_need_P = get_never_ran_appts_info_that_need_specific_cadres( + cadres_to_find=['Pharmacy']) + + # cost/time proportions within never ran appts, in total of all cadres + p_cost = pd.DataFrame(index=num_services_summarized.index) + p_cost['C & P & NM'] = never_ran_appts_info_that_need_CNP[1] + p_cost['C & P'] = never_ran_appts_info_that_need_CP[1] + p_cost['C & NM'] = never_ran_appts_info_that_need_CN[1] + p_cost['P & NM'] = never_ran_appts_info_that_need_NP[1] + p_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[1] + p_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[1] + p_cost['Nursing_and_Midwifery (NM)'] = never_ran_appts_info_that_need_N[1] + p_cost['Other cases'] = 1 - p_cost[p_cost.columns[0:7]].sum(axis=1) + + p_time = 
pd.DataFrame(index=num_services_summarized.index) + p_time['C & P & NM'] = never_ran_appts_info_that_need_CNP[4] + p_time['C & P'] = never_ran_appts_info_that_need_CP[4] + p_time['C & NM'] = never_ran_appts_info_that_need_CN[4] + p_time['P & NM'] = never_ran_appts_info_that_need_NP[4] + p_time['Clinical (C)'] = never_ran_appts_info_that_need_C[4] + p_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[4] + p_time['Nursing_and_Midwifery (NM)'] = never_ran_appts_info_that_need_N[4] + p_time['Other cases'] = 1 - p_time[p_time.columns[0:7]].sum(axis=1) + + # absolute cost/time gap within never ran appts + a_cost = pd.DataFrame(index=num_services_summarized.index) + a_cost['C & P & NM'] = never_ran_appts_info_that_need_CNP[2].sum(axis=1) + a_cost['C & P'] = never_ran_appts_info_that_need_CP[2].sum(axis=1) + a_cost['C & NM'] = never_ran_appts_info_that_need_CN[2].sum(axis=1) + a_cost['P & NM'] = never_ran_appts_info_that_need_NP[2].sum(axis=1) + a_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[2].sum(axis=1) + a_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[2].sum(axis=1) + a_cost['Nursing_and_Midwifery (NM)'] = never_ran_appts_info_that_need_N[2].sum(axis=1) + a_cost['Other cases'] = hcw_cost_gap.sum(axis=1) - a_cost.sum(axis=1) + + a_time = pd.DataFrame(index=num_services_summarized.index) + a_time['C & P & NM'] = never_ran_appts_info_that_need_CNP[5].sum(axis=1) + a_time['C & P'] = never_ran_appts_info_that_need_CP[5].sum(axis=1) + a_time['C & NM'] = never_ran_appts_info_that_need_CN[5].sum(axis=1) + a_time['P & NM'] = never_ran_appts_info_that_need_NP[5].sum(axis=1) + a_time['Clinical (C)'] = never_ran_appts_info_that_need_C[5].sum(axis=1) + a_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[5].sum(axis=1) + a_time['Nursing_and_Midwifery (NM)'] = never_ran_appts_info_that_need_N[5].sum(axis=1) + a_time['Other cases'] = hcw_time_gap.sum(axis=1) - a_time.sum(axis=1) + + # appts count proportions within never ran appts, in total of all 
cadres + p_count = pd.DataFrame(index=num_services_summarized.index) + p_count['C & P & NM'] = never_ran_appts_info_that_need_CNP[0] + p_count['C & P'] = never_ran_appts_info_that_need_CP[0] + p_count['C & NM'] = never_ran_appts_info_that_need_CN[0] + p_count['P & NM'] = never_ran_appts_info_that_need_NP[0] + p_count['Clinical (C)'] = never_ran_appts_info_that_need_C[0] + p_count['Pharmacy (P)'] = never_ran_appts_info_that_need_P[0] + p_count['Nursing_and_Midwifery (NM)'] = never_ran_appts_info_that_need_N[0] + p_count['Other cases'] = 1 - p_count[p_count.columns[0:7]].sum(axis=1) + + # define color for the cadres combinations above + cadre_comb_color = { + 'C & P & NM': 'royalblue', + 'C & P': 'turquoise', + 'C & NM': 'gold', + 'P & NM': 'yellowgreen', + 'Clinical (C)': 'mediumpurple', + 'Pharmacy (P)': 'limegreen', + 'Nursing_and_Midwifery (NM)': 'pink', + 'Other cases': 'gray', + } + + # Checked that Number_By_Appt_Type_Code and Number_By_Appt_Type_Code_And_Level have not exactly same results + + # hcw time by cadre and treatment: draw = 22: C + N + P vs no expansion, draw = 11, C + P vs no expansion + # time_increased_by_cadre_treatment_CNP = get_hcw_time_by_treatment(21) + # time_increased_by_cadre_treatment_CP = get_hcw_time_by_treatment(10) + + # # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1 + # # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios + # ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns) + # # todo: for the bad scenarios (s_5, s_8, s_15), the dalys averted are negative + # # (maybe only due to statistical variation; relative difference to s_1 are close to 0%), + # # thus CE does not make sense. 
+ # # CE = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns) + # for i in ROI.index: + # ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost_all_yrs.loc[i, 'all_cadres'] + # # CE.loc[i, 'mean'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'mean'] + # # CE.loc[i, 'lower'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'upper'] + # # CE.loc[i, 'upper'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower'] + + # prepare colors for plots + appt_color = { + appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized.columns + } + treatment_color = { + treatment: SHORT_TREATMENT_ID_TO_COLOR_MAP.get(treatment, np.nan) + for treatment in num_treatments_summarized.columns + } + cause_color = { + cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan) + for cause in num_dalys_by_cause_summarized.columns + } + officer_category_color = { + 'Clinical': 'blue', + 'DCSA': 'orange', + 'Nursing_and_Midwifery': 'red', + 'Pharmacy': 'green', + 'Dental': 'purple', + 'Laboratory': 'orchid', + 'Mental': 'plum', + 'Nutrition': 'thistle', + 'Radiography': 'lightgray', + 'Other': 'gray' + } + # get scenario color + # scenario_groups = scenario_grouping_coloring(by='effect') + # scenario_groups = scenario_grouping_coloring(by='allocation_alt') + scenario_groups = scenario_grouping_coloring(by='allocation') + scenario_color = {} + for s in param_names: + for k in scenario_groups[1].keys(): + if s in scenario_groups[0][k]: + scenario_color[s] = scenario_groups[1][k] + + # plot 4D data: relative increases of Clinical, Pharmacy, and Nursing_and_Midwifery as three coordinates,\ + # percentage of DALYs averted decides the color of that scatter point + # prepare extra budget allocation + extra_budget_allocation = extra_budget_fracs.T.reindex(num_dalys_summarized.index) + extra_budget_allocation['Other'] = extra_budget_allocation[ + ['Dental', 'Laboratory', 
'Mental', 'Radiography'] + ].sum(axis=1) + # prepare hrh increase rates in the same format for regression analysis + increase_rate_avg_exp = avg_increase_rate_exp.T.reindex(num_dalys_summarized.index) + increase_rate_avg_exp['Other'] = increase_rate_avg_exp['Dental'].copy() + + name_of_plot = f'3D DALYs averted (%) vs no extra budget allocation, {target_period()}' + # name_of_plot = f'DALYs averted (%) vs no HCW expansion investment (avg. HCW increase rate), {target_period()}' + heat_data = pd.merge(num_dalys_averted_percent['mean'], + extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']], + # increase_rate_avg_exp[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']], + left_index=True, right_index=True, how='inner') + # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + img = ax.scatter(heat_data['Clinical'], heat_data['Pharmacy'], heat_data['Nursing_and_Midwifery'], + alpha=0.8, marker='o', #s=heat_data['mean'] * 2000, c=colors, + c=heat_data['mean'] * 100, cmap='viridis' + ) + # plot lines from the best point to three axes panes + ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]], + [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]], + [0, heat_data['Nursing_and_Midwifery'][0]], + linestyle='--', color='gray', alpha=0.8) + ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]], + [0, heat_data['Pharmacy'][0]], + [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]], + linestyle='--', color='gray', alpha=0.8) + ax.plot3D([0, heat_data['Clinical'][0]], + [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]], + [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]], + linestyle='--', color='gray', alpha=0.8) + ax.set_xlabel('Fraction of extra budget 
allocated to \nClinical cadre', fontsize='small') + # ax.set_xlabel('Avg. annual increase rate of \nClinical cadre', fontsize='small') + ax.set_ylabel('Pharmacy cadre', fontsize='small') + #ax.invert_xaxis() + ax.invert_yaxis() + ax.set_zlabel('Nursing and Midwifery cadre', fontsize='small') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.1, label='DALYs averted %') + plt.title(name_of_plot) + plt.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'3D DALYs averted, Services increased and Treatment increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig = plt.figure() + # ax = fig.add_subplot(111, projection='3d') + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', + # c=colors) + # ax.set_xlabel('Services increased %') + # ax.set_ylabel('Treatments increased %') + # ax.set_zlabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper 
center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'2D DALYs averted, Services increased and Treatment increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2], + # alpha=0.8, marker='o', s=2000 * heat_data.iloc[:, 0], + # c=colors) + # ax.set_xlabel('Services increased %') + # ax.set_ylabel('Treatments increased %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs averted and Services increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in 
heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', c=colors) + # ax.set_xlabel('Services increased %') + # ax.set_ylabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, + # loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2, + # title='Scenario groups') + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs averted and Treatments increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'], + # num_treatments_total_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', c=colors) + # ax.set_xlabel('Treatments increased %') + # ax.set_ylabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2) + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs averted and Services 
ratio increased, {target_period()}' + # heat_data = pd.concat([num_dalys_averted_percent['mean'], service_ratio_increased_percent['mean']], axis=1) + # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22'] + # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)] + # colors = [scenario_color[s] for s in heat_data.index] + # fig, ax = plt.subplots() + # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0], + # alpha=0.8, marker='o', c=colors) + # ax.set_xlabel('Service delivery ratio increased %') + # ax.set_ylabel('DALYs averted %') + # legend_labels = list(scenario_groups[1].keys()) + # legend_handles = [plt.Line2D([0, 0], [0, 0], + # linestyle='none', marker='o', color=scenario_groups[1][label] + # ) for label in legend_labels + # ] + # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2) + # plt.title(name_of_plot) + # plt.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # do some linear regression to see the isolated effects of individual cadres and combined effects of C, N, P cadres + outcome_data = num_dalys_averted_percent['mean'] + # outcome_data = num_services_increased_percent['mean'] + # outcome_data = num_treatments_total_increased_percent['mean'] + regression_data = pd.merge(outcome_data, + increase_rate_avg_exp, + # extra_budget_allocation, + left_index=True, right_index=True, how='inner') + # regression_data.drop(index='s_2', inplace=True) + # regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy'] + # regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery'] + # regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery'] + # regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy'] + # * 
regression_data['Nursing_and_Midwifery']) + cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'] + regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True) + predictor = regression_data[regression_data.columns[1:]] # .drop(index=['s_*', 's_2', 's_1'], axis=0) + outcome = regression_data['mean'] # .drop(index=['s_*', 's_2', 's_1'], axis=0) # regression model without "optimal" data + predictor = sm.add_constant(predictor) # add constant term + est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit() + print(est.summary()) + + # calculate the predicted DALYs based on the regression results + for i in regression_data.index: + regression_data.loc[i, 'predicted'] = ( + regression_data.loc[i, ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']].dot( + est.params[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']] + ) + + est.params['const'] + ) + + # plot mean and predicted DALYs from regression analysis + # name_of_plot = f'DALYs-averted simulated vs predicted from linear regression on extra budget allocation' + name_of_plot = 'DALYs-averted simulated vs predicted from linear regression on HRH increase rate' + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot = regression_data[['mean', 'predicted']] * 100 + data_to_plot['strategy'] = data_to_plot.index + data_to_plot.rename(columns={'mean': 'simulated'}, inplace=True) + data_to_plot.plot.scatter(x='strategy', y='simulated', color='blue', label='simulated', ax=ax) + data_to_plot.plot.scatter(x='strategy', y='predicted', color='orange', label='predicted', ax=ax) + ax.set_ylabel('DALYs averted %', fontsize='small') + ax.set(xlabel=None) + ax.grid(axis="both") + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # xtick_colors = [scenario_color[v] for v in data_to_plot.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # xtick.set_color(color) # color scenarios 
based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + plt.legend(loc='upper right') + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # do regression on DALYs averted % and Euclidean distance to "gap" solution + increase_rate_distance = increase_rate_avg_exp[ + ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other'] + ].copy() + # increase_rate_distance = extra_budget_allocation[ + # ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other'] + # ].copy() + for idx in increase_rate_distance.index: + increase_rate_distance.loc[idx, 'Euclidean_distance'] = np.linalg.norm( + increase_rate_distance.loc[idx, ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']] - + increase_rate_distance.loc['s_2', ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']] + ) + regression_data_1 = pd.merge(outcome_data, + increase_rate_distance['Euclidean_distance'], + left_index=True, right_index=True, how='inner') + predictor = regression_data_1['Euclidean_distance'].drop(index=['s_2'], axis=0) + outcome = regression_data_1['mean'].drop(index=['s_2'], axis=0) # regression model without "optimal" data + # calculate pearson correlation + print(predictor.corr(outcome)) + predictor = sm.add_constant(predictor) # add constant term + est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit() + print(est.summary()) + + # calculate the predicted DALYs based on the regression results + for i in regression_data_1.index: + regression_data_1.loc[i, 'predicted'] = ( + regression_data_1.loc[i, 'Euclidean_distance'] * est.params['Euclidean_distance'] + est.params['const'] + ) + + # plot mean and predicted DALYs from regression analysis + name_of_plot = 'DALYs-averted simulated vs predicted from linear regression on Euclidean distance' + fig, ax = plt.subplots(figsize=(9, 6)) + 
data_to_plot = regression_data_1[['mean', 'predicted']] * 100 + data_to_plot['strategy'] = data_to_plot.index + data_to_plot.rename(columns={'mean': 'simulated'}, inplace=True) + data_to_plot.plot.scatter(x='strategy', y='simulated', color='blue', label='simulated', ax=ax) + data_to_plot.plot.scatter(x='strategy', y='predicted', color='orange', label='predicted', ax=ax) + ax.set_ylabel('DALYs averted %', fontsize='small') + ax.set(xlabel=None) + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # xtick_colors = [scenario_color[v] for v in data_to_plot.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + plt.legend(loc='upper right') + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # todo: could do regression analysis of DALYs averted and Services increased + + # # do anova analysis to test the difference of scenario groups + # def anova_oneway(df=num_dalys_averted_percent): + # best = df.loc[list(scenario_groups['C + P + D/NM/O/None']), 'mean'] + # middle_C = df.loc[list(scenario_groups['C + D/NM/O/None']), 'mean'] + # middle_P = df.loc[list(scenario_groups['P + D/NM/O/None']), 'mean'] + # worst = df.loc[df.index.isin(scenario_groups['D/NM/O/None']), 'mean'] + # + # return ss.oneway.anova_oneway((best, middle_C, middle_P, worst), + # groups=None, use_var='unequal', welch_correction=True, trim_frac=0) + + # anova_dalys = anova_oneway() + # anova_services = anova_oneway(num_services_increased_percent) + # anova_treatments = anova_oneway(num_treatments_total_increased_percent) + + # plot absolute numbers for scenarios + + # name_of_plot = f'Deaths, {target_period()}' + # fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6) + # 
ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs, {target_period()}' + # fig, ax = do_bar_plot_with_ci(num_dalys_summarized / 1e6) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Service demand, {target_period()}' + # fig, ax = do_bar_plot_with_ci(num_service_demand_summarized / 1e6) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Millions)') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Service delivery ratio, {target_period()}' + # fig, ax = do_bar_plot_with_ci(ratio_service_summarized) + # ax.set_title(name_of_plot) + # ax.set_ylabel('services delivered / demand') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # plot yearly DALYs for s_0, s_1, s_2, s_22 scenarios + name_of_plot = f'Yearly DALYs, {target_period()}' + fig, ax = plt.subplots(figsize=(9, 6)) + scenarios_to_plot = ['s_0', 's_1', 's_22', 's_2'] + scenarios_color = {'s_0': 'red', 's_1': 'yellow', 's_22': 'blue', 's_2': 'green'} + for s in scenarios_to_plot: + data = (num_dalys_yearly_summarized.loc[num_dalys_yearly_summarized.scenario == s, :] + .drop(columns='scenario') + .pivot(index='year', columns='stat') + .droplevel(0, axis=1)) + ax.plot(data.index, data['mean'] / 1e6, label=substitute_labels[s], color=scenarios_color[s], linewidth=2) + ax.fill_between(data.index.to_numpy(), + (data['lower'] / 1e6).to_numpy(), + (data['upper'] / 1e6).to_numpy(), + color=scenarios_color[s], + alpha=0.2) + ax.set_title(name_of_plot) + 
ax.set_ylabel('(Millions)') + ax.set_xticks(data.index) + ax.set_xticklabels(data.index, rotation=90) + legend_labels = [substitute_labels[v] for v in scenarios_to_plot] + legend_handles = [plt.Rectangle((0, 0), 1, 1, + color=scenarios_color[v]) for v in scenarios_to_plot] + ax.legend(legend_handles, legend_labels, + loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5), + title='selected scenarios') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # plot yearly DALYs for s_0, s_1, s_2, s_22 scenarios + name_of_plot = f'Yearly DALYs by {the_cause}, {target_period()}' # TB (non-AIDS) + fig, ax = plt.subplots(figsize=(9, 6)) + scenarios_to_plot = ['s_0', 's_1', 's_22', 's_2'] + scenarios_color = {'s_0': 'red', 's_1': 'yellow', 's_22': 'blue', 's_2': 'green'} + for s in scenarios_to_plot: + data = (num_dalys_by_one_cause_yearly_summarized.loc[num_dalys_by_one_cause_yearly_summarized.scenario == s, :] + .drop(columns='scenario') + .pivot(index='year', columns='stat') + .droplevel(0, axis=1)) + ax.plot(data.index, data['mean'] / 1e6, label=substitute_labels[s], color=scenarios_color[s], linewidth=2) + ax.fill_between(data.index.to_numpy(), + (data['lower'] / 1e6).to_numpy(), + (data['upper'] / 1e6).to_numpy(), + color=scenarios_color[s], + alpha=0.2) + ax.set_title(name_of_plot) + ax.set_ylabel('(Millions)') + ax.set_xticks(data.index) + ax.set_xticklabels(data.index, rotation=90) + legend_labels = [substitute_labels[v] for v in scenarios_to_plot] + legend_handles = [plt.Rectangle((0, 0), 1, 1, + color=scenarios_color[v]) for v in scenarios_to_plot] + ax.legend(legend_handles, legend_labels, + loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5), + title='selected scenarios') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # # plot yearly staff count 
(Clinical/Pharmacy/Nursing and Midwifery) for best 9 scenarios + # best_cadres = ['Clinical', 'Pharmacy', 'Nursing_and_Midwifery'] + # name_of_plot = f'Yearly staff count for C+P+N total, {target_period()}' + # fig, ax = plt.subplots(figsize=(9, 6)) + # best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1'] + # for s in best_scenarios: + # data = staff_count.loc[staff_count.draw == s].set_index('year').drop(columns='draw').loc[:, best_cadres].sum( + # axis=1) + # ax.plot(data.index, data.values / 1e3, label=substitute_labels[s], color=best_scenarios_color[s]) + # ax.set_title(name_of_plot) + # ax.set_ylabel('(Thousands)') + # ax.set_xticks(data.index) + # legend_labels = [substitute_labels[v] for v in best_scenarios] + # legend_handles = [plt.Rectangle((0, 0), 1, 1, + # color=best_scenarios_color[v]) for v in best_scenarios] + # ax.legend(legend_handles, legend_labels, + # loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5), + # title='Best scenario group') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Services by appointment type, {target_period()}' + # num_appts_summarized_in_millions = num_appts_summarized / 1e6 + # yerr_services = np.array([ + # (num_services_summarized['mean'] - num_services_summarized['lower']).values, + # (num_services_summarized['upper'] - num_services_summarized['mean']).values, + # ])/1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_services_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_appts_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + 
# plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Services demand by appointment type, {target_period()}' + # num_appts_demand_to_plot = num_appts_demand_summarized / 1e6 + # yerr_services = np.array([ + # (num_service_demand_summarized['mean'] - num_service_demand_summarized['lower']).values, + # (num_service_demand_summarized['upper'] - num_service_demand_summarized['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_appts_demand_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_service_demand_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_appts_demand_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'Never ran services by appointment type, {target_period()}' + num_never_ran_appts_summarized_in_millions = num_never_ran_appts_summarized / 1e6 + yerr_services = np.array([ + (num_never_ran_services_summarized['mean'] - num_never_ran_services_summarized['lower']).values, + (num_never_ran_services_summarized['upper'] - num_never_ran_services_summarized['mean']).values, + ])/1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + 
num_never_ran_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + ax.errorbar(range(len(param_names)), num_never_ran_services_summarized['mean'].values / 1e6, yerr=yerr_services, + fmt=".", color="black", zorder=100) + ax.set_ylabel('Millions', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_summarized_in_millions.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Total services demand by appointment type, {target_period()}' + data_to_plot = num_appts_demand_summarized / 1e6 + yerr_services = np.array([ + (num_services_demand_summarized['mean'] - num_services_demand_summarized['lower']).values, + (num_services_demand_summarized['upper'] - num_services_demand_summarized['mean']).values, + ])/1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + ax.errorbar(range(len(param_names)), num_services_demand_summarized['mean'].values / 1e6, yerr=yerr_services, + fmt=".", color="black", zorder=100) + ax.set_ylabel('Millions', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + 
fig.show() + plt.close(fig) + + # name_of_plot = f'Services by treatment type, {target_period()}' + # num_treatments_summarized_in_millions = num_treatments_summarized / 1e6 + # yerr_services = np.array([ + # (num_treatments_total_summarized['mean'] - num_treatments_total_summarized['lower']).values, + # (num_treatments_total_summarized['upper'] - num_treatments_total_summarized['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(10, 6)) + # num_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_treatments_total_summarized['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_treatments_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Never ran services by treatment type, {target_period()}' + # num_never_ran_treatments_summarized_in_millions = num_never_ran_treatments_summarized / 1e6 + # yerr_services = np.array([ + # (num_never_ran_treatments_total_summarized['mean'] - num_never_ran_treatments_total_summarized['lower']).values, + # (num_never_ran_treatments_total_summarized['upper'] - num_never_ran_treatments_total_summarized['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(10, 6)) + # num_never_ran_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_never_ran_treatments_total_summarized['mean'].values / 1e6, + # 
yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Number of staff by cadre, {TARGET_PERIOD[1].year}' + # total_staff_to_plot = (staff_count_2029 / 1000).drop(columns='all_cadres').reindex(num_dalys_summarized.index) + # column_dcsa = total_staff_to_plot.pop('DCSA') + # total_staff_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Thousands', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in total_staff_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'HCW time used by cadre in delivering services , {target_period()}' + data_to_plot = (hcw_time_used.drop(columns='all') / 1e6).reindex(num_dalys_summarized.index) + column_dcsa = data_to_plot.pop('DCSA') + data_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + 
ax.set_ylabel('Minutes in Millions', fontsize='small') + ax.set(xlabel=None) + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW time needed to deliver never ran appointments, {target_period()}' + hcw_time_gap_to_plot = (hcw_time_gap / 1e6).reindex(num_dalys_summarized.index) + column_dcsa = hcw_time_gap_to_plot.pop('DCSA') + hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + ax.set_ylabel('Minutes in Millions', fontsize='small') + ax.set(xlabel=None) + xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW cost gap by cadre to deliver never ran appointments, {target_period()}' + cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other'] + hcw_cost_gap_to_plot = (hcw_cost_gap[cadres_to_plot] / 1e6).reindex(num_dalys_summarized.index) + column_dcsa = hcw_cost_gap_to_plot.pop('DCSA') + hcw_cost_gap_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + hcw_cost_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + 
ax.set_ylabel('USD in Millions', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + + xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_to_plot.index] + xtick_colors = [scenario_color[v] for v in hcw_cost_gap_to_plot.index] + for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Count proportions of never ran appointments that require specific cadres only, {target_period()}' + data_to_plot = p_count * 100 + fig, ax = plt.subplots(figsize=(12, 8)) + data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + ax.set_ylim(0, 100) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90) + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # plot the average proportions of all scenarios + # for c in data_to_plot.columns: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # label=c) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW cost proportions of never ran appointments that require specific cadres only, {target_period()}' + data_to_plot = p_cost * 100 + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', 
stacked=True, color=cadre_comb_color, rot=0, ax=ax) + # ax.set_ylim(0, 100) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + xtick_colors = [scenario_color[v] for v in data_to_plot.index] + for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # plot the average proportions of all scenarios + # for c in data_to_plot.columns: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # label=c) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Time proportions of never ran appointments that require specific cadres only, {target_period()}' + data_to_plot = p_time * 100 + fig, ax = plt.subplots(figsize=(12, 8)) + data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + ax.set_ylim(0, 100) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90) + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # plot the average proportions of all scenarios + # for c in data_to_plot.columns: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # label=c) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = 
f'HCW cost of never ran appointments that require specific cadres only, {target_period()}' + data_to_plot = a_cost / 1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + ax.set_ylabel('USD in millions') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + xtick_colors = [scenario_color[v] for v in data_to_plot.index] + for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # plot the average cost of all scenarios + # for c in data_to_plot.columns: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # label=c) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Time distribution of never ran appointments that require specific cadres only, {target_period()}' + data_to_plot = a_time / 1e6 + fig, ax = plt.subplots(figsize=(12, 8)) + data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax) + ax.set_ylabel('minutes in millions') + ax.set_xlabel('Extra budget allocation scenario') + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90) + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True) + # # plot the average cost of all scenarios + # for c in data_to_plot.columns: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2, + # label=c) + plt.title(name_of_plot) + 
fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW cost gap proportion by cadre to deliver never ran appointments, {target_period()}' + cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other'] + hcw_cost_gap_percent_to_plot = hcw_cost_gap_percent[cadres_to_plot] * 100 + fig, ax = plt.subplots(figsize=(9, 6)) + # hcw_cost_gap_percent_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + hcw_cost_gap_percent_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + #ax.set_ylim(0, 100) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + + xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_percent_to_plot.index] + xtick_colors = [scenario_color[v] for v in hcw_cost_gap_percent_to_plot.index] + for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', reverse=True) + # plot the average proportions of all scenarios + # for c in cadres_to_plot: + # plt.axhline(y=hcw_cost_gap_percent_to_plot[c].mean(), + # linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2, + # label=c) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'HCW cost gap distribution of never ran appointments that require CNP only, {target_period()}' + # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy'] + # data_to_plot = never_ran_appts_info_that_need_CNP[3][cadres_to_plot] * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', 
color=officer_category_color, rot=0, alpha=0.6, ax=ax) + # #ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + # # plot the average proportions of all scenarios + # for c in cadres_to_plot: + # plt.axhline(y=data_to_plot[c].mean(), + # linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2, + # label=c) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Average fractions of HCW time used (CNP, level 1a), {target_period()}' + # data_to_plot = hcw_time_usage_summarized.xs('1a', axis=1, level=1, drop_level=True) * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + # #ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Average fractions of HCW time used (CNP, level 2), {target_period()}' + # data_to_plot = hcw_time_usage_summarized.xs('2', axis=1, level=1, drop_level=True) * 100 + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax) + # # ax.set_ylim(0, 100) + # ax.set_ylabel('Percentage %') + # 
ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'Extra budget allocation among cadres, {target_period()}' + cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other'] + extra_budget_allocation_to_plot = extra_budget_allocation[cadres_to_plot] * 100 + fig, ax = plt.subplots(figsize=(12, 8)) + extra_budget_allocation_to_plot.plot(kind='bar', color=officer_category_color, rot=0, ax=ax) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in extra_budget_allocation_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90) + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category') + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}' + # total_cost_to_plot = (total_cost_all_yrs / 1e6).drop(columns='all_cadres').reindex(num_dalys_summarized.index) + # column_dcsa = total_cost_to_plot.pop('DCSA') + # total_cost_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in total_cost_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center 
left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'DALYs by cause, {target_period()}' + # num_dalys_by_cause_summarized_in_millions = num_dalys_by_cause_summarized / 1e6 + # yerr_dalys = np.array([ + # (num_dalys_summarized['mean'] - num_dalys_summarized['lower']).values, + # (num_dalys_summarized['upper'] - num_dalys_summarized['mean']).values, + # ])/1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_dalys_by_cause_summarized_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)), num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set(xlabel=None) + # xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_summarized_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # fig.subplots_adjust(right=0.7) + # ax.legend( + # loc="center left", + # bbox_to_anchor=(0.750, 0.6), + # bbox_transform=fig.transFigure, + # title='Cause of death or injury', + # title_fontsize='x-small', + # fontsize='x-small', + # reverse=True, + # ncol=1 + # ) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # plot relative numbers for scenarios + name_of_plot = f'DALYs averted vs no extra budget allocation, {target_period()}' + fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, num_dalys_averted_percent, annotation=True) + ax.set_title(name_of_plot, fontsize='medium') + ax.set_ylabel('DALYs averted in Millions', fontsize='medium') + ax.set_xlabel('Extra budget allocation scenario', 
fontsize='medium') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + name_of_plot = f'Deaths averted vs no extra budget allocation, {target_period()}' + fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, num_deaths_averted_percent, annotation=True) + ax.set_title(name_of_plot) + ax.set_ylabel('Millions') + ax.set_xlabel('Extra budget allocation scenario') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # todo: plot Deaths averted by cause + + # name_of_plot = f'Service delivery ratio against no expansion, {target_period()}' + # fig, ax = do_bar_plot_with_ci(service_ratio_increased * 100, service_ratio_increased_percent, annotation=True) + # ax.set_title(name_of_plot) + # ax.set_ylabel('Percentage') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Extra staff by cadre against no expansion, {TARGET_PERIOD[1].year}' + # extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex( + # num_dalys_summarized.index).drop(['s_1']) / 1e3 + # column_dcsa = extra_staff_by_cadre_to_plot.pop('DCSA') + # extra_staff_by_cadre_to_plot.insert(3, "DCSA", column_dcsa) + # fig, ax = plt.subplots(figsize=(9, 6)) + # extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + # ax.set_ylabel('Thousands', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # 
fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'Extra budget by cadre vs no extra budget allocation, {target_period()}' + extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex( + num_dalys_summarized.index).drop(index='s_0') / 1e6 + column_dcsa = extra_cost_by_cadre_to_plot.pop('DCSA') + extra_cost_by_cadre_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax) + ax.set_ylabel('Millions', fontsize='small') + ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + # # name_of_plot = f'Time used increased by cadre and treatment: C + NM + P vs no expansion, {target_period()}' + # # data_to_plot = time_increased_by_cadre_treatment_CNP / 1e6 + # name_of_plot = f'Time used increased by cadre and treatment: C + P vs no expansion, {target_period()}' + # data_to_plot = time_increased_by_cadre_treatment_CP / 1e6 + # data_to_plot['total'] = data_to_plot.sum(axis=1) + # data_to_plot.sort_values(by='total', inplace=True, ascending=False) + # data_to_plot.drop('total', axis=1, inplace=True) + # data_to_plot = data_to_plot[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery', + # 'DCSA', 'Laboratory', 'Mental', 'Radiography']] + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, 
ax=ax) + # ax.set_ylabel('Millions Minutes') + # ax.set_xlabel('Treatment') + # ax.set_xticklabels(data_to_plot.index, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + # ':', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Time used increased by treatment and cadre: C + NM + P vs no expansion, {target_period()}' + # # name_of_plot = f'Time used increased by treatment and cadre: C + P vs no expansion, {target_period()}' + # data_to_plot = data_to_plot.T + # data_to_plot = data_to_plot.add_suffix('*') + # fig, ax = plt.subplots(figsize=(12, 8)) + # data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # ax.set_ylabel('Millions Minutes') + # ax.set_xlabel('Treatment') + # ax.set_xticklabels(data_to_plot.index, rotation=90) + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + # ':', ''))) + # fig.show() + # plt.close(fig) + + name_of_plot = f'DALYs by cause averted: \nall cadres gap allocation vs no extra budget allocation, {target_period()}' + data_to_plot = num_dalys_by_cause_averted_CNP / 1e6 + # name_of_plot = f'DALYs by cause averted: C + P vs no expansion, {target_period()}' + # data_to_plot = num_dalys_by_cause_averted_CP / 1e6 + fig, ax = plt.subplots() + data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values) + ax.set_ylabel('Millions') + ax.set_xlabel('Treatment') + ax.set_xticklabels(data_to_plot.index, rotation=90) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + ':', '').replace('\n', ''))) + fig.show() + 
plt.close(fig) + + name_of_plot = f'DALYs by cause averted %: \nall cadres gap allocation vs no extra budget allocation, {target_period()}' + data_to_plot = num_dalys_by_cause_averted_percent_CNP * 100 + fig, ax = plt.subplots() + data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values) + ax.set_ylabel('Percentage %') + ax.set_xlabel('Treatment') + ax.set_xticklabels(data_to_plot.index, rotation=90) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace( + ':', '').replace('\n', ''))) + fig.show() + plt.close(fig) + + # name_of_plot = f'Services increased by appointment type \nagainst no expansion, {target_period()}' + # num_appts_increased_in_millions = num_appts_increased / 1e6 + # yerr_services = np.array([ + # (num_services_increased['mean'] - num_services_increased['lower']).values, + # (num_services_increased['upper'] - num_services_increased['mean']).values, + # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_appts_increased_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # ax.errorbar(range(len(param_names)-1), num_services_increased['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_appts_increased_in_millions.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name( + # name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + # ) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Never ran services reduced by appointment type \nagainst no expansion, 
{target_period()}' + # num_never_ran_appts_reduced_to_plot = num_never_ran_appts_reduced / 1e6 + # # yerr_services = np.array([ + # # (num_services_increased['mean'] - num_services_increased['lower']).values, + # # (num_services_increased['upper'] - num_services_increased['mean']).values, + # # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_never_ran_appts_reduced_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax) + # # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services, + # # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + # ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_reduced_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name( + # name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + # ) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Never ran services reduced by treatment type \nagainst no expansion, {target_period()}' + # num_never_ran_treatments_reduced_to_plot = num_never_ran_treatments_reduced / 1e6 + # # yerr_services = np.array([ + # # (num_services_increased['mean'] - num_services_increased['lower']).values, + # # (num_services_increased['upper'] - num_services_increased['mean']).values, + # # ]) / 1e6 + # fig, ax = plt.subplots(figsize=(9, 6)) + # num_never_ran_treatments_reduced_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + # # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services, + # # fmt=".", color="black", zorder=100) + # ax.set_ylabel('Millions', fontsize='small') + 
# ax.set_xlabel('Extra budget allocation scenario', fontsize='small') + # xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_reduced_to_plot.index] + # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment type', title_fontsize='small', + # fontsize='small', reverse=True) + # plt.title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name( + # name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + # ) + # fig.show() + # plt.close(fig) + + name_of_plot = f'Services increased by treatment type \nvs no extra budget allocation, {target_period()}' + data_to_plot = num_treatments_increased / 1e6 + # yerr_services = np.array([ + # (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values, + # (num_treatments_total_increased['upper'] - num_treatments_total_increased['mean']).values, + # ]) / 1e6 + fig, ax = plt.subplots(figsize=(10, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, width=0.8, ax=ax) + # ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # move bars to new xticks + new_xticks = {(i + 0.5): k for i, k in enumerate(data_to_plot.index)} + ax.set_xticks(list(new_xticks.keys())) + ax.set_xticklabels(list(new_xticks.values())) + for i, rect in enumerate(ax.patches): + # Shift the bars based on their new position + rect.set_x(list(new_xticks.keys())[i % len(list(new_xticks.keys()))] - rect.get_width() / 2) + ax.set_xlim(-1, len(data_to_plot.index) + 1) + + ax.set_ylabel('Millions', fontsize='medium') + ax.set(xlabel=None) + ax.grid(axis='y') + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # xtick_colors = [scenario_color[v] for v in data_to_plot.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # 
xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='medium') # re-label scenarios + + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.3), title='Treatment type', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot, fontsize='medium') + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'Services increased by treatment area vs no extra budget allocation, {target_period()}' + data_to_plot = num_treatments_group_increased / 1e6 + data_to_plot = data_to_plot[ + ['RMNCH', 'HIV/AIDS', 'Malaria', 'TB (non-AIDS)', 'NCDs', 'Transport Injuries'] + ] + # yerr_services = np.array([ + # (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values, + # (num_treatments_total_increased['upper'] - num_treatments_total_increased['mean']).values, + # ]) / 1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=treatment_group_color, rot=0, width=0.8, ax=ax) + # move bars to new xticks + new_xticks = {(i + 0.5): k for i, k in enumerate(data_to_plot.index)} + ax.set_xticks(list(new_xticks.keys())) + ax.set_xticklabels(list(new_xticks.values())) + for i, rect in enumerate(ax.patches): + # Shift the bars based on their new position + rect.set_x(list(new_xticks.keys())[i % len(list(new_xticks.keys()))] - rect.get_width() / 2) + ax.set_xlim(-1, len(data_to_plot.index) + 1) + + # ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services, + # fmt=".", color="black", zorder=100) + # add annotation + assert (data_to_plot.index == num_treatments_total_increased_percent.index).all() + assert (data_to_plot.index == num_treatments_total_increased.index).all() + for xpos, ypos, text1 in zip(ax.get_xticks(), + 
(num_treatments_total_increased['upper'] / 1e6).values, + num_treatments_total_increased_percent['mean'].values): + text = f"{int(round(text1 * 100, 2))}%" # \n{[round(text2, 2),round(text3, 2)]}" + ax.text(xpos, ypos + 0.05, text, horizontalalignment='center', fontsize='small') + + ax.set_ylabel('Services increased in Millions', fontsize='medium') + ax.set_xlabel('Extra budget allocation scenario', fontsize='medium') + ax.grid(axis="y") + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # xtick_colors = [scenario_color[v] for v in data_to_plot.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='medium') # re-label scenarios + + plt.legend(title='Treatment area', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot, fontsize='medium') + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW time-used increased by treatment type \nvs no extra budget allocation, {target_period()}' + data_to_plot = hcw_time_increased_by_treatment_type / 1e6 + fig, ax = plt.subplots(figsize=(10, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax) + ax.set_ylabel('Million minutes', fontsize='small') + ax.set(xlabel=None) + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') + plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot) + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'HCW time-used increased by cadre vs no 
extra budget allocation, {target_period()}' + data_to_plot = hcw_time_increased_by_cadre.drop(columns='all') / 1e9 + column_dcsa = data_to_plot.pop('DCSA') + data_to_plot.insert(3, "DCSA", column_dcsa) + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, width=0.8, alpha=0.8, ax=ax) + # move bars to new xticks + new_xticks = {(i + 0.5): k for i, k in enumerate(data_to_plot.index)} + ax.set_xticks(list(new_xticks.keys())) + ax.set_xticklabels(list(new_xticks.values())) + for i, rect in enumerate(ax.patches): + # Shift the bars based on their new position + rect.set_x(list(new_xticks.keys())[i % len(list(new_xticks.keys()))] - rect.get_width() / 2) + ax.set_xlim(-1, len(data_to_plot.index) + 1) + # add annotation + assert (data_to_plot.index == hcw_time_increased_by_cadre_percent.index).all() + for xpos, ypos, text1 in zip(ax.get_xticks(), + (hcw_time_increased_by_cadre['all'] / 1e9).values, + hcw_time_increased_by_cadre_percent['all'].values): + text = f"{int(round(text1 * 100, 2))}%" + ax.text(xpos, ypos + 0.05, text, horizontalalignment='center', fontsize='small') + ax.set_ylabel('Billions minutes', fontsize='medium') + ax.set_xlabel('Extra budget allocation scenario', fontsize='medium') + ax.grid(axis="y") + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # xtick_colors = [scenario_color[v] for v in data_to_plot.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='medium') # re-label scenarios + + plt.legend(title='Officer category', title_fontsize='small', + fontsize='small', reverse=True) + plt.title(name_of_plot, fontsize='medium') + fig.tight_layout() + fig.savefig(make_graph_file_name( + name_of_plot.replace(' ', '_').replace(',', '').replace('\n', '')) + ) + fig.show() + plt.close(fig) + + name_of_plot = f'DALYs by cause 
averted vs no extra budget allocation, {target_period()}' + num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6 + # yerr_dalys = np.array([ + # (num_dalys_averted['mean'] - num_dalys_averted['lower']).values, + # (num_dalys_averted['upper'] - num_dalys_averted['mean']).values, + # ]) / 1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + num_dalys_by_cause_averted_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, width=0.8, ax=ax) + # ax.errorbar(range(len(param_names)-1), num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys, + # fmt=".", color="black", zorder=100) + # move bars to new xticks + new_xticks = {(i + 0.5): k for i, k in enumerate(data_to_plot.index)} + ax.set_xticks(list(new_xticks.keys())) + ax.set_xticklabels(list(new_xticks.values())) + for i, rect in enumerate(ax.patches): + # Shift the bars based on their new position + rect.set_x(list(new_xticks.keys())[i % len(list(new_xticks.keys()))] - rect.get_width() / 2) + ax.set_xlim(-1, len(data_to_plot.index) + 1) + + ax.set_ylabel('Millions', fontsize='medium') + ax.set_xlabel('Extra budget allocation scenario', fontsize='medium') + ax.grid(axis='y') + + xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_averted.index] + # xtick_colors = [scenario_color[v] for v in num_dalys_by_cause_averted.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='medium') # re-label scenarios + + fig.subplots_adjust(right=0.7) + ax.legend( + loc="center left", + bbox_to_anchor=(0.750, 0.6), + bbox_transform=fig.transFigure, + title='Cause of death or injury', + title_fontsize='x-small', + fontsize='x-small', + ncol=1, + reverse=True + ) + plt.title(name_of_plot, fontsize='medium') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + fig.show() + plt.close(fig) + + 
name_of_plot = f'DALYs by cause area averted vs no extra budget allocation, {target_period()}' + data_to_plot = num_dalys_by_cause_group_averted / 1e6 + data_to_plot = data_to_plot[ + ['RMNCH', 'HIV/AIDS', 'Malaria', 'TB (non-AIDS)', 'NCDs', 'Transport Injuries'] + ] + # yerr_dalys = np.array([ + # (num_dalys_averted['mean'] - num_dalys_averted['lower']).values, + # (num_dalys_averted['upper'] - num_dalys_averted['mean']).values, + # ]) / 1e6 + fig, ax = plt.subplots(figsize=(9, 6)) + data_to_plot.plot(kind='bar', stacked=True, color=cause_group_color, rot=0, width=0.8, ax=ax) + # ax.errorbar(range(len(param_names)-1), num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys, + # fmt=".", color="black", zorder=100) + # move bars to new xticks + new_xticks = {(i + 0.5): k for i, k in enumerate(data_to_plot.index)} + ax.set_xticks(list(new_xticks.keys())) + ax.set_xticklabels(list(new_xticks.values())) + for i, rect in enumerate(ax.patches): + # Shift the bars based on their new position + rect.set_x(list(new_xticks.keys())[i % len(list(new_xticks.keys()))] - rect.get_width() / 2) + ax.set_xlim(-1, len(data_to_plot.index) + 1) + + ax.set_ylabel('DALYs averted in Millions', fontsize='medium') + ax.set_xlabel('Extra budget allocation scenario', fontsize='medium') + ax.grid(axis="y") + + xtick_labels = [substitute_labels[v] for v in data_to_plot.index] + # xtick_colors = [scenario_color[v] for v in data_to_plot.index] + # for xtick, color in zip(ax.get_xticklabels(), xtick_colors): + # xtick.set_color(color) # color scenarios based on the group info + ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small') # re-label scenarios + + fig.subplots_adjust(right=0.7) + ax.legend( + # bbox_transform=fig.transFigure, + title='Cause of death or injury', + title_fontsize='small', + fontsize='small', + ncol=1, + reverse=True + ) + plt.title(name_of_plot, fontsize='medium') + fig.tight_layout() + fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', 
''))) + fig.show() + plt.close(fig) + + # plot ROI and CE for all expansion scenarios + + # name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}' + # fig, ax = do_bar_plot_with_ci(ROI) + # ax.set_title(name_of_plot) + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # name_of_plot = f'Cost per DALY averted, {target_period()}' + # fig, ax = do_bar_plot_with_ci(CE) + # ax.set_title(name_of_plot) + # ax.set_ylabel('USD dollars') + # fig.tight_layout() + # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', ''))) + # fig.show() + # plt.close(fig) + + # todo + # To vary the HRH budget growth rate (default: 4.2%) and do sensitivity analysis \ + # (around the best possible extra budget allocation scenario)? + # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary? The \ + # inflation rate of GDP and health workforce budget and the increase rate of salary could be assumed to be \ + # the same, thus no need to consider the increase rate of salary if GDP inflation is not considered. + # To plot time series of staff and budget in the target period to show \ + # how many staff and how much budget to increase yearly (choose the best scenario to illustrate)? + # Before submit a run, merge in the remote master. + # Think about a measure of Universal Health Service Coverage for the scenarios? + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("results_folder", type=Path) # outputs/bshe@ic.ac.uk/scenario_run_for_hcw_expansion_analysis-2024-08-16T160132Z + args = parser.parse_args() + + # Produce results for short-term analysis: 5 years + + # # 2015-2019, before change, incl. mode, hr expansion, etc. 
+ # apply( + # results_folder=args.results_folder, + # output_folder=args.results_folder, + # resourcefilepath=Path('./resources'), + # the_target_period=(Date(2015, 1, 1), Date(2019, 12, 31)) + # ) + # + # # 2020-2024 + # apply( + # results_folder=args.results_folder, + # output_folder=args.results_folder, + # resourcefilepath=Path('./resources'), + # the_target_period=(Date(2020, 1, 1), Date(2024, 12, 31)) + # ) + + # Produce results for long-term analysis: 10 years + # 2020-2029 + apply( + results_folder=args.results_folder, + output_folder=args.results_folder, + resourcefilepath=Path('./resources'), + the_target_period=(Date(2025, 1, 1), Date(2034, 12, 31)) + ) diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py new file mode 100644 index 0000000000..959e96104f --- /dev/null +++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py @@ -0,0 +1,345 @@ +""" +We calculate the salary cost of current and funded plus HCW. 
+""" +import itertools +# import pickle +from pathlib import Path + +import numpy as np +import pandas as pd + +resourcefilepath = Path('./resources') + +mfl = pd.read_csv(resourcefilepath / 'healthsystem' / 'organisation' / 'ResourceFile_Master_Facilities_List.csv') + +hr_salary = pd.read_csv(resourcefilepath / + 'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv', index_col=False) +hr_salary_per_level = pd.read_excel(resourcefilepath / + 'costing' / 'ResourceFile_Costing.xlsx', sheet_name='human_resources') +# as of 2019 +hr_current = pd.read_csv(resourcefilepath / + 'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv') +hr_established = pd.read_csv(resourcefilepath / + 'healthsystem' / 'human_resources' / 'funded_plus' / 'ResourceFile_Daily_Capabilities.csv') +# for 2020-2024 +historical_scaling = pd.read_excel(resourcefilepath / + 'healthsystem' / 'human_resources' / 'scaling_capabilities' / + 'ResourceFile_dynamic_HR_scaling.xlsx', sheet_name='historical_scaling' + ).set_index('year') +integrated_historical_scaling = ( + historical_scaling.loc[2020, 'dynamic_HR_scaling_factor'] * + historical_scaling.loc[2021, 'dynamic_HR_scaling_factor'] * + historical_scaling.loc[2022, 'dynamic_HR_scaling_factor'] * + historical_scaling.loc[2023, 'dynamic_HR_scaling_factor'] * + historical_scaling.loc[2024, 'dynamic_HR_scaling_factor'] +) + +# to get minute salary per cadre per level +Annual_PFT = hr_current.groupby(['Facility_Level', 'Officer_Category']).agg( + {'Total_Mins_Per_Day': 'sum', 'Staff_Count': 'sum'}).reset_index() +Annual_PFT['Annual_Mins_Per_Staff'] = 365.25 * Annual_PFT['Total_Mins_Per_Day']/Annual_PFT['Staff_Count'] + +# the hr salary by minute and facility id, as of 2019 +Minute_Salary = Annual_PFT.merge(hr_salary, on=['Officer_Category'], how='outer') +Minute_Salary['Minute_Salary_USD'] = Minute_Salary['Annual_Salary_USD']/Minute_Salary['Annual_Mins_Per_Staff'] +# store the minute salary by cadre and level 
+Minute_Salary_by_Cadre_Level = Minute_Salary[ + ['Facility_Level', 'Officer_Category', 'Minute_Salary_USD'] +].copy().fillna(0.0) +Minute_Salary = Minute_Salary[['Facility_Level', 'Officer_Category', 'Minute_Salary_USD']].merge( + mfl[['Facility_Level', 'Facility_ID']], on=['Facility_Level'], how='outer' +) +Minute_Salary.drop(columns=['Facility_Level'], inplace=True) +Minute_Salary = Minute_Salary.fillna(0.0) +Minute_Salary.rename(columns={'Officer_Category': 'Officer_Type_Code'}, inplace=True) + +Minute_Salary.to_csv(resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False) + +# implement historical scaling to hr_current +hr_current['Total_Mins_Per_Day'] *= integrated_historical_scaling +hr_current['Staff_Count'] *= integrated_historical_scaling + +# calculate the current cost distribution of all cadres, as of 2024 +cadre_all = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'] +staff_count = hr_current.groupby('Officer_Category')['Staff_Count'].sum().reset_index() +staff_cost = staff_count.merge(hr_salary, on=['Officer_Category'], how='outer') +staff_cost['annual_cost'] = staff_cost['Staff_Count'] * staff_cost['Annual_Salary_USD'] +staff_cost['cost_frac'] = (staff_cost['annual_cost'] / staff_cost['annual_cost'].sum()) +assert abs(staff_cost.cost_frac.sum() - 1) < 1/1e8 +staff_cost.set_index('Officer_Category', inplace=True) +staff_cost = staff_cost.reindex(index=cadre_all) + +# No expansion scenario, or zero-extra-budget-fraction scenario, "s_0" +# Define the current cost fractions among all cadres as extra-budget-fraction scenario "s_1" \ +# to be matched with Margherita's 4.2% scenario. 
+# Add in the scenario that is indicated by hcw cost gap distribution \ +# resulted from never ran services in no expansion scenario, "s_2" +# Add in the scenario that is indicated by the regression analysis of all other scenarios, "s_*" +# Define all other scenarios so that the extra budget fraction of each cadre, \ +# i.e., four main cadres and the "Other" cadre that groups up all other cadres, is the same (fair allocation) + +cadre_group = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other'] # main cadres +other_group = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'] + +# create scenarios +combination_list = ['s_0', 's_1', 's_2'] # the three special scenarios +for n in range(1, len(cadre_group)+1): + for subset in itertools.combinations(cadre_group, n): + combination_list.append(str(subset)) # other equal-fraction scenarios +# add in "s_*" in the end +combination_list.append('s_*') + +# cadre groups to expand +cadre_to_expand = pd.DataFrame(index=cadre_group, columns=combination_list).fillna(0.0) +for c in cadre_group: + for i in cadre_to_expand.columns[3:len(combination_list) - 1]: # for all equal-fraction scenarios + if c in i: + cadre_to_expand.loc[c, i] = 1 # value 1 indicate the cadre group will be expanded + +# prepare auxiliary dataframe for equal extra budget fractions scenarios +auxiliary = cadre_to_expand.copy() +for i in auxiliary.columns[3:len(combination_list) - 1]: # for all equal-fraction scenarios + auxiliary.loc[:, i] = auxiliary.loc[:, i] / auxiliary.loc[:, i].sum() +# for "gap" allocation strategy +# auxiliary.loc[:, 's_2'] = [0.4586, 0.0272, 0.3502, 0.1476, 0.0164] # without historical scaling; "default" settings +auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365] # historical scaling + main settings +# auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365] # historical scaling + more_budget; same as above +# auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365] # 
historical scaling + less_budget; same as above +# auxiliary.loc[:, 's_2'] = [0.4252, 0.0261, 0.3752, 0.1362, 0.0373] # historical scaling + default_cons +# auxiliary.loc[:, 's_2'] = [0.5133, 0.0085, 0.2501, 0.1551, 0.073] # historical scaling + max_hs_function +# for "optimal" allocation strategy +auxiliary.loc[:, 's_*'] = [0.6068, 0.0, 0.0830, 0.2496, 0.0606] # historical scaling + main settings +# auxiliary.loc[:, 's_*'] = [0.5827, 0.0, 0.1083, 0.2409, 0.0681] # historical scaling + more_budget; same as above +# auxiliary.loc[:, 's_*'] = [0.5981, 0.0, 0.0902, 0.2649, 0.0468] # historical scaling + less_budget; same as above +# auxiliary.loc[:, 's_*'] = [0.6109, 0.0, 0.1494, 0.2033, 0.0364] # historical scaling + default_cons +# auxiliary.loc[:, 's_*'] = [0.5430, 0.0, 0.3631, 0.0939, 0.0] # historical scaling + max_hs_function + +# define extra budget fracs for each cadre +extra_budget_fracs = pd.DataFrame(index=cadre_all, columns=combination_list) +assert (extra_budget_fracs.columns == auxiliary.columns).all() +assert (extra_budget_fracs.index[0:4] == auxiliary.index[0:4]).all() + +extra_budget_fracs.loc[:, 's_0'] = 0 +assert (staff_cost.index == extra_budget_fracs.index).all() +extra_budget_fracs.loc[:, 's_1'] = staff_cost.loc[:, 'cost_frac'].values + +for i in extra_budget_fracs.columns[2:]: + for c in extra_budget_fracs.index: + if c in auxiliary.index: # the four main cadres + extra_budget_fracs.loc[c, i] = auxiliary.loc[c, i] + else: # the other 5 cadres + extra_budget_fracs.loc[c, i] = auxiliary.loc['Other', i] * ( + staff_cost.loc[c, 'cost_frac'] / staff_cost.loc[staff_cost.index.isin(other_group), 'cost_frac'].sum() + ) # current cost distribution among the 5 other cadres + # extra_budget_fracs.loc[c, i] = auxiliary.loc['Other', i] / 5 # equal fracs among the 5 other cadres + +assert (abs(extra_budget_fracs.iloc[:, 1:len(extra_budget_fracs.columns)].sum(axis=0) - 1.0) < 1/1e10).all() + +# rename scenarios +# make the scenario of equal fracs for all five 
cadre groups (i.e., the last column) to be s_3 +simple_scenario_name = {extra_budget_fracs.columns[-2]: 's_3'} +for i in range(3, len(extra_budget_fracs.columns)-2): + simple_scenario_name[extra_budget_fracs.columns[i]] = 's_' + str(i+1) # name scenario from s_4 to s_33 +extra_budget_fracs.rename(columns=simple_scenario_name, inplace=True) + +# reorder columns +col_order = ['s_' + str(i) for i in range(0, len(extra_budget_fracs.columns) - 1)] +col_order += ['s_*'] +assert len(col_order) == len(extra_budget_fracs.columns) +extra_budget_fracs = extra_budget_fracs.reindex(columns=col_order) + +# prepare samples for extra budget fracs that changes values for C, NM and P +# (the main cadres for service delivery and directly impacting health outcomes), +# where DCSA = 2% and Other = 4% are fixed according to "gap" strategies +# and that these cadres either have limited impacts as estimated, deliver a very small proportion of services, +# or can deliver relevant services without being constrained by other cadres. 
+# value_list = list(np.arange(0, 95, 1)) +# combinations = [] +# for i in itertools.product(value_list, repeat=3): +# if sum(i) == 94: +# combinations.append(i) +# extra_budget_fracs_sample = pd.DataFrame(index=extra_budget_fracs.index, columns=range(len(combinations)+1)) +# extra_budget_fracs_sample.iloc[:, 0] = 0 +# extra_budget_fracs_sample.loc['DCSA', 1:] = 2 +# for c in other_group: +# extra_budget_fracs_sample.loc[c, 1:] = 4 * ( +# staff_cost.loc[c, 'cost_frac'] / staff_cost.loc[staff_cost.index.isin(other_group), 'cost_frac'].sum()) +# for i in range(1, len(combinations)+1): +# extra_budget_fracs_sample.loc[['Clinical', 'Nursing_and_Midwifery', 'Pharmacy'], i] = combinations[i-1] +# extra_budget_fracs_sample /= 100 +# assert (abs(extra_budget_fracs_sample.iloc[:, 1:].sum(axis=0) - 1.0) < 1e-9).all() +# extra_budget_fracs_sample.rename(columns={0: 's_0'}, inplace=True) +# +# extra_budget_fracs = extra_budget_fracs_sample.copy() + +# if do not fix DCSA and Other +# value_list = list(np.arange(0, 105, 5)) +# combinations = [] +# for i in itertools.product(value_list, repeat=5): +# if sum(i) == 100: +# combinations.append(i) +# extra_budget_fracs_sample = pd.DataFrame(index=extra_budget_fracs.index, columns=range(len(combinations)+1)) +# extra_budget_fracs_sample.iloc[:, 0] = 0 +# for i in range(1, len(combinations)+1): +# extra_budget_fracs_sample.loc[['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA'], i] = combinations[i-1][:-1] +# for c in other_group: +# extra_budget_fracs_sample.loc[c, i] = combinations[i-1][4] * ( +# staff_cost.loc[c, 'cost_frac'] / staff_cost.loc[staff_cost.index.isin(other_group), 'cost_frac'].sum()) +# extra_budget_fracs_sample /= 100 +# assert (abs(extra_budget_fracs_sample.iloc[:, 1:].sum(axis=0) - 1.0) < 1e-9).all() +# extra_budget_fracs_sample.rename(columns={0: 's_0'}, inplace=True) +# +# extra_budget_fracs = extra_budget_fracs_sample.copy() + +# define the HRH budget growth rate +R = 0.042 # 0.042, 0.058, 0.026 + + +# 
# calculate hr scale up factors for years 2025-2034 (10 years in total) outside the healthsystem module
def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario, r=R) -> pd.DataFrame:
    """Calculate the yearly hr scale up factor for each cadre for the year `yr`.

    Parameters:
        extra_budget_frac (list): nine floats, one per cadre in `cadre_all`, giving the
            fraction of the extra budget allocated to each cadre.
        yr (int): a year in 2025-2034 (inclusive).
        scenario (str): a column name of `extra_budget_fracs`.
        r (float): the yearly HRH budget growth rate (defaults to the module-level `R`).

    Returns a dataframe (indexed by cadre) with the updated staff counts, annual costs,
    scale up factors and increase rates for the year `yr`. The previous year's figures
    are read from the module-level `scale_up_factor_dict`, so years must be computed
    in increasing order, starting from the 2024 baseline.
    """
    # get data of previous year
    prev_year = yr - 1
    prev_data = scale_up_factor_dict[scenario][prev_year].copy()

    # this year's extra budget is r times last year's total cost, split by the given fractions;
    # the extra staff a cadre can hire follows from its annual salary
    prev_data['extra_budget_frac'] = extra_budget_frac
    prev_data['extra_budget'] = r * prev_data.annual_cost.sum() * prev_data.extra_budget_frac
    prev_data['extra_staff'] = prev_data.extra_budget / prev_data.Annual_Salary_USD
    prev_data['scale_up_factor'] = (prev_data.Staff_Count + prev_data.extra_staff) / prev_data.Staff_Count

    # store the updated data for the year yr
    new_data = prev_data[['Annual_Salary_USD', 'scale_up_factor']].copy()
    new_data['Staff_Count'] = prev_data.Staff_Count + prev_data.extra_staff
    new_data['annual_cost'] = prev_data.annual_cost + prev_data.extra_budget
    new_data['increase_rate'] = new_data['scale_up_factor'] - 1.0

    return new_data


# calculate scale up factors for all defined scenarios and years
staff_cost['scale_up_factor'] = 1
staff_cost['increase_rate'] = 0.0
# keyed by scenario, then by year (the inner dicts are filled below, so no pre-initialisation needed)
scale_up_factor_dict = {s: {} for s in extra_budget_fracs.columns}
for s in extra_budget_fracs.columns:
    # for the initial/current year of 2024
    scale_up_factor_dict[s][2024] = staff_cost.drop(columns='cost_frac').copy()
    # for the years with scaled up hr
    for y in range(2025, 2035):
        scale_up_factor_dict[s][y] = calculate_hr_scale_up_factor(list(extra_budget_fracs[s]), y, s)

# get the total cost and staff count for each year between 2024-2034 and each scenario
total_cost = pd.DataFrame(index=range(2024, 2035), columns=extra_budget_fracs.columns)
total_staff = pd.DataFrame(index=range(2024, 2035), columns=extra_budget_fracs.columns)
for y in total_cost.index:
    for s in extra_budget_fracs.columns:
        total_cost.loc[y, s] = scale_up_factor_dict[s][y].annual_cost.sum()
        total_staff.loc[y, s] = scale_up_factor_dict[s][y].Staff_Count.sum()

# check the total cost after 10 years is increased as expected, i.e. compounded yearly at rate R
assert (
    abs(total_cost.loc[2034, total_cost.columns[1:]] - (1 + R) ** 10 * total_cost.loc[2024, 's_0']) < 1e-6
).all()

# get the integrated scale up factors (product of the yearly factors) by the end of year 2034, per scenario
integrated_scale_up_factor = pd.DataFrame(index=cadre_all, columns=total_cost.columns).fillna(1.0)
for s in total_cost.columns[1:]:
    for yr in range(2025, 2035):
        integrated_scale_up_factor.loc[:, s] = np.multiply(
            integrated_scale_up_factor.loc[:, s].values,
            scale_up_factor_dict[s][yr].loc[:, 'scale_up_factor'].values
        )

# get normal (arithmetic) average increase rate over all years
sum_increase_rate = pd.DataFrame(index=cadre_all, columns=total_cost.columns).fillna(0.0)
for s in total_cost.columns[1:]:
    for yr in range(2025, 2035):
        sum_increase_rate.loc[:, s] = (
            sum_increase_rate.loc[:, s].values
            + scale_up_factor_dict[s][yr].loc[:, 'increase_rate'].values
        )
avg_increase_rate = pd.DataFrame(sum_increase_rate / 10)

# get the staff increase rate: 2034 vs 2024
increase_rate_2034 = pd.DataFrame(integrated_scale_up_factor - 1.0)
# geometric-mean yearly increase rate over the 10 years
avg_increase_rate_exp = pd.DataFrame(integrated_scale_up_factor**(1/10) - 1.0)

# get the linear regression prediction
# main analysis 10 runs
# -0.0699 + 1.0046 * x_clinical + 0.4170 * x_dcsa + 1.0309 * x_nursing + 0.2691 * x_pharmacy + 0.1965 * x_other,
# const = -0.0699
# coefs = [1.0046, 0.4170, 1.0309, 0.2691, 0.1965]
# predict_dalys_averted_percent = avg_increase_rate_exp.loc[
#     ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Dental'],
#     :].mul(coefs, axis=0).sum() + const
# extra_budget_fracs_sample = extra_budget_fracs_sample.T
# extra_budget_fracs_sample.loc[:, 'DALYs averted %'] = predict_dalys_averted_percent.values * 100

# save the sample (fixing DCSA = 2%, Other = 4%) to plot 3D-plot
# SAMPLE = extra_budget_fracs_sample[
#     ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DALYs averted %']
# ].drop(index='s_0', axis=0)

# extra_budget_fracs_sample.drop(
#     index=extra_budget_fracs_sample[extra_budget_fracs_sample['DALYs averted %'] < 8.0].index, inplace=True)
# extra_budget_fracs_sample['C + P'] = extra_budget_fracs_sample['Clinical'] + extra_budget_fracs_sample['Pharmacy']
# extra_budget_fracs_sample['C + NM'] = (extra_budget_fracs_sample['Clinical']
#                                        + extra_budget_fracs_sample['Nursing_and_Midwifery'])
# extra_budget_fracs_sample['NM + P'] = (extra_budget_fracs_sample['Nursing_and_Midwifery']
#                                        + extra_budget_fracs_sample['Pharmacy'])
# extra_budget_fracs_sample['C + NM + P'] = (extra_budget_fracs_sample['Nursing_and_Midwifery']
#                                            + extra_budget_fracs_sample['Pharmacy']
#                                            + extra_budget_fracs_sample['Clinical'])
# min_row = pd.DataFrame(extra_budget_fracs_sample.min(axis=0)).T.rename(index={0: 'Min'})
# max_row = pd.DataFrame(extra_budget_fracs_sample.max(axis=0)).T.rename(index={0: 'Max'})
# extra_budget_fracs_sample = pd.concat([extra_budget_fracs_sample, min_row, max_row])
# extra_budget_fracs_sample['Other'] = extra_budget_fracs_sample[other_group].sum(axis=1)
# extra_budget_fracs_sample.drop(columns=other_group, inplace=True)


def func_of_avg_increase_rate(cadre, scenario='s_2', r=R):
    """Return the average yearly growth rate of the staff of `cadre` over 2025-2034.

    The total HRH cost grows at rate `r` per year; the cadre receives its
    `extra_budget_fracs` share of the compounded 10-year extra budget, and the
    resulting overall staff scale-up is converted to a geometric-mean yearly rate.

    NOTE(review): this reads the module-level `staff_cost`, which the code below
    appends 'Other'/'Total' rows to and casts `annual_cost` to str — confirm this
    function is only called before those mutations (or on an earlier copy).
    """
    overall_scale_up = 1 + (staff_cost.annual_cost.sum()
                            * extra_budget_fracs.loc[cadre, scenario]
                            / staff_cost.loc[cadre, 'annual_cost']
                            * ((1 + r) ** 10 - 1)
                            )

    return overall_scale_up ** (1 / 10) - 1.0


# prepare 2024 cost info for the aggregated "Other" cadre group and a "Total" row
extra_rows = pd.DataFrame(columns=staff_cost.columns, index=['Other', 'Total'])
staff_cost = pd.concat([staff_cost, extra_rows], axis=0)
# the all-cadre total annual cost; the new 'Other'/'Total' rows are not in cadre_all,
# so this denominator is unaffected by the assignments below
total_annual_cost_all = staff_cost.loc[staff_cost.index.isin(cadre_all), 'annual_cost'].sum()
staff_cost.loc['Other', 'annual_cost'] = staff_cost.loc[staff_cost.index.isin(other_group), 'annual_cost'].sum()
staff_cost.loc['Total', 'annual_cost'] = total_annual_cost_all
staff_cost.loc['Other', 'Staff_Count'] = staff_cost.loc[staff_cost.index.isin(other_group), 'Staff_Count'].sum()
staff_cost.loc['Total', 'Staff_Count'] = staff_cost.loc[staff_cost.index.isin(cadre_all), 'Staff_Count'].sum()
staff_cost.loc['Other', 'cost_frac'] = staff_cost.loc['Other', 'annual_cost'] / total_annual_cost_all
staff_cost.loc['Total', 'cost_frac'] = staff_cost.loc['Total', 'annual_cost'] / total_annual_cost_all
# cast to str (presumably for reporting); note this makes the columns unusable for further arithmetic
staff_cost.annual_cost = staff_cost.annual_cost.astype(str)
staff_cost.cost_frac = staff_cost.cost_frac.astype(str)

# # save and read pickle file
# pickle_file_path = Path(resourcefilepath / 'healthsystem' / 'human_resources' / 'scaling_capabilities' /
#                         'ResourceFile_HR_expansion_by_officer_type_yearly_scale_up_factors.pickle')
#
# with open(pickle_file_path, 'wb') as f:
#     pickle.dump(scale_up_factor_dict, f)
#
# with open(pickle_file_path, 'rb') as f:
#     x = pickle.load(f)
"""
This file defines a batch run of a large population for a long time with all disease modules and full use of HSIs
It's used for analysis of impact of expanding funded hcw, assuming all other setting as default.

Run on the batch system using:
```
tlo batch-submit src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
```

or locally using:
```
tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
```
"""

from pathlib import Path
from typing import Dict

from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import (
    extra_budget_fracs,
)
from tlo import Date, logging
from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios
from tlo.methods.fullmodel import fullmodel
from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher
from tlo.scenario import BaseScenario


class HRHExpansionByCadreWithExtraBudget(BaseScenario):
    """Batch scenario that expands current HCW capabilities by officer type, allocating the
    yearly extra HRH budget according to the fractions defined in `extra_budget_fracs`."""

    def __init__(self):
        super().__init__()
        self.seed = 0  # change seed to 1 if to do another 5 runs per draw
        self.start_date = Date(2010, 1, 1)
        self.end_date = Date(2035, 1, 1)
        self.pop_size = 100_000
        self._scenarios = self._get_scenarios()
        self.number_of_draws = len(self._scenarios)
        self.runs_per_draw = 10

    def log_configuration(self):
        """Log at WARNING by default; demography, healthburden and the healthsystem summary at INFO."""
        return {
            'filename': 'scenario_run_for_hcw_expansion_analysis',
            'directory': Path('./outputs'),  # <- (specified only for local running)
            'custom_levels': {
                '*': logging.WARNING,
                'tlo.methods.demography': logging.INFO,
                'tlo.methods.demography.detail': logging.WARNING,
                'tlo.methods.healthburden': logging.INFO,
                'tlo.methods.healthsystem.summary': logging.INFO,
            }
        }

    def modules(self):
        """Use the full model plus the switcher that changes health-system/care-seeking settings mid-run."""
        return (fullmodel(resourcefilepath=self.resources) +
                [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)])

    def draw_parameters(self, draw_number, rng):
        """Return the parameter overrides for this draw (None once all scenarios are exhausted)."""
        if draw_number < len(self._scenarios):
            return list(self._scenarios.values())[draw_number]

    def _get_scenarios(self) -> Dict[str, Dict]:
        """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""

        self.YEAR_OF_MODE_CHANGE = 2020
        # HCW capabilities from data source are for year 2019,
        # and we want to rescale to effective capabilities in the end of 2019 considering model calibration
        self.YEAR_OF_HRH_EXPANSION = 2025
        # The start year to expand HRH by cadre given the extra budget, which is after the historical HRH scaling

        self.scenarios = extra_budget_fracs['s_*'].to_frame()
        # Run 'optimal' scenario for main analysis

        # Baseline settings for change
        self.cons_availability = ['all', 'default']
        self.hr_budget = [0.042, 0.058, 0.026]
        self.hs_function = [[False, False], [False, True]]

        self.baselines = {
            'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
            # 'default_cons': self._baseline_default_cons(),
            # 'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
            # 'less_budget': self._baseline_less_budget(),  # turn off when run baseline scenarios with no expansion
            # 'max_hs_function': self._baseline_max_hs_function(),
        }

        # one draw per (baseline, extra-budget-allocation column) pair, named "<baseline> <scenario>"
        return {
            b + ' ' + self.scenarios.columns[i]:
                mix_scenarios(
                    self.baselines[b],
                    {'HealthSystem': {
                        'HR_expansion_by_officer_type': self.scenarios.iloc[:, i].to_dict()
                    }
                    }
                ) for b in self.baselines.keys() for i in range(len(self.scenarios.columns))
        }

    def _baseline_of_baseline(self) -> Dict:
        """Shared baseline: mode 2 from 2020 with effective-capability rescaling, historical HR
        scaling over 2020-2024, and consumables switch plus HR expansion from 2025."""
        return mix_scenarios(
            get_parameters_for_status_quo(),
            {
                'HealthSystem': {
                    'mode_appt_constraints': 1,
                    'mode_appt_constraints_postSwitch': 2,
                    "scale_to_effective_capabilities": True,
                    # This happens in the year before mode change, as the model calibration is done by that year
                    "year_mode_switch": self.YEAR_OF_MODE_CHANGE,
                    'cons_availability': 'default',
                    'cons_availability_postSwitch': self.cons_availability[0],
                    'year_cons_availability_switch': self.YEAR_OF_HRH_EXPANSION,
                    'HR_budget_growth_rate': self.hr_budget[0],
                    'yearly_HR_scaling_mode': 'historical_scaling',  # for 5 years of 2020-2024; source data year 2019
                    'start_year_HR_expansion_by_officer_type': self.YEAR_OF_HRH_EXPANSION,
                    'end_year_HR_expansion_by_officer_type': self.end_date.year,
                    "policy_name": 'Naive',
                    "tclose_overwrite": 1,
                    "tclose_days_offset_overwrite": 7,
                },
                'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': {
                    'max_healthcare_seeking': [False, False],
                    'max_healthsystem_function': self.hs_function[0],
                    'year_of_switch': self.YEAR_OF_HRH_EXPANSION,
                }
            },
        )

    def _baseline_default_cons(self) -> Dict:
        """Baseline variant: 'default' (rather than 'all') consumables availability after the switch."""
        return mix_scenarios(
            self._baseline_of_baseline(),
            {
                'HealthSystem': {
                    'cons_availability_postSwitch': self.cons_availability[1],
                },
            },
        )

    def _baseline_more_budget(self) -> Dict:
        """Baseline variant: higher HRH budget growth rate (0.058)."""
        return mix_scenarios(
            self._baseline_of_baseline(),
            {
                'HealthSystem': {
                    'HR_budget_growth_rate': self.hr_budget[1],
                },
            },
        )

    def _baseline_less_budget(self) -> Dict:
        """Baseline variant: lower HRH budget growth rate (0.026)."""
        return mix_scenarios(
            self._baseline_of_baseline(),
            {
                'HealthSystem': {
                    'HR_budget_growth_rate': self.hr_budget[2],
                },
            },
        )

    def _baseline_max_hs_function(self) -> Dict:
        """Baseline variant: maximal health-system function after the switch."""
        return mix_scenarios(
            self._baseline_of_baseline(),
            {
                'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': {
                    'max_healthsystem_function': self.hs_function[1],
                }
            },
        )


if __name__ == '__main__':
    from tlo.cli import scenario_run

    scenario_run([__file__])
a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -307,6 +307,35 @@ class HealthSystem(Module): "a worksheet of the file `ResourceFile_dynamic_HR_scaling.xlsx`." ), + 'HR_expansion_by_officer_type': Parameter( + Types.DICT, "This DICT has keys of nine officer types, each with a float value that " + "specifies the proportion of extra budget allocated to that officer type." + "The extra budget for this year is (100 * HR_budget_growth_rate) percent of the total salary " + "of these officers in last year. Given the allocated extra budget and annual salary, " + "we calculate the extra minutes for these staff of this year. The expansion is done " + "on 1 Jan of every year from start_year_HR_expansion_by_officer_type." + ), + "HR_budget_growth_rate": Parameter( + Types.REAL, "This number is the annual growth rate of HR budget. " + "The default value is 0.042 (4.2%), assuming the annual GDP growth rate is 4.2% and " + "the proportion of GDP expenditure on paying salaries of these staff is fixed " + ), + + 'start_year_HR_expansion_by_officer_type': Parameter( + Types.INT, "Year from which the HR expansion by officer type will take place. The change happens " + "on 1 Jan of every year onwards." + ), + + 'end_year_HR_expansion_by_officer_type': Parameter( + Types.INT, "Year in which the HR expansion by officer type will stop. This happens on 1 Jan of " + "that year. When submit the scenario to run, this should be the same year of the end year of " + "the run." + ), + + 'minute_salary': Parameter( + Types.DATA_FRAME, "This specifies the minute salary in USD per officer type per facility id." + ), + 'tclose_overwrite': Parameter( Types.INT, "Decide whether to overwrite tclose variables assigned by disease modules"), @@ -627,6 +656,20 @@ def read_parameters(self, data_folder): # Ensure that a value for the year at the start of the simulation is provided. 
assert all(2010 in sheet['year'].values for sheet in self.parameters['yearly_HR_scaling'].values()) + # Read in ResourceFile_Annual_Salary_Per_Cadre.csv + self.parameters['minute_salary'] = pd.read_csv( + Path(self.resourcefilepath) / 'costing' / 'Minute_Salary_HR.csv') + + # Set default values for HR_expansion_by_officer_type, start_year_HR_expansion_by_officer_type, + # end_year_HR_expansion_by_officer_type + self.parameters['HR_expansion_by_officer_type'] = { + 'Clinical': 0, 'DCSA': 0, 'Nursing_and_Midwifery': 0, 'Pharmacy': 0, + 'Dental': 0, 'Laboratory': 0, 'Mental': 0, 'Nutrition': 0, 'Radiography': 0 + } + self.parameters['HR_budget_growth_rate'] = 0.042 + self.parameters['start_year_HR_expansion_by_officer_type'] = 2025 + self.parameters['end_year_HR_expansion_by_officer_type'] = 2035 + def pre_initialise_population(self): """Generate the accessory classes used by the HealthSystem and pass to them the data that has been read.""" @@ -775,6 +818,12 @@ def initialise_simulation(self, sim): # whilst the actual scaling will only take effect from 2011 onwards. sim.schedule_event(DynamicRescalingHRCapabilities(self), Date(sim.date)) + # Schedule recurring event that expands HR by officer type + # from the start_year_HR_expansion_by_officer_type to the end_year_HR_expansion_by_officer_type. 
+ for yr in range(self.parameters["start_year_HR_expansion_by_officer_type"], + self.parameters["end_year_HR_expansion_by_officer_type"]): + sim.schedule_event(HRExpansionByOfficerType(self), Date(yr, 1, 1)) + # Schedule the logger to occur at the start of every year sim.schedule_event(HealthSystemLogger(self), Date(sim.date.year, 1, 1)) @@ -961,7 +1010,7 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple self.parameters[f'Daily_Capabilities_{use_funded_or_actual_staffing}'] ) capabilities = capabilities.rename(columns={'Officer_Category': 'Officer_Type_Code'}) # neaten - + # Create new column where capabilities per staff are computed capabilities['Mins_Per_Day_Per_Staff'] = capabilities['Total_Mins_Per_Day']/capabilities['Staff_Count'] @@ -984,10 +1033,10 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple # Merge in information about facility from Master Facilities List mfl = self.parameters['Master_Facilities_List'] capabilities_ex = capabilities_ex.merge(mfl, on='Facility_ID', how='left') - + # Create a copy of this to store staff counts capabilities_per_staff_ex = capabilities_ex.copy() - + # Merge in information about officers # officer_types = self.parameters['Officer_Types_Table'][['Officer_Type_Code', 'Officer_Type']] # capabilities_ex = capabilities_ex.merge(officer_types, on='Officer_Type_Code', how='left') @@ -1000,7 +1049,7 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple how='left', ) capabilities_ex = capabilities_ex.fillna(0) - + capabilities_per_staff_ex = capabilities_per_staff_ex.merge( capabilities[['Facility_ID', 'Officer_Type_Code', 'Mins_Per_Day_Per_Staff']], on=['Facility_ID', 'Officer_Type_Code'], @@ -1015,7 +1064,7 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple + '_Officer_' + capabilities_ex['Officer_Type_Code'] ) - + # Give the standard index: capabilities_per_staff_ex = 
capabilities_per_staff_ex.set_index( 'FacilityID_' @@ -1041,6 +1090,11 @@ def _rescale_capabilities_to_capture_effective_capability(self): # Note: Currently relying on module variable rather than parameter for # scale_to_effective_capabilities, in order to facilitate testing. However # this may eventually come into conflict with the Switcher functions. + + # In addition, for Class HRExpansionByOfficerType, + # for the purpose of keep cost not scaled, need to scale down minute salary when capabilities are scaled up + + minute_salary = self.parameters['minute_salary'] pattern = r"FacilityID_(\w+)_Officer_(\w+)" for officer in self._daily_capabilities.keys(): matches = re.match(pattern, officer) @@ -1050,16 +1104,22 @@ def _rescale_capabilities_to_capture_effective_capability(self): level = self._facility_by_facility_id[facility_id].level # Only rescale if rescaling factor is greater than 1 (i.e. don't reduce # available capabilities if these were under-used the previous year). + # Later, we might want to rescale capabilities by rescaling factor of officer type and facility id + # (i.e., officer type, district and level specific), + # which will need fraction of time used by officer type and facility id. rescaling_factor = self._summary_counter.frac_time_used_by_officer_type_and_level( officer_type=officer_type, level=level ) if rescaling_factor > 1 and rescaling_factor != float("inf"): self._daily_capabilities[officer] *= rescaling_factor - + # We assume that increased daily capabilities is a result of each staff performing more # daily patient facing time per day than contracted (or equivalently performing appts more # efficiently). 
self._daily_capabilities_per_staff[officer] *= rescaling_factor + minute_salary.loc[(minute_salary.Facility_ID == facility_id) + & (minute_salary.Officer_Type_Code == officer_type), + 'Minute_Salary_USD'] /= rescaling_factor def update_consumables_availability_to_represent_merging_of_levels_1b_and_2(self, df_original): """To represent that facility levels '1b' and '2' are merged together under the label '2', we replace the @@ -1766,6 +1826,7 @@ def write_to_hsi_log( squeeze_factor=squeeze_factor, appt_footprint=event_details.appt_footprint, level=event_details.facility_level, + fac_id=facility_id if facility_id is not None else -99, ) def call_and_record_never_ran_hsi_event(self, hsi_event, priority=None): @@ -1824,6 +1885,7 @@ def write_to_never_ran_hsi_log( hsi_event_name=event_details.event_name, appt_footprint=event_details.appt_footprint, level=event_details.facility_level, + fac_id=facility_id if facility_id is not None else -99, ) def log_current_capabilities_and_usage(self): @@ -1847,23 +1909,41 @@ def log_current_capabilities_and_usage(self): comparison['Minutes_Used'].sum() / total_available if total_available > 0 else 0 ) - # Compute Fraction of Time Used In Each Facility - facility_id = [_f.split('_')[1] for _f in comparison.index] - summary_by_fac_id = comparison.groupby(by=facility_id)[['Total_Minutes_Per_Day', 'Minutes_Used']].sum() - summary_by_fac_id['Fraction_Time_Used'] = ( - summary_by_fac_id['Minutes_Used'] / summary_by_fac_id['Total_Minutes_Per_Day'] - ).replace([np.inf, -np.inf, np.nan], 0.0) + def compute_fraction_of_time_used(groups): + """ + This will take in the groups for the groupby and calculate the fraction of time used for each group. 
+ :param groups: list of groups + :return: dataframe with groups as the index and time measures as the columns + """ + _summary = comparison.groupby(by=groups)[['Total_Minutes_Per_Day', 'Minutes_Used']].sum() + _summary['Fraction_Time_Used'] = ( + _summary['Minutes_Used'] / _summary['Total_Minutes_Per_Day'] + ).replace([np.inf, -np.inf, np.nan], 0.0) - # Compute Fraction of Time For Each Officer and level + return _summary + + # Get facility id, officer, level, district groups + facility_id = [_f.split('_')[1] for _f in comparison.index] officer = [_f.rsplit('Officer_')[1] for _f in comparison.index] level = [self._facility_by_facility_id[int(_fac_id)].level for _fac_id in facility_id] level = list(map(lambda x: x.replace('1b', '2'), level)) - summary_by_officer = comparison.groupby(by=[officer, level])[['Total_Minutes_Per_Day', 'Minutes_Used']].sum() - summary_by_officer['Fraction_Time_Used'] = ( - summary_by_officer['Minutes_Used'] / summary_by_officer['Total_Minutes_Per_Day'] - ).replace([np.inf, -np.inf, np.nan], 0.0) + district = [self._facility_by_facility_id[int(_fac_id)].name.split('_')[-1] for _fac_id in facility_id] + + # Compute Fraction of Time Used In Each Facility + summary_by_fac_id = compute_fraction_of_time_used(facility_id) + + # Compute Fraction of Time For Each Officer and Level + summary_by_officer = compute_fraction_of_time_used([officer, level]) summary_by_officer.index.names = ['Officer_Type', 'Facility_Level'] + # Compute raction of Time For Each Officer and District + summary_by_officer_district = compute_fraction_of_time_used([officer, district]) + summary_by_officer_district.index.names = ['Officer_Type', 'District'] + + # Compute Fraction of Time by Officer, Level and District + summary_by_officer_level_district = compute_fraction_of_time_used([officer, level, district]) + summary_by_officer_level_district.index.names = ['Officer_Type', 'Facility_Level', 'District'] + logger.info(key='Capacity', data={ 'Frac_Time_Used_Overall': 
fraction_time_used_overall, @@ -1876,7 +1956,10 @@ def log_current_capabilities_and_usage(self): self._summary_counter.record_hs_status( fraction_time_used_across_all_facilities=fraction_time_used_overall, - fraction_time_used_by_officer_type_and_level=summary_by_officer["Fraction_Time_Used"].to_dict() + fraction_time_used_by_officer_type_and_level=summary_by_officer["Fraction_Time_Used"].to_dict(), + fraction_time_used_by_officer_district=summary_by_officer_district["Fraction_Time_Used"].to_dict(), + fraction_time_used_by_officer_level_district=summary_by_officer_level_district[ + 'Fraction_Time_Used'].to_dict(), ) def remove_beddays_footprint(self, person_id): @@ -2635,14 +2718,19 @@ def _reset_internal_stores(self) -> None: self._no_blank_appt_treatment_ids = defaultdict(int) # As above, but for `HSI_Event`s with non-blank footprint self._no_blank_appt_appts = defaultdict(int) # As above, but for `HSI_Event`s that with non-blank footprint self._no_blank_appt_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')} + fac_ids = list(range(133)) + [-1, -99] # 133 "real" facilities + 2 dummy facilities + self._no_blank_appt_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in fac_ids} # Log HSI_Events that never ran to monitor shortcoming of Health System self._never_ran_treatment_ids = defaultdict(int) # As above, but for `HSI_Event`s that never ran self._never_ran_appts = defaultdict(int) # As above, but for `HSI_Event`s that have never ran self._never_ran_appts_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')} + self._never_ran_appts_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in fac_ids} self._frac_time_used_overall = [] # Running record of the usage of the healthcare system self._sum_of_daily_frac_time_used_by_officer_type_and_level = Counter() + self._sum_of_daily_frac_time_used_by_officer_district = Counter() + self._sum_of_daily_frac_time_used_by_officer_level_district = Counter() 
self._squeeze_factor_by_hsi_event_name = defaultdict(list) # Running record the squeeze-factor applying to each # treatment_id. Key is of the form: # ":" @@ -2652,7 +2740,8 @@ def record_hsi_event(self, hsi_event_name: str, squeeze_factor: float, appt_footprint: Counter, - level: str + level: str, + fac_id: int, ) -> None: """Add information about an `HSI_Event` to the running summaries.""" @@ -2675,12 +2764,14 @@ def record_hsi_event(self, for appt_type, number in appt_footprint: self._no_blank_appt_appts[appt_type] += number self._no_blank_appt_by_level[level][appt_type] += number + self._no_blank_appt_by_fac_id[fac_id][appt_type] += number def record_never_ran_hsi_event(self, treatment_id: str, hsi_event_name: str, appt_footprint: Counter, - level: str + level: str, + fac_id: int, ) -> None: """Add information about a never-ran `HSI_Event` to the running summaries.""" @@ -2691,17 +2782,24 @@ def record_never_ran_hsi_event(self, for appt_type, number in appt_footprint: self._never_ran_appts[appt_type] += number self._never_ran_appts_by_level[level][appt_type] += number + self._never_ran_appts_by_fac_id[fac_id][appt_type] += number def record_hs_status( self, fraction_time_used_across_all_facilities: float, fraction_time_used_by_officer_type_and_level: Dict[Tuple[str, int], float], + fraction_time_used_by_officer_district: Dict[Tuple[str, str], float], + fraction_time_used_by_officer_level_district: Dict[Tuple[str, str, str], float], ) -> None: """Record a current status metric of the HealthSystem.""" # The fraction of all healthcare worker time that is used: self._frac_time_used_overall.append(fraction_time_used_across_all_facilities) for officer_type_facility_level, fraction_time in fraction_time_used_by_officer_type_and_level.items(): self._sum_of_daily_frac_time_used_by_officer_type_and_level[officer_type_facility_level] += fraction_time + for officer_district, fraction_time in fraction_time_used_by_officer_district.items(): + 
self._sum_of_daily_frac_time_used_by_officer_district[officer_district] += fraction_time + for officer_level_district, fraction_time in fraction_time_used_by_officer_level_district.items(): + self._sum_of_daily_frac_time_used_by_officer_level_district[officer_level_district] += fraction_time def write_to_log_and_reset_counters(self): """Log summary statistics reset the data structures. This usually occurs at the end of the year.""" @@ -2724,9 +2822,10 @@ def write_to_log_and_reset_counters(self): key="HSI_Event_non_blank_appt_footprint", description="Same as for key 'HSI_Event' but limited to HSI_Event that have non-blank footprints", data={ - "TREATMENT_ID": self._no_blank_appt_treatment_ids, - "Number_By_Appt_Type_Code": self._no_blank_appt_appts, - "Number_By_Appt_Type_Code_And_Level": self._no_blank_appt_by_level, + "TREATMENT_ID": self._no_blank_appt_treatment_ids, + "Number_By_Appt_Type_Code": self._no_blank_appt_appts, + "Number_By_Appt_Type_Code_And_Level": self._no_blank_appt_by_level, + "Number_By_Appt_Type_Code_And_FacilityID": self._no_blank_appt_by_fac_id, }, ) @@ -2739,6 +2838,7 @@ def write_to_log_and_reset_counters(self): "TREATMENT_ID": self._never_ran_treatment_ids, "Number_By_Appt_Type_Code": self._never_ran_appts, "Number_By_Appt_Type_Code_And_Level": self._never_ran_appts_by_level, + "Number_By_Appt_Type_Code_And_FacilityID": self._never_ran_appts_by_fac_id, }, ) @@ -2762,6 +2862,26 @@ def write_to_log_and_reset_counters(self): self.frac_time_used_by_officer_type_and_level()), ) + # Log mean of 'fraction time used by officer type and district' from daily entries from the previous + # year. 
+ logger_summary.info( + key="Capacity_By_OfficerType_And_District", + description="The fraction of healthcare worker time that is used each day, averaged over this " + "calendar year, for each officer type in each district.", + data=flatten_multi_index_series_into_dict_for_logging( + self.frac_time_used_by_officer_district()), + ) + + # Log mean of 'fraction time used by officer type and facility level and district' from daily entries from the + # previous year. + logger_summary.info( + key="Capacity_By_OfficerType_And_FacilityLevel_And_District", + description="The fraction of healthcare worker time that is used each day, averaged over this " + "calendar year, for each officer type at each facility level in each district.", + data=flatten_multi_index_series_into_dict_for_logging( + self.frac_time_used_by_officer_level_district()), + ) + self._reset_internal_stores() def frac_time_used_by_officer_type_and_level( @@ -2794,6 +2914,70 @@ def frac_time_used_by_officer_type_and_level( data=mean_frac_time_used.values() ).sort_index() + def frac_time_used_by_officer_district( + self, + officer_type: Optional[str]=None, + district: Optional[str]=None, + ) -> Union[float, pd.Series]: + """Average fraction of time used by officer type and district since last reset. + If `officer_type` and/or `district` is not provided (left to default to `None`) then a pd.Series with a multi-index + is returned giving the result for all officer_types/levels.""" + + if (officer_type is not None) and (district is not None): + return ( + self._sum_of_daily_frac_time_used_by_officer_district[officer_type, district] + / len(self._frac_time_used_overall) + # Use len(self._frac_time_used_overall) as proxy for number of days in past year. 
+ ) + else: + # Return multiple in the form of a pd.Series with multiindex + mean_frac_time_used = { + (_officer_type, _district): v / len(self._frac_time_used_overall) + for (_officer_type, _district), v in self._sum_of_daily_frac_time_used_by_officer_district.items() + if (_officer_type == officer_type or officer_type is None) and ( + _district == district or district is None) + } + return pd.Series( + index=pd.MultiIndex.from_tuples( + mean_frac_time_used.keys(), + names=['OfficerType', 'District'] + ), + data=mean_frac_time_used.values() + ).sort_index() + + def frac_time_used_by_officer_level_district( + self, + officer_type: Optional[str]=None, + level: Optional[str]=None, + district: Optional[str]=None, + ) -> Union[float, pd.Series]: + """Average fraction of time used by officer, level and district since last reset. + If `officer_type` and/or `level` and/or 'district' is not provided (left to default to `None`), + then a pd.Series with a multi-index is returned giving the result for all officer_types/levels/districts.""" + + if (officer_type is not None) and (level is not None) and (district is not None): + return ( + self._sum_of_daily_frac_time_used_by_officer_level_district[officer_type, level, district] + / len(self._frac_time_used_overall) + # Use len(self._frac_time_used_overall) as proxy for number of days in past year. 
+ ) + else: + # Return multiple in the form of a pd.Series with multiindex + mean_frac_time_used = { + (_officer_type, _level, _district): v / len(self._frac_time_used_overall) + for (_officer_type, _level, _district), v in self._sum_of_daily_frac_time_used_by_officer_level_district.items() + if (_officer_type == officer_type or officer_type is None) and (_level == level or level is None) and ( + _district == district or district is None) + } + return pd.Series( + index=pd.MultiIndex.from_tuples( + mean_frac_time_used.keys(), + names=['OfficerType', 'FacilityLevel', 'District'] + ), + data=mean_frac_time_used.values() + ).sort_index() + + class HealthSystemChangeParameters(Event, PopulationScopeEventMixin): """Event that causes certain internal parameters of the HealthSystem to be changed; specifically: * `mode_appt_constraints` @@ -2917,6 +3101,8 @@ def apply(self, population): HR_scaling_factor_by_district = self.module.parameters['HR_scaling_by_district_table'][ self.module.parameters['HR_scaling_by_district_mode'] ].set_index('District').to_dict() + # todo: add entries for facilities at and beyond level 3, + # so that the district list would match the facility IDs fully. pattern = r"FacilityID_(\w+)_Officer_(\w+)" @@ -2925,10 +3111,79 @@ def apply(self, population): # Extract ID and officer type from facility_id = int(matches.group(1)) district = self.module._facility_by_facility_id[facility_id].district + # todo: check if district callable; a fix might be + # district = self.module._facility_by_facility_id[facility_id].name.split('_')[-1] if district in HR_scaling_factor_by_district: self.module._daily_capabilities[officer] *= HR_scaling_factor_by_district[district] +class HRExpansionByOfficerType(Event, PopulationScopeEventMixin): + """ This event exists to expand the HR by officer type (Clinical, DCSA, Nursing_and_Midwifery, Pharmacy) + given an extra budget. 
This is done for daily capabilities, as a year consists of 365.25 equal days.""" + def __init__(self, module): + super().__init__(module) + + def apply(self, population): + + # get minute salary + minute_salary_by_officer_facility_id = self.module.parameters['minute_salary'] + + # get current daily minutes and format it to be consistent with minute salary + daily_minutes = pd.DataFrame(self.module._daily_capabilities).reset_index().rename( + columns={'index': 'facilityid_officer'}) + daily_minutes[['Facility_ID', 'Officer_Type_Code']] = daily_minutes.facilityid_officer.str.split( + pat='_', n=3, expand=True)[[1, 3]] + daily_minutes['Facility_ID'] = daily_minutes['Facility_ID'].astype(int) + + # get daily cost per officer type per facility id + daily_cost = minute_salary_by_officer_facility_id.merge( + daily_minutes, on=['Facility_ID', 'Officer_Type_Code'], how='outer') + daily_cost['Total_Cost_Per_Day'] = daily_cost['Minute_Salary_USD'] * daily_cost['Total_Minutes_Per_Day'] + + # get daily cost per officer type + daily_cost = daily_cost.groupby('Officer_Type_Code').agg({'Total_Cost_Per_Day': 'sum'}) + + # get daily extra budget for this year + daily_extra_budget = (self.module.parameters['HR_budget_growth_rate'] + * daily_cost.Total_Cost_Per_Day.sum()) + + # get proportional daily extra budget for each officer type + extra_budget_fraction = pd.Series(self.module.parameters['HR_expansion_by_officer_type']) + assert set(extra_budget_fraction.index) == set(daily_cost.index), \ + "Input officer types do not match the defined officer types" + daily_cost = daily_cost.reindex(index=extra_budget_fraction.index) + daily_cost['extra_budget_per_day'] = daily_extra_budget * extra_budget_fraction + + # get the scale up factor for each officer type, assumed to be the same for each facility id of that + # officer type (note "cost = available minutes * minute salary", thus we could directly calculate + # scale up factor using cost) + daily_cost['scale_up_factor'] = ( + 
(daily_cost.extra_budget_per_day + daily_cost.Total_Cost_Per_Day) / daily_cost.Total_Cost_Per_Day + ) + + # scale up the daily minutes per cadre per facility id + pattern = r"FacilityID_(\w+)_Officer_(\w+)" + for officer in self.module._daily_capabilities.keys(): + matches = re.match(pattern, officer) + # Extract officer type + officer_type = matches.group(2) + self.module._daily_capabilities[officer] *= daily_cost.loc[officer_type, 'scale_up_factor'] + + # save the scale up factor, updated cost and updated capabilities into logger + # note that cost and capabilities are on the actual scale, + # not normalised by the self.capabilities_coefficient parameter + total_cost_this_year = 365.25 * (daily_cost.Total_Cost_Per_Day + daily_cost.extra_budget_per_day) + total_capabilities_this_year = (365.25 * self.module._daily_capabilities) + logger_summary.info(key='HRScaling', + description='The HR scale up factor by office type given fractions of an extra budget', + data={ + 'scale_up_factor': daily_cost.scale_up_factor.to_dict(), + 'total_hr_salary': total_cost_this_year.to_dict(), + 'total_hr_capabilities': total_capabilities_this_year.to_dict() + } + ) + + class HealthSystemChangeMode(RegularEvent, PopulationScopeEventMixin): """ This event exists to change the priority policy adopted by the HealthSystem at a given year. 
""" diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py index 875e3e03d4..f9db70dd06 100644 --- a/tests/test_healthsystem.py +++ b/tests/test_healthsystem.py @@ -2573,6 +2573,106 @@ def get_capabilities(yearly_scaling: bool, scaling_by_level: bool, rescaling: bo assert caps_scaling_by_both_with_rescaling > caps_only_scaling_by_year_with_rescaling +def test_HR_expansion_by_officer_type(seed, tmpdir): + """Check that we can use the parameter `HR_expansion_by_officer_type` to update the minutes of time available + for healthcare workers.""" + + def get_initial_capabilities() -> pd.DataFrame: + sim = Simulation(start_date=start_date, seed=seed) + sim.register( + demography.Demography(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath) + ) + popsize=100 + sim.make_initial_population(n=popsize) + sim.simulate(end_date=start_date + pd.DateOffset(days=0)) + + caps = pd.DataFrame(sim.modules['HealthSystem'].capabilities_today) + caps = caps[caps != 0] + + return caps + + def get_capabilities_after_update(end_year, HR_expansion_by_officer_type) -> pd.Series: + sim = Simulation(start_date=start_date, seed=seed) + sim.register( + demography.Demography(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath), + simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + + ) + params = sim.modules['HealthSystem'].parameters + params['start_year_HR_expansion_by_officer_type'] = 2011 # first update happens on 1 Jan 2011 + params['end_year_HR_expansion_by_officer_type'] = end_year # last update happens on 1 Jan (end_year - 1) + params['HR_expansion_by_officer_type'] = HR_expansion_by_officer_type.to_dict() + + # for testing _rescale_capabilities_to_capture_effective_capability + params['year_mode_switch'] = 2011 + params['scale_to_effective_capabilities'] = True + + popsize = 100 + sim.make_initial_population(n=popsize) + + sim.simulate(end_date=Date(end_year, 1, 
1)) + + caps = pd.DataFrame(sim.modules['HealthSystem'].capabilities_today) + caps = caps[caps != 0] + + return caps + + initial_caps = get_initial_capabilities() + test_fracs = pd.DataFrame( + index=['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', + 'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'], + data={'no_update': [0, 0, 0, 0, 0, 0, 0, 0, 0], + 'clinical_one_update': [1, 0, 0, 0, 0, 0, 0, 0, 0], + 'clinical_dcsa_one_update': [0.5, 0.5, 0, 0, 0, 0, 0, 0, 0], + 'clinical_two_updates': [1, 0, 0, 0, 0, 0, 0, 0, 0]} + ) + caps_clinical_no_update = get_capabilities_after_update(2012, test_fracs.no_update) + caps_clinical_one_update = get_capabilities_after_update(2012, test_fracs.clinical_one_update) + caps_clinical_dcsa_one_update = get_capabilities_after_update(2012, + test_fracs.clinical_dcsa_one_update) + caps_clinical_two_updates = get_capabilities_after_update(2013, test_fracs.clinical_two_updates) + + # check that the cadres are expanded as expected + def compare(cadre, caps_1, caps_2) -> tuple: + + assert (caps_1.index == caps_2.index).all() + comp_caps_0 = caps_1.merge(caps_2, left_index=True, right_index=True) + comp_caps_0 = comp_caps_0[comp_caps_0.index.str.contains(cadre, regex=True)] + ratio = (comp_caps_0.iloc[:, 1] / comp_caps_0.iloc[:, 0]).dropna() + + return (ratio > 1).all(), (abs(ratio - ratio.unique()[0]) < 1e-6).all() + + # initial_caps vs caps_clinical_no_update + # check if the clinical cadre of each facility id is not expanded + assert not compare('Clinical', initial_caps, caps_clinical_no_update)[0] + + # initial_caps vs caps_clinical_one_update + # check if the clinical cadre of each facility id is expanded + assert compare('Clinical', initial_caps, caps_clinical_one_update)[0] + # check if the cadre is expanded by the same ratio of each facilty id + assert compare('Clinical', initial_caps, caps_clinical_one_update)[1] + + # caps_clinical_one_update vs caps_clinical_two_updates + # check if the clinical cadre of each 
facility id is expanded more in the latter scenario with two updates + assert compare('Clinical', caps_clinical_one_update, caps_clinical_two_updates)[0] + # check if the cadre is expanded by the same ratio of each facilty id + assert compare('Clinical', caps_clinical_one_update, caps_clinical_two_updates)[1] + + # initial_caps vs caps_clinical_dcsa_one_update + # check if the DCSA cadre of each facility id is expanded + assert compare('DCSA', initial_caps, caps_clinical_dcsa_one_update)[0] + # check if the cadre is expanded by the same ratio of each facilty id + assert compare('DCSA', initial_caps, caps_clinical_dcsa_one_update)[1] + + # caps_clinical_one_update vs caps_clinical_dcsa_one_update + # check if the cadre of each facility id is expanded less in the latter scenario with a smaller frac of extra budget + assert compare('Clinical', caps_clinical_dcsa_one_update, caps_clinical_one_update)[0] + # check if the cadre is expanded by the same ratio of each facilty id + assert compare('Clinical', caps_clinical_dcsa_one_update, caps_clinical_one_update)[1] + + def test_logging_of_only_hsi_events_with_non_blank_footprints(tmpdir): """Run the simulation with an HSI_Event that may have a blank_footprint and examine the healthsystem.summary logger. * If the footprint is blank, the HSI event should be recorded in the usual loggers but not the 'no_blank' logger