From 257fe6c3584a2dc19d40a2b73107f53287a8f6f8 Mon Sep 17 00:00:00 2001 From: TinyMarsh Date: Mon, 1 Jul 2024 15:54:00 +0100 Subject: [PATCH 1/4] Increase size of vector to accommodate extra stats --- src/HealthGPS/analysis_module.cpp | 37 ++++++++++++++++++++++--------- src/HealthGPS/analysis_module.h | 2 +- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/src/HealthGPS/analysis_module.cpp b/src/HealthGPS/analysis_module.cpp index d6045e8c2..957554ea6 100644 --- a/src/HealthGPS/analysis_module.cpp +++ b/src/HealthGPS/analysis_module.cpp @@ -22,13 +22,13 @@ AnalysisModule::AnalysisModule(AnalysisDefinition &&definition, WeightModel &&cl residual_disability_weight_{create_age_gender_table(age_range)}, comorbidities_{comorbidities} {} -// Overload constructor with additional parameter for calculated_factors_ +// Overload constructor with additional parameter for calculated_stats_ AnalysisModule::AnalysisModule(AnalysisDefinition &&definition, WeightModel &&classifier, const core::IntegerInterval age_range, unsigned int comorbidities, - std::vector calculated_factors) + std::vector calculated_stats) : definition_{std::move(definition)}, weight_classifier_{std::move(classifier)}, residual_disability_weight_{create_age_gender_table(age_range)}, - comorbidities_{comorbidities}, calculated_factors_{std::move(calculated_factors)} {} + comorbidities_{comorbidities}, calculated_stats_{std::move(calculated_stats)} {} SimulationModuleType AnalysisModule::type() const noexcept { return SimulationModuleType::Analysis; } @@ -59,17 +59,32 @@ void AnalysisModule::initialise_vector(RuntimeContext &context) { factor_bin_widths_.push_back((max_factor - min_factor) / factor_bins_.back()); } - // The number of factors to calculate is the number of factors minus the length of the `factors` - // vector. 
- size_t num_factors_to_calc = context.mapping().entries().size() - factors_to_calculate_.size(); + // The number of factors to calculate stats for is the number of factors minus the length of the + // `factors` vector. + size_t num_stats_to_calc = context.mapping().entries().size() - factors_to_calculate_.size(); + + // But we calculate the mean and standard deviation of each factor, so we need to multiply by 2 + num_stats_to_calc *= 2; + + // We also want to calculate the prevalence and incidence of each disease + num_stats_to_calc += 2 * context.diseases().size(); + + // We also want to keep the count, deaths, and emigrations + num_stats_to_calc += 3; + + // We also want to calculate the normal, overweight, obese, and above weight prevalence + num_stats_to_calc += 4; + + // Finally, we want to calculate the mean and standard deviation of YLL, YLD, and DALY + num_stats_to_calc += 6; // The product of the number of bins for each factor can be used to calculate the size of the - // `calculated_factors_` in the next step + // `calculated_stats_` in the next step size_t total_num_bins = std::accumulate(factor_bins_.cbegin(), factor_bins_.cend(), size_t{1}, std::multiplies<>()); // Set the vector size and initialise all values to 0.0 - calculated_factors_.resize(total_num_bins * num_factors_to_calc); + calculated_stats_.resize(total_num_bins * num_stats_to_calc); } const std::string &AnalysisModule::name() const noexcept { return name_; } @@ -115,7 +130,7 @@ void AnalysisModule::initialise_population(RuntimeContext &context) { void AnalysisModule::update_population(RuntimeContext &context) { // Reset the calculated factors vector to 0.0 - std::ranges::fill(calculated_factors_, 0.0); + std::ranges::fill(calculated_stats_, 0.0); publish_result_message(context); } @@ -328,7 +343,7 @@ void AnalysisModule::calculate_population_statistics(RuntimeContext &context) { bin_indices.push_back(bin_index); } - // Calculate the index in the calculated_factors_ vector + // Calculate 
the index in the calculated_stats_ vector size_t index = 0; for (size_t i = 0; i < bin_indices.size() - 1; i++) { size_t accumulated_bins = @@ -342,7 +357,7 @@ void AnalysisModule::calculate_population_statistics(RuntimeContext &context) { for (const auto &factor : context.mapping().entries()) { if (std::find(factors_to_calculate_.cbegin(), factors_to_calculate_.cend(), factor.key()) == factors_to_calculate_.cend()) { - calculated_factors_[index++] += person.get_risk_factor_value(factor.key()); + calculated_stats_[index++] += person.get_risk_factor_value(factor.key()); } } } diff --git a/src/HealthGPS/analysis_module.h b/src/HealthGPS/analysis_module.h index f018045b1..58ea43fe7 100644 --- a/src/HealthGPS/analysis_module.h +++ b/src/HealthGPS/analysis_module.h @@ -49,7 +49,7 @@ class AnalysisModule final : public UpdatableModule { unsigned int comorbidities_; std::string name_{"Analysis"}; std::vector factors_to_calculate_ = {"Gender"_id, "Age"_id}; - std::vector calculated_factors_; + std::vector calculated_stats_; std::vector factor_bins_; std::vector factor_bin_widths_; std::vector factor_min_values_; From 0ed3f943ee2c0da83f0bfeb475b497b4236aa855 Mon Sep 17 00:00:00 2001 From: TinyMarsh Date: Tue, 2 Jul 2024 15:49:45 +0100 Subject: [PATCH 2/4] Replicate existing channels pattern for storing which stats to calc --- src/HealthGPS/analysis_module.cpp | 48 ++++++++++++++++++++++--------- src/HealthGPS/analysis_module.h | 1 + 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/src/HealthGPS/analysis_module.cpp b/src/HealthGPS/analysis_module.cpp index 957554ea6..66f2ee990 100644 --- a/src/HealthGPS/analysis_module.cpp +++ b/src/HealthGPS/analysis_module.cpp @@ -63,20 +63,9 @@ void AnalysisModule::initialise_vector(RuntimeContext &context) { // `factors` vector. 
size_t num_stats_to_calc = context.mapping().entries().size() - factors_to_calculate_.size(); - // But we calculate the mean and standard deviation of each factor, so we need to multiply by 2 - num_stats_to_calc *= 2; - - // We also want to calculate the prevalence and incidence of each disease - num_stats_to_calc += 2 * context.diseases().size(); - - // We also want to keep the count, deaths, and emigrations - num_stats_to_calc += 3; - - // We also want to calculate the normal, overweight, obese, and above weight prevalence - num_stats_to_calc += 4; - - // Finally, we want to calculate the mean and standard deviation of YLL, YLD, and DALY - num_stats_to_calc += 6; + // And for each factor, we calculate the stats described in `stats_to_calculate_`, so we + // multiply the number of stats to calculate by the number of factors to calculate stats for. + num_stats_to_calc *= stats_to_calculate_.size(); // The product of the number of bins for each factor can be used to calculate the size of the // `calculated_stats_` in the next step @@ -573,6 +562,37 @@ void AnalysisModule::initialise_output_channels(RuntimeContext &context) { channels_.emplace_back("std_daly"); } +void AnalysisModule::initialise_stats_to_calc(RuntimeContext &context) { + if (!stats_to_calculate_.empty()) { + return; + } + + stats_to_calculate_.push_back("count"); + stats_to_calculate_.push_back("deaths"); + stats_to_calculate_.push_back("emigrations"); + + for (const auto &factor : context.mapping().entries()) { + stats_to_calculate_.push_back("mean_" + factor.key().to_string()); + stats_to_calculate_.push_back("std_" + factor.key().to_string()); + } + + for (const auto &disease : context.diseases()) { + stats_to_calculate_.push_back("prevalence_" + disease.code.to_string()); + stats_to_calculate_.push_back("incidence_" + disease.code.to_string()); + } + + stats_to_calculate_.push_back("normal_weight"); + stats_to_calculate_.push_back("over_weight"); + stats_to_calculate_.push_back("obese_weight"); + 
stats_to_calculate_.push_back("above_weight"); + stats_to_calculate_.push_back("mean_yll"); + stats_to_calculate_.push_back("std_yll"); + stats_to_calculate_.push_back("mean_yld"); + stats_to_calculate_.push_back("std_yld"); + stats_to_calculate_.push_back("mean_daly"); + stats_to_calculate_.push_back("std_daly"); +} + std::unique_ptr build_analysis_module(Repository &repository, const ModelInput &config) { auto analysis_entity = repository.manager().get_disease_analysis(config.settings().country()); diff --git a/src/HealthGPS/analysis_module.h b/src/HealthGPS/analysis_module.h index 58ea43fe7..ec4d16f42 100644 --- a/src/HealthGPS/analysis_module.h +++ b/src/HealthGPS/analysis_module.h @@ -46,6 +46,7 @@ class AnalysisModule final : public UpdatableModule { WeightModel weight_classifier_; DoubleAgeGenderTable residual_disability_weight_; std::vector channels_; + std::vector stats_to_calculate_; unsigned int comorbidities_; std::string name_{"Analysis"}; std::vector factors_to_calculate_ = {"Gender"_id, "Age"_id}; From 55899011a490809986971c0c70b3cf33905bd51d Mon Sep 17 00:00:00 2001 From: TinyMarsh Date: Tue, 2 Jul 2024 16:11:30 +0100 Subject: [PATCH 3/4] Remove duplicate function and reuse channels_ vector --- src/HealthGPS/analysis_module.cpp | 33 +------------------------------ src/HealthGPS/analysis_module.h | 1 - 2 files changed, 1 insertion(+), 33 deletions(-) diff --git a/src/HealthGPS/analysis_module.cpp b/src/HealthGPS/analysis_module.cpp index 66f2ee990..392d2f650 100644 --- a/src/HealthGPS/analysis_module.cpp +++ b/src/HealthGPS/analysis_module.cpp @@ -65,7 +65,7 @@ void AnalysisModule::initialise_vector(RuntimeContext &context) { // And for each factor, we calculate the stats described in `stats_to_calculate_`, so we // multiply the number of stats to calculate by the number of factors to calculate stats for. 
- num_stats_to_calc *= stats_to_calculate_.size(); + num_stats_to_calc *= channels_.size(); // The product of the number of bins for each factor can be used to calculate the size of the // `calculated_stats_` in the next step @@ -562,37 +562,6 @@ void AnalysisModule::initialise_output_channels(RuntimeContext &context) { channels_.emplace_back("std_daly"); } -void AnalysisModule::initialise_stats_to_calc(RuntimeContext &context) { - if (!stats_to_calculate_.empty()) { - return; - } - - stats_to_calculate_.push_back("count"); - stats_to_calculate_.push_back("deaths"); - stats_to_calculate_.push_back("emigrations"); - - for (const auto &factor : context.mapping().entries()) { - stats_to_calculate_.push_back("mean_" + factor.key().to_string()); - stats_to_calculate_.push_back("std_" + factor.key().to_string()); - } - - for (const auto &disease : context.diseases()) { - stats_to_calculate_.push_back("prevalence_" + disease.code.to_string()); - stats_to_calculate_.push_back("incidence_" + disease.code.to_string()); - } - - stats_to_calculate_.push_back("normal_weight"); - stats_to_calculate_.push_back("over_weight"); - stats_to_calculate_.push_back("obese_weight"); - stats_to_calculate_.push_back("above_weight"); - stats_to_calculate_.push_back("mean_yll"); - stats_to_calculate_.push_back("std_yll"); - stats_to_calculate_.push_back("mean_yld"); - stats_to_calculate_.push_back("std_yld"); - stats_to_calculate_.push_back("mean_daly"); - stats_to_calculate_.push_back("std_daly"); -} - std::unique_ptr build_analysis_module(Repository &repository, const ModelInput &config) { auto analysis_entity = repository.manager().get_disease_analysis(config.settings().country()); diff --git a/src/HealthGPS/analysis_module.h b/src/HealthGPS/analysis_module.h index ec4d16f42..58ea43fe7 100644 --- a/src/HealthGPS/analysis_module.h +++ b/src/HealthGPS/analysis_module.h @@ -46,7 +46,6 @@ class AnalysisModule final : public UpdatableModule { WeightModel weight_classifier_; DoubleAgeGenderTable 
residual_disability_weight_; std::vector channels_; - std::vector stats_to_calculate_; unsigned int comorbidities_; std::string name_{"Analysis"}; std::vector factors_to_calculate_ = {"Gender"_id, "Age"_id}; From 83e379e7d473bf6027e41784d8c765599cbc6db6 Mon Sep 17 00:00:00 2001 From: TinyMarsh Date: Tue, 2 Jul 2024 16:15:04 +0100 Subject: [PATCH 4/4] fix comment --- src/HealthGPS/analysis_module.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/HealthGPS/analysis_module.cpp b/src/HealthGPS/analysis_module.cpp index 392d2f650..bc731b7a4 100644 --- a/src/HealthGPS/analysis_module.cpp +++ b/src/HealthGPS/analysis_module.cpp @@ -63,8 +63,8 @@ void AnalysisModule::initialise_vector(RuntimeContext &context) { // `factors` vector. size_t num_stats_to_calc = context.mapping().entries().size() - factors_to_calculate_.size(); - // And for each factor, we calculate the stats described in `stats_to_calculate_`, so we - // multiply the number of stats to calculate by the number of factors to calculate stats for. + // And for each factor, we calculate the stats described in `channels_`, so we + // multiply the size of `channels_` by the number of factors to calculate stats for. num_stats_to_calc *= channels_.size(); // The product of the number of bins for each factor can be used to calculate the size of the