From b2d3e74db42c4d6a23077460675db14c3ed97dec Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Mon, 12 Aug 2024 16:03:26 +0100 Subject: [PATCH 01/16] Add JSON schema for main config files I've dropped the version property as it's redundant now that we have the $schema property. --- schemas/v1/config.json | 377 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 377 insertions(+) create mode 100644 schemas/v1/config.json diff --git a/schemas/v1/config.json b/schemas/v1/config.json new file mode 100644 index 000000000..f0f81d1c9 --- /dev/null +++ b/schemas/v1/config.json @@ -0,0 +1,377 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config.json", + "type": "object", + "properties": { + "$schema": { + "type": "string" + }, + "data": { + "type": "object", + "properties": { + "source": { + "type": "string" + }, + "checksum": { + "type": "string" + } + }, + "required": [ + "source" + ], + "additionalProperties": false + }, + "inputs": { + "type": "object", + "properties": { + "dataset": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "format": { + "const": "csv" + }, + "delimiter": { + "type": "string" + }, + "encoding": { + "type": "string" + }, + "columns": { + "type": "object", + "additionalProperties": { + "anyOf": [ + { + "const": "integer" + }, + { + "const": "double" + } + ] + } + } + }, + "required": [ + "name", + "format", + "delimiter", + "encoding", + "columns" + ], + "additionalProperties": false + }, + "settings": { + "type": "object", + "properties": { + "country_code": { + "type": "string" + }, + "size_fraction": { + "type": "number" + }, + "age_range": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "required": [ + "country_code", + "size_fraction", + "age_range" + ], + "additionalProperties": false + } + }, + "required": [ + "dataset", + "settings" + ], + "additionalProperties": false + }, + "modelling": { + "type": "object", + "properties": { + "ses_model": { + "type": "object", + "properties": { + "function_name": { + "type": "string" + }, + "function_parameters": { + "type": "array", + "items": { + "type": "number" + } + } + }, + "required": [ + "function_name", + "function_parameters" + ], + "additionalProperties": false + }, + "risk_factors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "level": { + "type": "integer" + }, + "range": { + "type": "array", + "items": { + "type": "number" + } + } + }, + "required": [ + "name", + "level", + "range" + ], + "additionalProperties": false + } + }, + "risk_factor_models": { + "type": "object", + "properties": { + "static": { + "type": "string" + }, + "dynamic": { + "type": "string" + } + }, + "required": [ + "static", + "dynamic" + ], + "additionalProperties": false + }, + "baseline_adjustments": { + "type": "object", + "properties": { + "format": { + "const": "csv" + }, + "delimiter": { + "type": "string" + }, + "encoding": { + "type": "string" + }, + "file_names": { + "type": "object", + "properties": { + "factorsmean_male": { + "type": "string" + }, + "factorsmean_female": { + "type": "string" + } + }, + "required": [ + "factorsmean_male", + "factorsmean_female" + ], + "additionalProperties": false + } + }, + "required": [ + "format", + "delimiter", + "encoding", + "file_names" + ], + "additionalProperties": false + } + }, + "required": [ + "ses_model", + "risk_factors", + "risk_factor_models", + "baseline_adjustments" + ], + "additionalProperties": false + }, + "running": { + "type": "object", + "properties": { + "seed": { + "type": "array", + "minItems": 1, + "maxItems": 1, + "items": { + "type": "integer" + } + }, + "start_time": { + "type": "integer" + }, + "stop_time": { + "type": "integer" + }, + "trial_runs": { + "type": "integer" + }, + "sync_timeout_ms": { + "type": "integer" + }, + "diseases": { + "type": "array", + "items": { + "type": "string" + } + }, + "interventions": { + "type": "object", + "properties": { + "active_type_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "types": { + "type": "object", + "additionalProperties": { + "type": "object", + "properties": { + "active_period": { + "type": "object", + "properties": { + "start_time": { + "type": "integer" + }, + "finish_time": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + } + }, + "required": [ + "start_time", + "finish_time" + ], + "additionalProperties": false + }, + "impacts": { + "type": "array" + }, + "impact_type": { + "type": "string" + }, + "dynamics": { + "type": "array", + "items": { + "type": "number" + } + }, + "coefficients": { + "type": "array", + "items": { + "type": "number" + } + }, + "coverage_rates": { + "type": "array", + "items": { + "type": "number" + } + }, + "coverage_cutoff_time": { + "type": "integer", + "minValue": 0 + }, + "child_cutoff_age": { + "type": "integer", + "minValue": 0 + }, + "adjustments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "risk_factor": { + "type": "string" + }, + "value": { + "type": "number" + } + } + } + } + }, + "required": [ + "active_period", + "impacts" + ], + "additionalProperties": false + } + } + }, + "required": [ + "active_type_id", + "types" + ], + "additionalProperties": false + } + }, + "required": [ + "seed", + "start_time", + "stop_time", + "trial_runs", + "sync_timeout_ms", + "diseases", + "interventions" + ], + "additionalProperties": false + }, + "output": { + "type": "object", + "properties": { + "comorbidities": { + "type": "integer" + }, + "folder": { + "type": "string" + }, + "file_name": { + "type": "string" + } + }, + "required": [ + "comorbidities", + "folder", + "file_name" + ], + "additionalProperties": false + } + }, + "required": [ + "$schema", + "inputs", + "modelling", + "running", + "output" + ], + "additionalProperties": false +} From 072c5f12c5635f37de89361d73ff2e6064e62df2 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 13 Aug 2024 12:01:28 +0100 Subject: [PATCH 02/16] Split config schema into subschemas --- schemas/v1/config.json | 354 +------------------------------ schemas/v1/config/data.json | 17 ++ schemas/v1/config/inputs.json | 73 +++++++ schemas/v1/config/modelling.json | 112 ++++++++++ schemas/v1/config/output.json | 22 ++ schemas/v1/config/running.json | 145 +++++++++++++ 6 files changed, 374 insertions(+), 349 deletions(-) create mode 100644 schemas/v1/config/data.json create mode 100644 schemas/v1/config/inputs.json create mode 100644 schemas/v1/config/modelling.json create mode 100644 schemas/v1/config/output.json create mode 100644 schemas/v1/config/running.json diff --git a/schemas/v1/config.json b/schemas/v1/config.json index f0f81d1c9..c35c54f76 100644 --- a/schemas/v1/config.json +++ b/schemas/v1/config.json @@ -7,363 +7,19 @@ "type": "string" }, "data": { - "type": "object", - "properties": { - "source": { - "type": "string" - }, - "checksum": { - "type": "string" - } - }, - "required": [ - "source" - ], - "additionalProperties": false + "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/data.json" }, "inputs": { - "type": "object", - "properties": { - "dataset": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "format": { - "const": "csv" - }, - "delimiter": { - "type": "string" - }, - "encoding": { - "type": "string" - }, - "columns": { - "type": "object", - "additionalProperties": { - "anyOf": [ - { - "const": "integer" - }, - { - "const": "double" - } - ] - } - } - }, - "required": [ - "name", - "format", - "delimiter", - "encoding", - "columns" - ], - "additionalProperties": false - }, - "settings": { - "type": "object", - "properties": { - "country_code": { - "type": "string" - }, - "size_fraction": { - "type": "number" - }, - "age_range": { - "type": "array", - "items": { - "type": "integer" - } - } - }, - "required": [ - "country_code", - "size_fraction", - "age_range" - ], - "additionalProperties": false - } - }, - "required": [ - "dataset", - "settings" - ], - "additionalProperties": false + "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/inputs.json" }, "modelling": { - "type": "object", - "properties": { - "ses_model": { - "type": "object", - "properties": { - "function_name": { - "type": "string" - }, - "function_parameters": { - "type": "array", - "items": { - "type": "number" - } - } - }, - "required": [ - "function_name", - "function_parameters" - ], - "additionalProperties": false - }, - "risk_factors": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "level": { - "type": "integer" - }, - "range": { - "type": "array", - "items": { - "type": "number" - } - } - }, - "required": [ - "name", - "level", - "range" - ], - "additionalProperties": false - } - }, - "risk_factor_models": { - "type": "object", - "properties": { - "static": { - "type": "string" - }, - "dynamic": { - "type": "string" - } - }, - "required": [ - "static", - "dynamic" - ], - "additionalProperties": false - }, - "baseline_adjustments": { - "type": "object", - "properties": { - "format": { - "const": "csv" - }, - "delimiter": { - "type": "string" - }, - "encoding": { - "type": "string" - }, - "file_names": { - "type": "object", - "properties": { - "factorsmean_male": { - "type": "string" - }, - "factorsmean_female": { - "type": "string" - } - }, - "required": [ - "factorsmean_male", - "factorsmean_female" - ], - "additionalProperties": false - } - }, - "required": [ - "format", - "delimiter", - "encoding", - "file_names" - ], - "additionalProperties": false - } - }, - "required": [ - "ses_model", - "risk_factors", - "risk_factor_models", - "baseline_adjustments" - ], - "additionalProperties": false + "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/modelling.json" }, "running": { - "type": "object", - "properties": { - "seed": { - "type": "array", - "minItems": 1, - "maxItems": 1, - "items": { - "type": "integer" - } - }, - "start_time": { - "type": "integer" - }, - "stop_time": { - "type": "integer" - }, - "trial_runs": { - "type": "integer" - }, - "sync_timeout_ms": { - "type": "integer" - }, - "diseases": { - "type": "array", - "items": { - "type": "string" - } - }, - "interventions": { - "type": "object", - "properties": { - "active_type_id": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "types": { - "type": "object", - "additionalProperties": { - "type": "object", - "properties": { - "active_period": { - "type": "object", - "properties": { - "start_time": { - "type": "integer" - }, - "finish_time": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - } - }, - "required": [ - "start_time", - "finish_time" - ], - "additionalProperties": false - }, - "impacts": { - "type": "array" - }, - "impact_type": { - "type": "string" - }, - "dynamics": { - "type": "array", - "items": { - "type": "number" - } - }, - "coefficients": { - "type": "array", - "items": { - "type": "number" - } - }, - "coverage_rates": { - "type": "array", - "items": { - "type": "number" - } - }, - "coverage_cutoff_time": { - "type": "integer", - "minValue": 0 - }, - "child_cutoff_age": { - "type": "integer", - "minValue": 0 - }, - "adjustments": { - "type": "array", - "items": { - "type": "object", - "properties": { - "risk_factor": { - "type": "string" - }, - "value": { - "type": "number" - } - } - } - } - }, - "required": [ - "active_period", - "impacts" - ], - "additionalProperties": false - } - } - }, - "required": [ - "active_type_id", - "types" - ], - "additionalProperties": false - } - }, - "required": [ - "seed", - "start_time", - "stop_time", - "trial_runs", - "sync_timeout_ms", - "diseases", - "interventions" - ], - "additionalProperties": false + "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/running.json" }, "output": { - "type": "object", - "properties": { - "comorbidities": { - "type": "integer" - }, - "folder": { - "type": "string" - }, - "file_name": { - "type": "string" - } - }, - "required": [ - "comorbidities", - "folder", - "file_name" - ], - "additionalProperties": false + "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/output.json" } }, "required": [ diff --git a/schemas/v1/config/data.json b/schemas/v1/config/data.json new file mode 100644 index 000000000..6c6cac332 --- /dev/null +++ b/schemas/v1/config/data.json @@ -0,0 +1,17 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/data.json", + "type": "object", + "properties": { + "source": { + "type": "string" + }, + "checksum": { + "type": "string" + } + }, + "required": [ + "source" + ], + "additionalProperties": false +} diff --git a/schemas/v1/config/inputs.json b/schemas/v1/config/inputs.json new file mode 100644 index 000000000..b20fb184f --- /dev/null +++ b/schemas/v1/config/inputs.json @@ -0,0 +1,73 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/inputs.json", + "type": "object", + "properties": { + "dataset": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "format": { + "const": "csv" + }, + "delimiter": { + "type": "string" + }, + "encoding": { + "type": "string" + }, + "columns": { + "type": "object", + "additionalProperties": { + "anyOf": [ + { + "const": "integer" + }, + { + "const": "double" + } + ] + } + } + }, + "required": [ + "name", + "format", + "delimiter", + "encoding", + "columns" + ], + "additionalProperties": false + }, + "settings": { + "type": "object", + "properties": { + "country_code": { + "type": "string" + }, + "size_fraction": { + "type": "number" + }, + "age_range": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "required": [ + "country_code", + "size_fraction", + "age_range" + ], + "additionalProperties": false + } + }, + "required": [ + "dataset", + "settings" + ], + "additionalProperties": false +} diff --git a/schemas/v1/config/modelling.json b/schemas/v1/config/modelling.json new file mode 100644 index 000000000..039eccd32 --- /dev/null +++ b/schemas/v1/config/modelling.json @@ -0,0 +1,112 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/modelling.json", + "type": "object", + "properties": { + "ses_model": { + "type": "object", + "properties": { + "function_name": { + "type": "string" + }, + "function_parameters": { + "type": "array", + "items": { + "type": "number" + } + } + }, + "required": [ + "function_name", + "function_parameters" + ], + "additionalProperties": false + }, + "risk_factors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "level": { + "type": "integer" + }, + "range": { + "type": "array", + "items": { + "type": "number" + } + } + }, + "required": [ + "name", + "level", + "range" + ], + "additionalProperties": false + } + }, + "risk_factor_models": { + "type": "object", + "properties": { + "static": { + "type": "string" + }, + "dynamic": { + "type": "string" + } + }, + "required": [ + "static", + "dynamic" + ], + "additionalProperties": false + }, + "baseline_adjustments": { + "type": "object", + "properties": { + "format": { + "const": "csv" + }, + "delimiter": { + "type": "string" + }, + "encoding": { + "type": "string" + }, + "file_names": { + "type": "object", + "properties": { + "factorsmean_male": { + "type": "string" + }, + "factorsmean_female": { + "type": "string" + } + }, + "required": [ + "factorsmean_male", + "factorsmean_female" + ], + "additionalProperties": false + } + }, + "required": [ + "format", + "delimiter", + "encoding", + "file_names" + ], + "additionalProperties": false + } + }, + "required": [ + "ses_model", + "risk_factors", + "risk_factor_models", + "baseline_adjustments" + ], + "additionalProperties": false +} diff --git a/schemas/v1/config/output.json b/schemas/v1/config/output.json new file mode 100644 index 000000000..81a0ec4ad --- /dev/null +++ b/schemas/v1/config/output.json @@ -0,0 +1,22 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config.json", + "type": "object", + "properties": { + "comorbidities": { + "type": "integer" + }, + "folder": { + "type": "string" + }, + "file_name": { + "type": "string" + } + }, + "required": [ + "comorbidities", + "folder", + "file_name" + ], + "additionalProperties": false +} diff --git a/schemas/v1/config/running.json b/schemas/v1/config/running.json new file mode 100644 index 000000000..f102cb56e --- /dev/null +++ b/schemas/v1/config/running.json @@ -0,0 +1,145 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/running.json", + "type": "object", + "properties": { + "seed": { + "type": "array", + "minItems": 1, + "maxItems": 1, + "items": { + "type": "integer" + } + }, + "start_time": { + "type": "integer" + }, + "stop_time": { + "type": "integer" + }, + "trial_runs": { + "type": "integer" + }, + "sync_timeout_ms": { + "type": "integer" + }, + "diseases": { + "type": "array", + "items": { + "type": "string" + } + }, + "interventions": { + "type": "object", + "properties": { + "active_type_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "types": { + "type": "object", + "additionalProperties": { + "type": "object", + "properties": { + "active_period": { + "type": "object", + "properties": { + "start_time": { + "type": "integer" + }, + "finish_time": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + } + }, + "required": [ + "start_time", + "finish_time" + ], + "additionalProperties": false + }, + "impacts": { + "type": "array" + }, + "impact_type": { + "type": "string" + }, + "dynamics": { + "type": "array", + "items": { + "type": "number" + } + }, + "coefficients": { + "type": "array", + "items": { + "type": "number" + } + }, + "coverage_rates": { + "type": "array", + "items": { + "type": "number" + } + }, + "coverage_cutoff_time": { + "type": "integer", + "minValue": 0 + }, + "child_cutoff_age": { + "type": "integer", + "minValue": 0 + }, + "adjustments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "risk_factor": { + "type": "string" + }, + "value": { + "type": "number" + } + } + } + } + }, + "required": [ + "active_period", + "impacts" + ], + "additionalProperties": false + } + } + }, + "required": [ + "active_type_id", + "types" + ], + "additionalProperties": false + } + }, + "required": [ + "seed", + "start_time", + "stop_time", + "trial_runs", + "sync_timeout_ms", + "diseases", + "interventions" + ], + "additionalProperties": false +} From 7b8fc9ff974a886478b7ad35839a6bede8c991c6 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 13 Aug 2024 12:10:02 +0100 Subject: [PATCH 03/16] We don't need to include the base URL in $refs --- schemas/v1/config.json | 10 +++++----- schemas/v1/data_index.json | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/schemas/v1/config.json b/schemas/v1/config.json index c35c54f76..84751d01f 100644 --- a/schemas/v1/config.json +++ b/schemas/v1/config.json @@ -7,19 +7,19 @@ "type": "string" }, "data": { - "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/data.json" + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/config/data.json" }, "inputs": { - "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/inputs.json" + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/config/inputs.json" }, "modelling": { - "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/modelling.json" + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/config/modelling.json" }, "running": { - "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/running.json" + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/config/running.json" }, "output": { - "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/output.json" + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/config/output.json" } }, "required": [ diff --git a/schemas/v1/data_index.json b/schemas/v1/data_index.json index c6917bb80..6efc8fe18 100644 --- a/schemas/v1/data_index.json +++ b/schemas/v1/data_index.json @@ -8,7 +8,7 @@ "country": { "allOf": [ { - "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/file_info.json" + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/file_info.json" }, { "type": "object", @@ -26,7 +26,7 @@ "demographic": { "allOf": [ { - "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/file_info.json" + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/file_info.json" }, { "type": "object", @@ -116,7 +116,7 @@ "diseases": { "allOf": [ { - "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/file_info.json" + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/file_info.json" }, { "type": "object", @@ -273,7 +273,7 @@ "analysis": { "allOf": [ { - "$ref": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/file_info.json" + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/file_info.json" }, { "type": "object", From c09ccee52a6f5e5aae937e2b9e44fb5d87297f4a Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 13 Aug 2024 12:17:11 +0100 Subject: [PATCH 04/16] Add a subschema for age ranges and use in several places --- schemas/v1/age_range.json | 17 ++++++++++++++ schemas/v1/config/inputs.json | 5 +---- schemas/v1/data_index.json | 42 +++-------------------------------- 3 files changed, 21 insertions(+), 43 deletions(-) create mode 100644 schemas/v1/age_range.json diff --git a/schemas/v1/age_range.json b/schemas/v1/age_range.json new file mode 100644 index 000000000..b9fe9ae2a --- /dev/null +++ b/schemas/v1/age_range.json @@ -0,0 +1,17 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/age_range.json", + "type": "array", + "prefixItems": [ + { + "type": "integer", + "minimum": 0 + }, + { + "type": "integer", + "minimum": 0 + } + ], + "minItems": 2, + "items": false +} diff --git a/schemas/v1/config/inputs.json b/schemas/v1/config/inputs.json index b20fb184f..ec277ff90 100644 --- a/schemas/v1/config/inputs.json +++ b/schemas/v1/config/inputs.json @@ -51,10 +51,7 @@ "type": "number" }, "age_range": { - "type": "array", - "items": { - "type": "integer" - } + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/age_range.json" } }, "required": [ diff --git a/schemas/v1/data_index.json b/schemas/v1/data_index.json index 6efc8fe18..63d81878a 100644 --- a/schemas/v1/data_index.json +++ b/schemas/v1/data_index.json @@ -32,19 +32,7 @@ "type": "object", "properties": { "age_limits": { - "type": "array", - "prefixItems": [ - { - "type": "integer", - "minimum": 0 - }, - { - "type": "integer", - "minimum": 0 - } - ], - "minItems": 2, - "items": false + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/age_range.json" }, "time_limits": { "type": "array", @@ -122,19 +110,7 @@ "type": "object", "properties": { "age_limits": { - "type": "array", - "prefixItems": [ - { - "type": "integer", - "minimum": 0 - }, - { - "type": "integer", - "minimum": 0 - } - ], - "minItems": 2, - "items": false + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/age_range.json" }, "time_year": { "type": "integer", @@ -279,19 +255,7 @@ "type": "object", "properties": { "age_limits": { - "type": "array", - "prefixItems": [ - { - "type": "integer", - "minimum": 0 - }, - { - "type": "integer", - "minimum": 0 - } - ], - "minItems": 2, - "items": false + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/age_range.json" }, "time_year": { "type": "integer", From dde3892b798f48fdfb59e949bf67d5ac8eb042a5 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 13 Aug 2024 12:20:58 +0100 Subject: [PATCH 05/16] Split interventions into own subschema --- schemas/v1/config/interventions.json | 104 +++++++++++++++++++++++++++ schemas/v1/config/running.json | 101 +------------------------- 2 files changed, 105 insertions(+), 100 deletions(-) create mode 100644 schemas/v1/config/interventions.json diff --git a/schemas/v1/config/interventions.json b/schemas/v1/config/interventions.json new file mode 100644 index 000000000..fc6af6016 --- /dev/null +++ b/schemas/v1/config/interventions.json @@ -0,0 +1,104 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/config/interventions.json", + "type": "object", + "properties": { + "active_type_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "types": { + "type": "object", + "additionalProperties": { + "type": "object", + "properties": { + "active_period": { + "type": "object", + "properties": { + "start_time": { + "type": "integer" + }, + "finish_time": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + } + }, + "required": [ + "start_time", + "finish_time" + ], + "additionalProperties": false + }, + "impacts": { + "type": "array" + }, + "impact_type": { + "type": "string" + }, + "dynamics": { + "type": "array", + "items": { + "type": "number" + } + }, + "coefficients": { + "type": "array", + "items": { + "type": "number" + } + }, + "coverage_rates": { + "type": "array", + "items": { + "type": "number" + } + }, + "coverage_cutoff_time": { + "type": "integer", + "minValue": 0 + }, + "child_cutoff_age": { + "type": "integer", + "minValue": 0 + }, + "adjustments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "risk_factor": { + "type": "string" + }, + "value": { + "type": "number" + } + } + } + } + }, + "required": [ + "active_period", + "impacts" + ], + "additionalProperties": false + } + } + }, + "required": [ + "active_type_id", + "types" + ], + "additionalProperties": false +} diff --git a/schemas/v1/config/running.json b/schemas/v1/config/running.json index f102cb56e..5e8f9fa68 100644 --- a/schemas/v1/config/running.json +++ b/schemas/v1/config/running.json @@ -30,106 +30,7 @@ } }, "interventions": { - "type": "object", - "properties": { - "active_type_id": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ] - }, - "types": { - "type": "object", - "additionalProperties": { - "type": "object", - "properties": { - "active_period": { - "type": "object", - "properties": { - "start_time": { - "type": "integer" - }, - "finish_time": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - } - }, - "required": [ - "start_time", - "finish_time" - ], - "additionalProperties": false - }, - "impacts": { - "type": "array" - }, - "impact_type": { - "type": "string" - }, - "dynamics": { - "type": "array", - "items": { - "type": "number" - } - }, - "coefficients": { - "type": "array", - "items": { - "type": "number" - } - }, - "coverage_rates": { - "type": "array", - "items": { - "type": "number" - } - }, - "coverage_cutoff_time": { - "type": "integer", - "minValue": 0 - }, - "child_cutoff_age": { - "type": "integer", - "minValue": 0 - }, - "adjustments": { - "type": "array", - "items": { - "type": "object", - "properties": { - "risk_factor": { - "type": "string" - }, - "value": { - "type": "number" - } - } - } - } - }, - "required": [ - "active_period", - "impacts" - ], - "additionalProperties": false - } - } - }, - "required": [ - "active_type_id", - "types" - ], - "additionalProperties": false + "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/config/interventions.json" } }, "required": [ From 89b883365dd2281d0bd1a9e237f8cb28babc88db Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Mon, 12 Aug 2024 17:13:36 +0100 Subject: [PATCH 06/16] Make JSON validation code generic --- src/HealthGPS.Input/datamanager.cpp | 36 ++++---------- src/HealthGPS.Input/schema.cpp | 76 +++++++++++++++++++++++------ src/HealthGPS.Input/schema.h | 23 +++------ 3 files changed, 79 insertions(+), 56 deletions(-) diff --git a/src/HealthGPS.Input/datamanager.cpp b/src/HealthGPS.Input/datamanager.cpp index 97a8e0182..8b31ab319 100644 --- a/src/HealthGPS.Input/datamanager.cpp +++ b/src/HealthGPS.Input/datamanager.cpp @@ -5,7 +5,6 @@ #include "HealthGPS.Core/math_util.h" #include "HealthGPS.Core/string_util.h" -#include "HealthGPS/program_dirs.h" #include #include @@ -14,35 +13,18 @@ #include namespace { -nlohmann::json read_input_files_from_directory(const std::filesystem::path &root_directory) { - auto full_filename = root_directory / "index.json"; - auto ifs = std::ifstream{full_filename}; - if (!ifs) { - throw std::runtime_error( - fmt::format("File-based store, index file: '{}' not found.", full_filename.string())); - } - - // Read in JSON file - auto index = nlohmann::json::parse(ifs); +//! The name of the index file +static constexpr const char *IndexFileName = "index.json"; - // Check that the file has a $schema property and that it matches the URL of the - // schema version we support - if (!index.contains("$schema")) { - throw std::runtime_error(fmt::format("Index file missing required $schema property: {}", - full_filename.string())); - } - const auto schema_url = index.at("$schema").get(); - if (schema_url != HGPS_DATA_INDEX_SCHEMA_URL) { - throw std::runtime_error(fmt::format("Invalid schema URL provided: {} (expected: {})", - schema_url, HGPS_DATA_INDEX_SCHEMA_URL)); - } +//! The name of the index.json schema file +static constexpr const char *DataIndexSchemaFileName = "data_index.json"; - // Validate against schema - ifs.seekg(0); - const auto schema_directory = hgps::get_program_directory() / "schemas" / "v1"; - hgps::input::validate_index(schema_directory, ifs); +//! The version of the index.json schema file +static constexpr int DataIndexSchemaVersion = 1; - return index; +nlohmann::json read_input_files_from_directory(const std::filesystem::path &data_path) { + return hgps::input::load_and_validate_json(data_path / IndexFileName, DataIndexSchemaFileName, + DataIndexSchemaVersion); } } // anonymous namespace diff --git a/src/HealthGPS.Input/schema.cpp b/src/HealthGPS.Input/schema.cpp index b19f3be5b..5d6b3edde 100644 --- a/src/HealthGPS.Input/schema.cpp +++ b/src/HealthGPS.Input/schema.cpp @@ -1,23 +1,31 @@ #include "schema.h" +#include "HealthGPS/program_dirs.h" + #include #include #include #include -namespace hgps::input { +namespace { using namespace jsoncons; -json resolve_uri(const jsoncons::uri &uri, const std::filesystem::path &schema_directory) { +//! The prefix for Health-GPS schema URLs +static constexpr const char *SchemaURLPrefix = + "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/"; + +json resolve_uri(const uri &uri, const std::filesystem::path &program_directory) { const auto &uri_str = uri.string(); - if (!uri_str.starts_with(HGPS_SCHEMA_URL_PREFIX)) { + if (!uri_str.starts_with(SchemaURLPrefix)) { throw std::runtime_error(fmt::format("Unable to load URL: {}", uri_str)); } // Strip URL prefix and load file from local filesystem - const auto filename = std::filesystem::path{uri.path()}.filename(); - const auto schema_path = schema_directory / filename; + const auto uri_path = std::filesystem::path{uri.path()}; + const auto version_string = uri_path.parent_path().filename(); + const auto schema_file_name = uri_path.filename(); + const auto schema_path = program_directory / "schemas" / version_string / schema_file_name; auto ifs = std::ifstream{schema_path}; if (!ifs) { throw std::runtime_error("Failed to read schema file"); @@ -26,23 +34,63 @@ json resolve_uri(const jsoncons::uri &uri, const std::filesystem::path &schema_d return json::parse(ifs); } -void validate_index(const std::filesystem::path &schema_directory, std::istream &index_stream) { - // **YUCK**: We have to read in the data with jsoncons here rather than reusing - // the nlohmann-json representation :-( - const auto index = json::parse(index_stream); +/// @brief Validate a JSON file against the specified schema +/// @param json_stream The input stream for the JSON file +/// @param schema_file_name The name of the JSON schema file +/// @param schema_version The version of the schema file +void validate_json(std::istream &json_stream, const char *schema_file_name, int schema_version) { + // **YUCK**: We have to read in the data with jsoncons here rather than reusing the + // nlohmann-json representation :-( + const auto data = json::parse(json_stream); // Load schema - auto ifs_schema = std::ifstream{schema_directory / "data_index.json"}; + const auto program_dir = hgps::get_program_directory(); + const auto schema_relative_path = + std::filesystem::path{"schemas"} / fmt::format("v{}", schema_version) / schema_file_name; + auto ifs_schema = std::ifstream{program_dir / schema_relative_path}; if (!ifs_schema) { throw std::runtime_error("Failed to load schema"); } - const auto resolver = [&schema_directory](const auto &uri) { - return resolve_uri(uri, schema_directory); - }; + const auto resolver = [&program_dir](const auto &uri) { return resolve_uri(uri, program_dir); }; const auto schema = jsonschema::make_json_schema(json::parse(ifs_schema), resolver); // Perform validation - schema.validate(index); + schema.validate(data); +} +} // anonymous namespace + +namespace hgps::input { +nlohmann::json load_and_validate_json(const std::filesystem::path &file_path, + const char *schema_file_name, int schema_version) { + auto ifs = std::ifstream{file_path}; + if (!ifs) { + throw std::runtime_error(fmt::format("File not found: {}", file_path.string())); + } + + // Read in JSON file + auto json = nlohmann::json::parse(ifs); + + // Check that the file has a $schema property and that it matches the URL of the + // schema version we support + if (!json.contains("$schema")) { + throw std::runtime_error( + fmt::format("File missing required $schema property: {}", file_path.string())); + } + + // Check $schema attribute is present and valid + const auto actual_schema_url = json.at("$schema").get(); + const auto expected_schema_url = + fmt::format("{}v{}/{}", SchemaURLPrefix, schema_version, schema_file_name); + if (actual_schema_url != expected_schema_url) { + throw std::runtime_error(fmt::format("Invalid schema URL provided: {} (expected: {})", + actual_schema_url, expected_schema_url)); + } + + // Perform validation + ifs.seekg(0); // Seek to start of file so we can reload + validate_json(ifs, schema_file_name, schema_version); + + return json; } } // namespace hgps::input diff --git a/src/HealthGPS.Input/schema.h b/src/HealthGPS.Input/schema.h index 22d560153..987dcc284 100644 --- a/src/HealthGPS.Input/schema.h +++ b/src/HealthGPS.Input/schema.h @@ -1,21 +1,14 @@ #pragma once -#include -#include - -//! The prefix for Health-GPS schema URLs -#define HGPS_SCHEMA_URL_PREFIX \ - "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/v1/" +#include -//! The name of the index.json schema file -#define HGPS_DATA_INDEX_SCHEMA_FILENAME "data_index.json" - -//! The schema URL for the data index file -#define HGPS_DATA_INDEX_SCHEMA_URL (HGPS_SCHEMA_URL_PREFIX HGPS_DATA_INDEX_SCHEMA_FILENAME) +#include namespace hgps::input { -/// @brief Validate the index.json file -/// @param schema_directory The root folder for JSON schemas -/// @param index_stream The input stream for the index.json file -void validate_index(const std::filesystem::path &schema_directory, std::istream &index_stream); +/// @brief Load a JSON file and validate against the specified schema +/// @param file_path The path to the JSON file +/// @param schema_file_name The name of the JSON schema file +/// @param schema_version The version of the schema file +nlohmann::json load_and_validate_json(const std::filesystem::path &file_path, + const char *schema_file_name, int schema_version); } // namespace hgps::input From 714f6e133d9f067119f0b5d9ebdbf28be6aa2149 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Mon, 12 Aug 2024 18:18:29 +0100 Subject: [PATCH 07/16] Use JSON schema to validate top-level config files Closes #449. --- src/HealthGPS.Input/configuration.cpp | 24 +++++------------------- src/HealthGPS.Input/schema.cpp | 6 ++---- 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/src/HealthGPS.Input/configuration.cpp b/src/HealthGPS.Input/configuration.cpp index 9a9790abe..e21ab026c 100644 --- a/src/HealthGPS.Input/configuration.cpp +++ b/src/HealthGPS.Input/configuration.cpp @@ -1,6 +1,7 @@ #include "configuration.h" #include "configuration_parsing.h" #include "jsonparser.h" +#include "schema.h" #include "version.h" #include "HealthGPS/baseline_scenario.h" @@ -35,6 +36,9 @@ namespace { using namespace hgps::input; +static constexpr const char *ConfigSchemaFileName = "config.json"; +static constexpr int ConfigSchemaVersion = 1; + DataSource get_data_source_from_json(const nlohmann::json &opt, const std::filesystem::path &root_path) { auto source = opt["source"].get(); @@ -69,25 +73,7 @@ Configuration get_configuration(const std::filesystem::path &config_file, int jo config.verbosity = core::VerboseMode::verbose; } - std::ifstream ifs(config_file, std::ifstream::in); - if (!ifs) { - throw ConfigurationError(fmt::format("File {} doesn't exist.", config_file.string())); - } - - const auto opt = [&ifs]() { - try { - return json::parse(ifs); - } catch (const std::exception &e) { - throw ConfigurationError(fmt::format("Could not parse JSON: {}", e.what())); - } - }(); - - // Check the file format version - try { - check_version(opt); - } catch (const ConfigurationError &) { - success = false; - } + const auto opt = load_and_validate_json(config_file, ConfigSchemaFileName, ConfigSchemaVersion); // Base dir for relative paths config.root_path = config_file.parent_path(); diff --git a/src/HealthGPS.Input/schema.cpp b/src/HealthGPS.Input/schema.cpp index 5d6b3edde..1623d35e7 100644 --- a/src/HealthGPS.Input/schema.cpp +++ b/src/HealthGPS.Input/schema.cpp @@ -22,10 +22,8 @@ json resolve_uri(const uri &uri, const std::filesystem::path &program_directory) } // Strip URL prefix and load file from local filesystem - const auto uri_path = std::filesystem::path{uri.path()}; - const auto version_string = uri_path.parent_path().filename(); - const auto schema_file_name = uri_path.filename(); - const auto schema_path = program_directory / "schemas" / version_string / schema_file_name; + const auto uri_path = std::filesystem::path{uri_str.substr(strlen(SchemaURLPrefix))}; + const auto schema_path = program_directory / "schemas" / uri_path; auto ifs = std::ifstream{schema_path}; if (!ifs) { throw std::runtime_error("Failed to read schema file"); From 64f5249e24343ad97294ba0175f0346f5ae69f23 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 13 Aug 2024 15:01:25 +0100 Subject: [PATCH 08/16] Remove unnecessary `static`s Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/HealthGPS.Input/configuration.cpp | 2 +- src/HealthGPS.Input/datamanager.cpp | 6 +++--- src/HealthGPS.Input/schema.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/HealthGPS.Input/configuration.cpp b/src/HealthGPS.Input/configuration.cpp index e21ab026c..871908085 100644 --- a/src/HealthGPS.Input/configuration.cpp +++ b/src/HealthGPS.Input/configuration.cpp @@ -36,7 +36,7 @@ namespace { using namespace hgps::input; -static constexpr const char *ConfigSchemaFileName = "config.json"; +constexpr const char *ConfigSchemaFileName = "config.json"; static constexpr int ConfigSchemaVersion = 1; DataSource get_data_source_from_json(const nlohmann::json &opt, diff --git a/src/HealthGPS.Input/datamanager.cpp b/src/HealthGPS.Input/datamanager.cpp index 8b31ab319..74f8a77bb 100644 --- a/src/HealthGPS.Input/datamanager.cpp +++ b/src/HealthGPS.Input/datamanager.cpp @@ -14,13 +14,13 @@ namespace { //! The name of the index file -static constexpr const char *IndexFileName = "index.json"; +constexpr const char *IndexFileName = "index.json"; //! The name of the index.json schema file -static constexpr const char *DataIndexSchemaFileName = "data_index.json"; +constexpr const char *DataIndexSchemaFileName = "data_index.json"; //! The version of the index.json schema file -static constexpr int DataIndexSchemaVersion = 1; +constexpr int DataIndexSchemaVersion = 1; nlohmann::json read_input_files_from_directory(const std::filesystem::path &data_path) { return hgps::input::load_and_validate_json(data_path / IndexFileName, DataIndexSchemaFileName, diff --git a/src/HealthGPS.Input/schema.cpp b/src/HealthGPS.Input/schema.cpp index 1623d35e7..a29c54a04 100644 --- a/src/HealthGPS.Input/schema.cpp +++ b/src/HealthGPS.Input/schema.cpp @@ -12,7 +12,7 @@ namespace { using namespace jsoncons; //! The prefix for Health-GPS schema URLs -static constexpr const char *SchemaURLPrefix = +constexpr const char *SchemaURLPrefix = "https://raw.githubusercontent.com/imperialCHEPI/healthgps/main/schemas/"; json resolve_uri(const uri &uri, const std::filesystem::path &program_directory) { From f2e849ad54538c05ce6db9e5190bd35b2e3a5b15 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 13 Aug 2024 15:08:03 +0100 Subject: [PATCH 09/16] Remove another `static` Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/HealthGPS.Input/configuration.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/HealthGPS.Input/configuration.cpp b/src/HealthGPS.Input/configuration.cpp index 871908085..04b630c26 100644 --- a/src/HealthGPS.Input/configuration.cpp +++ b/src/HealthGPS.Input/configuration.cpp @@ -37,7 +37,7 @@ namespace { using namespace hgps::input; constexpr const char *ConfigSchemaFileName = "config.json"; -static constexpr int ConfigSchemaVersion = 1; +constexpr int ConfigSchemaVersion = 1; DataSource get_data_source_from_json(const nlohmann::json &opt, const std::filesystem::path &root_path) { From 4fb3a96ebb73f50abb9fe3fe3ac6048c5e73c609 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 15 Aug 2024 16:59:39 +0100 Subject: [PATCH 10/16] Fix: `minValue` should be `minimum` --- schemas/v1/config/interventions.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/schemas/v1/config/interventions.json b/schemas/v1/config/interventions.json index fc6af6016..4deb5b4a8 100644 --- a/schemas/v1/config/interventions.json +++ b/schemas/v1/config/interventions.json @@ -67,11 +67,11 @@ }, "coverage_cutoff_time": { "type": "integer", - "minValue": 0 + "minimum": 0 }, "child_cutoff_age": { "type": "integer", - "minValue": 0 + "minimum": 0 }, "adjustments": { "type": "array", From ca5d13d31422761816a8dd57183ffdf3ccc20db0 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Fri, 16 Aug 2024 11:58:53 +0100 Subject: [PATCH 11/16] Use relative paths in schemas --- schemas/v1/config.json | 10 +++++----- schemas/v1/config/inputs.json | 2 +- schemas/v1/config/running.json | 2 +- schemas/v1/data_index.json | 14 +++++++------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/schemas/v1/config.json b/schemas/v1/config.json index 84751d01f..e358c6633 100644 --- a/schemas/v1/config.json +++ b/schemas/v1/config.json @@ -7,19 +7,19 @@ "type": "string" }, "data": { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/config/data.json" + "$ref": "config/data.json" }, "inputs": { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/config/inputs.json" + "$ref": "config/inputs.json" }, "modelling": { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/config/modelling.json" + "$ref": "config/modelling.json" }, "running": { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/config/running.json" + "$ref": "config/running.json" }, "output": { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/config/output.json" + "$ref": "config/output.json" } }, "required": [ diff --git a/schemas/v1/config/inputs.json b/schemas/v1/config/inputs.json index ec277ff90..4970c4447 100644 --- a/schemas/v1/config/inputs.json +++ b/schemas/v1/config/inputs.json @@ -51,7 +51,7 @@ "type": "number" }, "age_range": { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/age_range.json" + "$ref": "../age_range.json" } }, "required": [ diff --git a/schemas/v1/config/running.json b/schemas/v1/config/running.json index 5e8f9fa68..bd16d88b6 100644 --- a/schemas/v1/config/running.json +++ b/schemas/v1/config/running.json @@ -30,7 +30,7 @@ } }, "interventions": { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/config/interventions.json" + "$ref": "interventions.json" } }, "required": [ diff --git a/schemas/v1/data_index.json b/schemas/v1/data_index.json index 63d81878a..32da6efb1 100644 --- a/schemas/v1/data_index.json +++ b/schemas/v1/data_index.json @@ -8,7 +8,7 @@ "country": { "allOf": [ { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/file_info.json" + "$ref": "file_info.json" }, { "type": "object", @@ -26,13 +26,13 @@ "demographic": { "allOf": [ { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/file_info.json" + "$ref": "file_info.json" }, { "type": "object", "properties": { "age_limits": { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/age_range.json" + "$ref": "age_range.json" }, "time_limits": { "type": "array", @@ -104,13 +104,13 @@ "diseases": { "allOf": [ { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/file_info.json" + "$ref": "file_info.json" }, { "type": "object", "properties": { "age_limits": { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/age_range.json" + "$ref": "age_range.json" }, "time_year": { "type": "integer", @@ -249,13 +249,13 @@ "analysis": { "allOf": [ { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/file_info.json" + "$ref": "file_info.json" }, { "type": "object", "properties": { "age_limits": { - "$ref": "/imperialCHEPI/healthgps/main/schemas/v1/age_range.json" + "$ref": "age_range.json" }, "time_year": { "type": "integer", From 2f23fa7f09d0043012f56816fa1fdbba8126c26e Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 27 Aug 2024 16:37:22 +0100 Subject: [PATCH 12/16] load_and_validate_json: Allow for parsing files without $schema property We still warn if it is missing. --- src/HealthGPS.Input/schema.cpp | 30 ++++++++++++++++++------------ src/HealthGPS.Input/schema.h | 5 ++++- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/HealthGPS.Input/schema.cpp b/src/HealthGPS.Input/schema.cpp index a29c54a04..4ea0bfa52 100644 --- a/src/HealthGPS.Input/schema.cpp +++ b/src/HealthGPS.Input/schema.cpp @@ -2,6 +2,7 @@ #include "HealthGPS/program_dirs.h" +#include #include #include #include @@ -60,7 +61,8 @@ void validate_json(std::istream &json_stream, const char *schema_file_name, int namespace hgps::input { nlohmann::json load_and_validate_json(const std::filesystem::path &file_path, - const char *schema_file_name, int schema_version) { + const char *schema_file_name, int schema_version, + bool require_schema_property) { auto ifs = std::ifstream{file_path}; if (!ifs) { throw std::runtime_error(fmt::format("File not found: {}", file_path.string())); @@ -72,17 +74,21 @@ nlohmann::json load_and_validate_json(const std::filesystem::path &file_path, // Check that the file has a $schema property and that it matches the URL of the // schema version we support if (!json.contains("$schema")) { - throw std::runtime_error( - fmt::format("File missing required $schema property: {}", file_path.string())); - } - - // Check $schema attribute is present and valid - const auto actual_schema_url = json.at("$schema").get(); - const auto expected_schema_url = - fmt::format("{}v{}/{}", SchemaURLPrefix, schema_version, schema_file_name); - if (actual_schema_url != expected_schema_url) { - throw std::runtime_error(fmt::format("Invalid schema URL provided: {} (expected: {})", - actual_schema_url, expected_schema_url)); + const auto message = fmt::format("File missing $schema property: {}", file_path.string()); + if (require_schema_property) { + throw std::runtime_error(message); + } else { + fmt::print(fmt::fg(fmt::color::dark_salmon), "{}\n", message); + } + } else { + // Check $schema attribute is valid + const auto actual_schema_url = json.at("$schema").get(); + const auto expected_schema_url = + fmt::format("{}v{}/{}", SchemaURLPrefix, schema_version, schema_file_name); + if (actual_schema_url != expected_schema_url) { + throw std::runtime_error(fmt::format("Invalid schema URL provided: {} (expected: {})", + actual_schema_url, expected_schema_url)); + } } // Perform validation diff --git a/src/HealthGPS.Input/schema.h b/src/HealthGPS.Input/schema.h index 987dcc284..b84526a8b 100644 --- a/src/HealthGPS.Input/schema.h +++ b/src/HealthGPS.Input/schema.h @@ -9,6 +9,9 @@ namespace hgps::input { /// @param file_path The path to the JSON file /// @param schema_file_name The name of the JSON schema file /// @param schema_version The version of the schema file +/// @param require_schema_property Whether to raise an exception if the $schema property +/// is missing nlohmann::json load_and_validate_json(const std::filesystem::path &file_path, - const char *schema_file_name, int schema_version); + const char *schema_file_name, int schema_version, + bool require_schema_property = true); } // namespace hgps::input From 2c6e30ad9e16a084ab06a1b997636fb9ad9e821f Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 27 Aug 2024 16:18:54 +0100 Subject: [PATCH 13/16] Make $schema property optional for now --- schemas/v1/config.json | 1 - src/HealthGPS.Input/configuration.cpp | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/schemas/v1/config.json b/schemas/v1/config.json index e358c6633..7c84867c4 100644 --- a/schemas/v1/config.json +++ b/schemas/v1/config.json @@ -23,7 +23,6 @@ } }, "required": [ - "$schema", "inputs", "modelling", "running", diff --git a/src/HealthGPS.Input/configuration.cpp b/src/HealthGPS.Input/configuration.cpp index 04b630c26..cd84ef206 100644 --- a/src/HealthGPS.Input/configuration.cpp +++ b/src/HealthGPS.Input/configuration.cpp @@ -73,7 +73,8 @@ Configuration get_configuration(const std::filesystem::path &config_file, int jo config.verbosity = core::VerboseMode::verbose; } - const auto opt = load_and_validate_json(config_file, ConfigSchemaFileName, ConfigSchemaVersion); + const auto opt = load_and_validate_json(config_file, ConfigSchemaFileName, ConfigSchemaVersion, + /*require_schema_property=*/false); // Base dir for relative paths config.root_path = config_file.parent_path(); From c1dcf3314afe392a7daaf00d727afc3c7125ae2b Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 27 Aug 2024 16:37:59 +0100 Subject: [PATCH 14/16] Make version property optional --- schemas/v1/config.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/schemas/v1/config.json b/schemas/v1/config.json index 7c84867c4..e7518f266 100644 --- a/schemas/v1/config.json +++ b/schemas/v1/config.json @@ -6,6 +6,9 @@ "$schema": { "type": "string" }, + "version": { + "const": 2 + }, "data": { "$ref": "config/data.json" }, From ee63655fbbd28ad0cc0a5cd27ceea603debca05a Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Tue, 27 Aug 2024 16:56:37 +0100 Subject: [PATCH 15/16] Don't use else after throw Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/HealthGPS.Input/schema.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/HealthGPS.Input/schema.cpp b/src/HealthGPS.Input/schema.cpp index 4ea0bfa52..d8b68cffc 100644 --- a/src/HealthGPS.Input/schema.cpp +++ b/src/HealthGPS.Input/schema.cpp @@ -79,7 +79,8 @@ nlohmann::json load_and_validate_json(const std::filesystem::path &file_path, throw std::runtime_error(message); } else { fmt::print(fmt::fg(fmt::color::dark_salmon), "{}\n", message); - } + } fmt::print(fmt::fg(fmt::color::dark_salmon), "{}\n", message); + } else { // Check $schema attribute is valid const auto actual_schema_url = json.at("$schema").get(); From 434b1709ca261e86420919846e0d77d355d55859 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Aug 2024 15:56:48 +0000 Subject: [PATCH 16/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/HealthGPS.Input/schema.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/HealthGPS.Input/schema.cpp b/src/HealthGPS.Input/schema.cpp index d8b68cffc..014d34b80 100644 --- a/src/HealthGPS.Input/schema.cpp +++ b/src/HealthGPS.Input/schema.cpp @@ -79,8 +79,9 @@ nlohmann::json load_and_validate_json(const std::filesystem::path &file_path, throw std::runtime_error(message); } else { fmt::print(fmt::fg(fmt::color::dark_salmon), "{}\n", message); - } fmt::print(fmt::fg(fmt::color::dark_salmon), "{}\n", message); - + } + fmt::print(fmt::fg(fmt::color::dark_salmon), "{}\n", message); + } else { // Check $schema attribute is valid const auto actual_schema_url = json.at("$schema").get();