Recreate SRO key measure with Streamlit

iaindillingham · alarthast · iaindillingham · commit aceb48661781 · 2024-12-31T16:24:02.000Z
Closes #2 Co-authored-by: Alice Wong <alice.wong@thedatalab.org>
diff --git a/app/measures.py b/app/measures.py
@@ -0,0 +1,149 @@
+import dataclasses
+import pathlib
+
+import altair
+import pandas
+import yaml
+
+
+# Values written to the "label" column of a deciles table by _get_deciles_table.
+# They are also the domain of the stroke-dash scale used by Measure.deciles_chart.
+PERCENTILE = "Percentile"
+DECILE = "Decile"
+MEDIAN = "Median"
+
+
+@dataclasses.dataclass
+class Measure:
+    name: str
+    explanation: str
+    caveats: str
+    classification: str
+    codelist_url: str
+    unique_patients: int
+    total_events: int
+    top_5_codes_table: pandas.DataFrame
+    deciles_table: pandas.DataFrame
+
+    def __repr__(self):
+        return f"Measure(name='{self.name}')"
+
+    def change_in_median(self, from_year, to_year, month):
+        # Pandas wants these to be strings
+        from_year = str(from_year)
+        to_year = str(to_year)
+
+        dt = self.deciles_table  # convenient alias
+        is_month = dt["date"].dt.month == month
+        is_median = dt["label"] == MEDIAN
+        # set index to date to allow convenient selection by year
+        value = dt.loc[is_month & is_median].set_index("date").loc[:, "value"]
+
+        # .values is a numpy array
+        from_val = value[from_year].values[0]
+        to_val = value[to_year].values[0]
+        pct_change = (to_val - from_val) / from_val
+
+        return from_val, to_val, pct_change
+
+    @property
+    def deciles_chart(self):
+        # selections
+        legend_selection = altair.selection_point(bind="legend", fields=["label"])
+
+        # encodings
+        stroke_dash = altair.StrokeDash(
+            "label",
+            title=None,
+            scale=altair.Scale(
+                domain=[PERCENTILE, DECILE, MEDIAN],
+                range=[[1, 1], [5, 5], [0, 0]],
+            ),
+            legend=altair.Legend(orient="bottom"),
+        )
+        stroke_width = (
+            altair.when(altair.datum.type == MEDIAN)
+            .then(altair.value(1))
+            .otherwise(altair.value(0.5))
+        )
+        opacity = (
+            altair.when(legend_selection)
+            .then(altair.value(1))
+            .otherwise(altair.value(0.2))
+        )
+
+        # chart
+        chart = (
+            altair.Chart(self.deciles_table, title="Rate per 1,000 registered patients")
+            .mark_line()
+            .encode(
+                altair.X("date", title=None),
+                altair.Y("value", title=None),
+                detail="percentile",
+                strokeDash=stroke_dash,
+                strokeWidth=stroke_width,
+                opacity=opacity,
+            )
+            .add_params(legend_selection)
+        )
+        return chart
+
+
+class OSJobsRepository:
+    def __init__(self):
+        path = pathlib.Path(__file__).parent.joinpath("measures.yaml")
+        self._records = {r["name"]: r for r in yaml.load(path.read_text(), yaml.Loader)}
+        self._measures = {}  # the repository
+
+    def get(self, name):
+        """Get the measure with the given name from the repository."""
+        if name not in self._measures:
+            self._measures[name] = self._construct(name)
+        return self._measures[name]
+
+    def _construct(self, name):
+        """Construct the measure with the given name from information stored on the
+        local file system and on OS Jobs."""
+        record = self._records[name]
+
+        # The following helpers don't need access to instance attributes, so we define
+        # them as functions rather than as methods. Doing so makes them easier to mock.
+        counts = _get_counts(record["counts_table_url"])
+        top_5_codes_table = _get_top_5_codes_table(record["top_5_codes_table_url"])
+        deciles_table = _get_deciles_table(record["deciles_table_url"])
+
+        return Measure(
+            name,
+            record["explanation"],
+            record["caveats"],
+            record["classification"],
+            record["codelist_url"],
+            counts["unique_patients"],
+            counts["total_events"],
+            top_5_codes_table,
+            deciles_table,
+        )
+
+    def list(self):
+        """List the names of all the measures in the repository."""
+        return sorted(self._records.keys())
+
+
+def _get_counts(counts_table_url):
+    """Read the counts table and return its "count" column as a dict.
+
+    The first CSV column becomes the index, so the result maps each index label to
+    its count. The result is None if the table has no "count" column.
+    """
+    return pandas.read_csv(counts_table_url, index_col=0).to_dict().get("count")
+
+
+def _get_top_5_codes_table(top_5_codes_table_url):
+    """Read the table of most common codes and index it by a 1-based "Rank"."""
+    # Read "Code" as text so that pandas does not convert codes to numbers.
+    top_5_codes_table = pandas.read_csv(
+        top_5_codes_table_url, index_col=0, dtype={"Code": str}
+    )
+    # Replace the index read from the file with ranks 1..n, in row order.
+    top_5_codes_table.index = pandas.RangeIndex(
+        1, len(top_5_codes_table) + 1, name="Rank"
+    )
+    return top_5_codes_table
+
+
+def _get_deciles_table(deciles_table_url):
+    """Read the deciles table and add a "label" column for charting.
+
+    Each row is labelled PERCENTILE, DECILE (percentile is a multiple of 10) or
+    MEDIAN (percentile is 50).
+    """
+    deciles_table = pandas.read_csv(deciles_table_url, parse_dates=["date"])
+    # Order matters: each assignment overrides the previous one. Percentile 50 is
+    # first labelled as a decile and then relabelled as the median.
+    deciles_table.loc[:, "label"] = PERCENTILE
+    deciles_table.loc[deciles_table["percentile"] % 10 == 0, "label"] = DECILE
+    deciles_table.loc[deciles_table["percentile"] == 50, "label"] = MEDIAN
+    return deciles_table
diff --git a/app/measures.yaml b/app/measures.yaml
@@ -0,0 +1,30 @@
+- name: Liver Function Testing - Alanine Aminotransferase (ALT)
+  explanation: >
+    An ALT blood test is one of a group of liver function tests (LFTs) which are used to detect problems with the function of the liver.
+    It is often used to monitor patients on medications which may affect the liver or which rely on the liver to break them down within the body.
+    ALT is also tested in patients with known or suspected liver dysfunction.
+  caveats: >
+    **In a small number of places, an ALT test may NOT be included within a liver function test.**
+    We use codes which represent results reported to GPs so tests requested but not yet reported are not included.
+    Only test results returned to GPs are included,
+    which will usually exclude tests requested while a person is in hospital and other settings like a private clinic.
+  classification: recovery
+  codelist_url: https://www.opencodelists.org/codelist/opensafely/alanine-aminotransferase-alt-tests/2298df3e/
+  counts_table_url: https://jobs.opensafely.org/service-restoration-observatory/sro-key-measures-dashboard/published/01GGZ127420DXX35BM0MMQNW8N/
+  top_5_codes_table_url: https://jobs.opensafely.org/service-restoration-observatory/sro-key-measures-dashboard/published/01GGWFEGKSB1ANPP4X5V2FM3FR/
+  deciles_table_url: https://jobs.opensafely.org/service-restoration-observatory/sro-key-measures-dashboard/published/01GGZ12739P6B7Z00QAJBTBKK3/
+
+- name: Glycated Haemoglobin A1c Level (HbA1c)
+  explanation: >
+    HbA1c is a long term indicator of diabetes control.
+    Only test results returned to GPs are included,
+    which will usually exclude tests requested while a person is in hospital and other settings like a private clinic.
+  caveats: >
+    We use codes which represent results reported to GPs so tests requested but not yet reported are not included.
+    Only test results returned to GPs are included,
+    which will usually exclude tests requested while a person is in hospital and other settings like a private clinic.
+  classification: recovery
+  codelist_url: https://www.opencodelists.org/codelist/opensafely/glycated-haemoglobin-hba1c-tests/3e5b1269/
+  counts_table_url: https://jobs.opensafely.org/service-restoration-observatory/sro-key-measures-dashboard/published/01GGZ12749JZ938746AV8XCPZ3/
+  top_5_codes_table_url: https://jobs.opensafely.org/service-restoration-observatory/sro-key-measures-dashboard/published/01GGWFEGMVQ62NGNM403MK32Z7/
+  deciles_table_url: https://jobs.opensafely.org/service-restoration-observatory/sro-key-measures-dashboard/published/01GGZ1273K1QJM5EQ7238X7P3S/
diff --git a/app/sro_key_measures.py b/app/sro_key_measures.py
@@ -0,0 +1,50 @@
+import streamlit
+
+from app import measures
+
+
+def main():
+    repository = measures.OSJobsRepository()
+
+    selected_measure_name = streamlit.selectbox("Select a measure:", repository.list())
+
+    measure = repository.get(selected_measure_name)
+
+    streamlit.markdown(f"# {measure.name}")
+
+    streamlit.markdown(
+        "The codes used for this measure"
+        f"are available in [this codelist]({measure.codelist_url})."
+    )
+
+    with streamlit.expander("What is it and why does it matter?"):
+        streamlit.markdown(measure.explanation)
+
+    with streamlit.expander("Caveats"):
+        streamlit.markdown(measure.caveats)
+
+    streamlit.altair_chart(measure.deciles_chart, use_container_width=True)
+
+    streamlit.markdown(f"**Most common codes ([codelist]({measure.codelist_url}))**")
+
+    streamlit.dataframe(measure.top_5_codes_table)
+
+    streamlit.markdown(
+        "Total patients: "
+        f"**{measure.unique_patients:,}** "
+        f"({measure.total_events:,} events)"
+    )
+
+    for from_year, to_year in [(2019, 2020), (2019, 2021)]:
+        from_val, to_val, pct_change = measure.change_in_median(from_year, to_year, 4)
+        streamlit.markdown(
+            f"Change in median from April {from_year} ({from_val:.2f}) "
+            f"to April {to_year} ({to_val:.2f}): "
+            f"**{pct_change:.2%}**"
+        )
+
+    streamlit.markdown(f"Overall classification: **{measure.classification}**")
+
+
+# Render the page when this file is executed as a script, e.g. through the
+# justfile's `run` recipe, which invokes `streamlit run`.
+if __name__ == "__main__":
+    main()
diff --git a/justfile b/justfile
@@ -4,6 +4,7 @@ BIN_DIR := VENV_DIR / "bin"
 PIP := BIN_DIR / "python -m pip"
 PIP_COMPILE := BIN_DIR / "pip-compile"
 RUFF := BIN_DIR / "ruff"
+STREAMLIT := BIN_DIR / "streamlit"
 
 # List available recipes and their arguments
 default:
@@ -61,9 +62,9 @@ prodenv: requirements-prod (_install 'prod')
 # Install dev requirements into the virtual environment
 devenv: requirements-dev prodenv (_install 'dev') && install-pre-commit
 
-# Run a command in the virtual environment
+# Run a Streamlit app
 run *args: devenv
-    echo "Not implemented"
+    PYTHONPATH=. {{ STREAMLIT }} run {{ args }}
 
 # Run tests
 test *args: devenv
diff --git a/requirements.dev.in b/requirements.dev.in
@@ -2,3 +2,4 @@ coverage
 pre-commit
 pytest
 ruff
+watchdog
diff --git a/requirements.dev.txt b/requirements.dev.txt
@@ -191,3 +191,35 @@ virtualenv==20.28.0 \
     --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \
     --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa
     # via pre-commit
+watchdog==6.0.0 \
+    --hash=sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a \
+    --hash=sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2 \
+    --hash=sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f \
+    --hash=sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c \
+    --hash=sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c \
+    --hash=sha256:6eb11feb5a0d452ee41f824e271ca311a09e250441c262ca2fd7ebcf2461a06c \
+    --hash=sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0 \
+    --hash=sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13 \
+    --hash=sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134 \
+    --hash=sha256:7a0e56874cfbc4b9b05c60c8a1926fedf56324bb08cfbc188969777940aef3aa \
+    --hash=sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e \
+    --hash=sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379 \
+    --hash=sha256:90c8e78f3b94014f7aaae121e6b909674df5b46ec24d6bebc45c44c56729af2a \
+    --hash=sha256:9513f27a1a582d9808cf21a07dae516f0fab1cf2d7683a742c498b93eedabb11 \
+    --hash=sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282 \
+    --hash=sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b \
+    --hash=sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f \
+    --hash=sha256:afd0fe1b2270917c5e23c2a65ce50c2a4abb63daafb0d419fde368e272a76b7c \
+    --hash=sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112 \
+    --hash=sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948 \
+    --hash=sha256:c7ac31a19f4545dd92fc25d200694098f42c9a8e391bc00bdd362c5736dbf881 \
+    --hash=sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860 \
+    --hash=sha256:c897ac1b55c5a1461e16dae288d22bb2e412ba9807df8397a635d88f671d36c3 \
+    --hash=sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680 \
+    --hash=sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26 \
+    --hash=sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26 \
+    --hash=sha256:e6439e374fc012255b4ec786ae3c4bc838cd7309a540e5fe0952d03687d8804e \
+    --hash=sha256:e6f0e77c9417e7cd62af82529b10563db3423625c5fce018430b249bf977f9e8 \
+    --hash=sha256:e7631a77ffb1f7d2eefa4445ebbee491c720a5661ddf6df3498ebecae5ed375c \
+    --hash=sha256:ef810fbf7b781a5a593894e4f439773830bdecb885e6880d957d5b9382a960d2
+    # via -r requirements.dev.in
diff --git a/requirements.prod.in b/requirements.prod.in
@@ -0,0 +1,2 @@
+pyyaml
+streamlit
diff --git a/requirements.prod.txt b/requirements.prod.txt
diff --git a/requirements.txt b/requirements.txt