diff --git a/combine-durations/combine_durations.py b/combine-durations/combine_durations.py index a229376..1d92f21 100644 --- a/combine-durations/combine_durations.py +++ b/combine-durations/combine_durations.py @@ -6,15 +6,21 @@ import os import sys from argparse import ArgumentParser, ArgumentTypeError, Namespace +from dataclasses import dataclass from functools import partial from pathlib import Path from statistics import fmean -from typing import NamedTuple +from typing import TYPE_CHECKING from rich import box from rich.console import Console from rich.table import Table +if TYPE_CHECKING: + from collections.abc import Iterator + + COMBINED_TYPE = dict[str, dict[str, list[float]]] + CONSOLE = Console(color_system="standard", soft_wrap=True, record=True) print = CONSOLE.print @@ -46,26 +52,87 @@ def parse_args() -> Namespace: return parser.parse_args() -class DurationStats(NamedTuple): - number_of_tests: int - total_run_time: float - average_run_time: float +@dataclass +class DurationStats: + number_of_tests: int = 0 + total_run_time: float = 0.0 + + @property + def average_run_time(self) -> float: + return self.total_run_time / self.number_of_tests if self.number_of_tests else 0.0 + + def __iter__(self) -> Iterator[int | float]: + yield self.number_of_tests + yield self.total_run_time + yield self.average_run_time + + +STATS_MAP = dict[str, DurationStats] def read_durations( - path: Path, stats: dict[str, DurationStats] + path: Path, + stats: STATS_MAP, ) -> tuple[str, dict[str, float]]: - OS = path.stem + os_name = path.stem data = json.loads(path.read_text()) - # new durations stats - stats[OS] = DurationStats( - number_of_tests=len(data), - total_run_time=sum(data.values()), - average_run_time=fmean(data.values()), - ) + # update durations stats + os_stats = stats.setdefault(os_name, DurationStats()) + os_stats.number_of_tests += len(data) + os_stats.total_run_time += sum(data.values()) - return OS, data + return os_name, data + + +def aggregate_new_durations(artifacts_dir: Path) -> 
tuple[COMBINED_TYPE, STATS_MAP]: + combined: COMBINED_TYPE = {} + + new_stats: dict[str, DurationStats] = {} + for path in artifacts_dir.glob("**/*.json"): + # read new durations + os_name, new_data = read_durations(path, new_stats) + + # insert new durations + os_combined = combined.setdefault(os_name, {}) + for key, value in new_data.items(): + os_combined.setdefault(key, []).append(value) + + return combined, new_stats + + +def aggregate_old_durations( + durations_dir: Path, + combined: COMBINED_TYPE, + unlink: bool = True, +) -> tuple[COMBINED_TYPE, STATS_MAP]: + combined = combined or {} + + old_stats: dict[str, DurationStats] = {} + for path in durations_dir.glob("*.json"): + # read old durations + os_name, old_data = read_durations(path, old_stats) + + try: + os_combined = combined[os_name] + except KeyError: + # KeyError: OS not present in new durations + if unlink: + print(f"⚠️ {os_name} not present in new durations, removing") + path.unlink() + else: + print(f"⚠️ {os_name} not present in new durations, skipping") + continue + + # warn about tests that are no longer present + for name in set(old_data) - set(combined[os_name]): + print(f"⚠️ {os_name}::{name} not present in new durations, removing") + + # only copy over keys that are still present in new durations + for key in set(old_data) & set(combined[os_name]): + os_combined[key].append(old_data[key]) + + return combined, old_stats def get_step_summary(html: str) -> str: @@ -103,40 +170,8 @@ def dump_summary(console: Console = CONSOLE) -> None: def main() -> None: args = parse_args() - combined: dict[str, dict[str, list[float]]] = {} - - # aggregate new durations - new_stats: dict[str, DurationStats] = {} - for path in args.artifacts_dir.glob("**/*.json"): - # read new durations - OS, new_data = read_durations(path, new_stats) - - # insert new durations - os_combined = combined.setdefault(OS, {}) - for key, value in new_data.items(): - os_combined.setdefault(key, []).append(value) - - # aggregate old 
durations - old_stats: dict[str, DurationStats] = {} - for path in args.durations_dir.glob("*.json"): - # read old durations - OS, old_data = read_durations(path, old_stats) - - try: - os_combined = combined[OS] - except KeyError: - # KeyError: OS not present in new durations - print(f"⚠️ {OS} not present in new durations, removing") - path.unlink() - continue - - # warn about tests that are no longer present - for name in set(old_data) - set(combined[OS]): - print(f"⚠️ {OS}::{name} not present in new durations, removing") - - # only copy over keys that are still present in new durations - for key in set(old_data) & set(combined[OS]): - os_combined[key].append(old_data[key]) + combined, new_stats = aggregate_new_durations(args.artifacts_dir) + combined, old_stats = aggregate_old_durations(args.durations_dir, combined) # display stats table = Table(box=box.MARKDOWN) @@ -144,16 +179,16 @@ def main() -> None: table.add_column("Number of tests") table.add_column("Total run time") table.add_column("Average run time") - for OS in sorted({*new_stats, *old_stats}): - ncount, ntotal, naverage = new_stats.get(OS, (0, 0.0, 0.0)) - ocount, ototal, oaverage = old_stats.get(OS, (0, 0.0, 0.0)) + for os_name in sorted({*new_stats, *old_stats}): + ncount, ntotal, naverage = new_stats.get(os_name, DurationStats()) + ocount, ototal, oaverage = old_stats.get(os_name, DurationStats()) dcount = ncount - ocount dtotal = ntotal - ototal daverage = naverage - oaverage table.add_row( - OS, + os_name, f"{ncount} ({dcount:+}) {'🟢' if dcount >= 0 else '🔴'}", f"{ntotal:.2f} ({dtotal:+.2f}) {'🔴' if dtotal >= 0 else '🟢'}", f"{naverage:.2f} ({daverage:+.2f}) {'🔴' if daverage >= 0 else '🟢'}", @@ -161,8 +196,8 @@ def main() -> None: print(table) # write out averages - for OS, os_combined in combined.items(): - (args.durations_dir / f"{OS}.json").write_text( + for os_name, os_combined in combined.items(): + (args.durations_dir / f"{os_name}.json").write_text( json.dumps( {key: fmean(values) for key, 
values in os_combined.items()}, indent=4, diff --git a/combine-durations/data/artifacts/OS1_run1/OS1.json b/combine-durations/data/artifacts/OS1_run1/OS1.json new file mode 100644 index 0000000..fec3ef1 --- /dev/null +++ b/combine-durations/data/artifacts/OS1_run1/OS1.json @@ -0,0 +1,5 @@ +{ + "tests/test_alpha.py::test_a": 1.1, + "tests/test_alpha.py::test_b": 1.1, + "tests/test_alpha.py::test_c": 1.1 +} diff --git a/combine-durations/data/artifacts/OS1_run2/OS1.json b/combine-durations/data/artifacts/OS1_run2/OS1.json new file mode 100644 index 0000000..48e94ba --- /dev/null +++ b/combine-durations/data/artifacts/OS1_run2/OS1.json @@ -0,0 +1,4 @@ +{ + "tests/test_beta.py::test_a": 1.2, + "tests/test_beta.py::test_b": 1.2 +} diff --git a/combine-durations/data/artifacts/OS2_run1/OS2.json b/combine-durations/data/artifacts/OS2_run1/OS2.json new file mode 100644 index 0000000..6bef17b --- /dev/null +++ b/combine-durations/data/artifacts/OS2_run1/OS2.json @@ -0,0 +1,5 @@ +{ + "tests/test_alpha.py::test_a": 2.1, + "tests/test_alpha.py::test_b": 2.1, + "tests/test_alpha.py::test_c": 2.1 +} diff --git a/combine-durations/data/artifacts/OS2_run2/OS2.json b/combine-durations/data/artifacts/OS2_run2/OS2.json new file mode 100644 index 0000000..79eaca8 --- /dev/null +++ b/combine-durations/data/artifacts/OS2_run2/OS2.json @@ -0,0 +1,4 @@ +{ + "tests/test_beta.py::test_a": 2.2, + "tests/test_beta.py::test_b": 2.2 +} diff --git a/combine-durations/data/durations/OS1.json b/combine-durations/data/durations/OS1.json new file mode 100644 index 0000000..e8d6120 --- /dev/null +++ b/combine-durations/data/durations/OS1.json @@ -0,0 +1,9 @@ +{ + "tests/test_alpha.py::test_a": 1, + "tests/test_alpha.py::test_b": 1, + "tests/test_beta.py::test_a": 1, + "tests/test_beta.py::test_b": 1, + "tests/test_gamma.py::test_a": 1, + "tests/test_gamma.py::test_b": 1 + +} diff --git a/combine-durations/data/durations/OS2.json b/combine-durations/data/durations/OS2.json new file mode 100644 index 
0000000..8b1410a --- /dev/null +++ b/combine-durations/data/durations/OS2.json @@ -0,0 +1,9 @@ +{ + "tests/test_alpha.py::test_a": 2, + "tests/test_alpha.py::test_b": 2, + "tests/test_beta.py::test_a": 2, + "tests/test_beta.py::test_b": 2, + "tests/test_gamma.py::test_a": 2, + "tests/test_gamma.py::test_b": 2 + +} diff --git a/combine-durations/test_combine_durations.py b/combine-durations/test_combine_durations.py index 9dd56b3..087074d 100644 --- a/combine-durations/test_combine_durations.py +++ b/combine-durations/test_combine_durations.py @@ -1,14 +1,24 @@ from __future__ import annotations +import json from argparse import ArgumentTypeError +from pathlib import Path from typing import TYPE_CHECKING import pytest -from combine_durations import validate_dir +from combine_durations import ( + aggregate_new_durations, + aggregate_old_durations, + read_durations, + validate_dir, +) if TYPE_CHECKING: - from pathlib import Path + from combine_durations import COMBINED_TYPE, STATS_MAP + +DURATIONS_DIR = Path(__file__).parent / "data" / "durations" +ARTIFACTS_DIR = Path(__file__).parent / "data" / "artifacts" def test_validate_dir(tmp_path: Path) -> None: @@ -41,3 +51,60 @@ def test_validate_dir(tmp_path: Path) -> None: # permissions # TODO: not easy to test using either chmod or chown + + +@pytest.mark.parametrize( + "path", + [pytest.param(path, id=path.name) for path in DURATIONS_DIR.glob("*.json")], +) +def test_read_durations(path: Path) -> None: + stats: STATS_MAP = {} + os, data = read_durations(path, stats) + assert os == path.stem + assert data == json.loads(path.read_text()) + assert os in stats + assert len(stats) == 1 + assert stats[os].number_of_tests == len(data) + assert stats[os].total_run_time == sum(data.values()) + assert stats[os].average_run_time == sum(data.values()) / len(data) + + +def test_aggregate_new_durations() -> None: + combined, stats = aggregate_new_durations(ARTIFACTS_DIR) + assert len(combined) == len(stats) == 2 + for os in ("OS1", 
"OS2"): + assert len(combined[os]) == 5 + assert stats[os].number_of_tests == 5 + assert stats[os].total_run_time > 0 + assert stats[os].average_run_time > 0 + + +@pytest.mark.parametrize( + "combined,num_combined", + [ + pytest.param({}, 0, id="no durations"), + pytest.param( + { + path.stem: { + test: [duration] + for test, duration in json.loads(path.read_text()).items() + } + for path in DURATIONS_DIR.glob("*.json") + }, + 6, + id="unchanged durations", + ), + pytest.param( + aggregate_new_durations(ARTIFACTS_DIR)[0], + 5, + id="updated durations", + ), + ], +) +def test_aggregate_old_durations(combined: COMBINED_TYPE, num_combined: int) -> None: + combined, old_stats = aggregate_old_durations(DURATIONS_DIR, combined, unlink=False) + assert len(combined) == (2 if num_combined else 0) + assert len(old_stats) == 2 + for os in ("OS1", "OS2"): + assert len(combined.get(os, ())) == num_combined + assert old_stats[os].number_of_tests == 6