@@ -185,6 +185,38 @@ def test_get_project_summary_with_complex_data():
185185 assert set (result ["runs" ]) == {"run1" , "run2" , "run3" }
186186
187187
def test_get_project_summary_with_step_statistics():
    """Check the ``step_statistics`` entry produced by get_project_summary.

    The mocked reader yields one five-row table spanning two runs. The
    summary must echo the project id, report two runs, and expose a
    ``step_statistics`` mapping matching the expected totals below.
    """
    # Fixture columns: run1 contributes steps 1.0, 2.0, 3.0 and run2
    # contributes 1.0, 6.0, so the value 1.0 appears twice across the rows.
    run_ids = ["run1", "run1", "run1", "run2", "run2"]
    steps = [1.0, 2.0, 3.0, 1.0, 6.0]
    row_count = len(steps)

    table = pa.table(
        {
            "project_id": ["step-statistics-project"] * row_count,
            "run_id": run_ids,
            "attribute_type": ["float"] * row_count,
            "attribute_path": ["loss"] * row_count,
            "step": steps,
        }
    )

    # The reader hands back a one-shot iterator holding the single table.
    reader = Mock(spec=ParquetReader)
    reader.read_project_data.return_value = iter([table])

    summary = SummaryManager(parquet_reader=reader).get_project_summary(
        Path("/data/step_statistics_project")
    )

    expected_step_statistics = {
        "total_steps": 5,
        "min_step": 1.0,
        "max_step": 6.0,
        "unique_steps": 4,
    }

    assert summary["project_id"] == "step-statistics-project"
    assert summary["total_runs"] == 2
    assert summary["step_statistics"] == expected_step_statistics
218+
219+
188220def test_get_project_summary_multiple_tables ():
189221 """Test get_project_summary when read_project_data returns multiple tables."""
190222 mock_reader = Mock (spec = ParquetReader )
0 commit comments