Skip to content

Commit c432181

Browse files
Implemented end-to-end coverage reporting for dev evals
1 parent c2c7920 commit c432181

File tree

19 files changed

+763
-70
lines changed

19 files changed

+763
-70
lines changed

eval/README.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,9 @@ Each experiment produces:
124124
- **Build success**: Can the project build without errors?
125125
- **Type check**: TypeScript compilation errors count
126126
- **Lint**: ESLint errors count
127-
- **Tests**: Storybook test results (passed/failed)
127+
- **Tests**: Storybook story results (passed/failed) including play functions
128128
- **Accessibility**: Axe violations count
129+
- **Coverage**: Vitest coverage summary, as percentages (lines/statements/branches/functions)
129130
- **Cost**: API usage cost in USD
130131
- **Duration**: Total time and API time in seconds
131132
- **Turns**: Number of agent conversation turns
@@ -145,7 +146,13 @@ Complete metrics from execution and evaluation:
145146
"typeCheckErrors": 0,
146147
"lintErrors": 0,
147148
"test": { "passed": 3, "failed": 0 },
148-
"a11y": { "violations": 1 }
149+
"a11y": { "violations": 1 },
150+
"coverage": {
151+
"lines": 87.5,
152+
"statements": 86.9,
153+
"branches": 75.0,
154+
"functions": 80.0
155+
}
149156
}
150157
```
151158

eval/eval.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,15 @@ if (
145145
);
146146
}
147147

// Coverage is optional on the evaluation summary; when absent we say so explicitly.
const cov = evaluationSummary.coverage;

// Formats a single coverage metric together with its unit. Missing or
// non-finite values render as a bare dash, so the output never reads "–%"
// or "NaN%".
const formatCov = (v: number | null | undefined): string =>
  typeof v === 'number' && Number.isFinite(v) ? `${v}%` : '–';

p.log.message(
  cov
    ? `📊 Coverage: lines ${formatCov(cov.lines)}, statements ${formatCov(cov.statements)}, branches ${formatCov(cov.branches)}, functions ${formatCov(cov.functions)}`
    : '📊 Coverage: (not collected)',
);

p.log.message(
  `⏱️ Duration: ${promptSummary.duration}s (API: ${promptSummary.durationApi}s)`,
);

eval/lib/context-utils.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import type { Context } from '../types.ts';
2+
3+
/**
 * Tells whether an evaluation context is of the `storybook-mcp-dev` kind.
 *
 * @param context - The evaluation context to inspect; only its `type` is read.
 * @returns `true` when `context.type` is exactly `'storybook-mcp-dev'`,
 *   `false` for every other type.
 */
export function isDevEvaluation(context: Context): boolean {
  const { type } = context;
  const matchesDevType = type === 'storybook-mcp-dev';
  return matchesDevType;
}

eval/lib/evaluations/coverage.ts

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import * as path from 'node:path';
2+
import * as fs from 'node:fs/promises';
3+
import type { EvaluationSummary } from '../../types';
4+
import type { CoverageFiles, CoverageSummary } from './result-types';
5+
import { createCoverageMap } from 'istanbul-lib-coverage';
6+
7+
/**
 * Coerces a coverage percentage to a finite number.
 *
 * Anything that is not a finite number (for example a placeholder string in
 * place of a percentage) becomes `null`, so downstream consumers only ever
 * see `number | null`.
 */
function normalizePct(pct: unknown): number | null {
  return typeof pct === 'number' && Number.isFinite(pct) ? pct : null;
}

/** Returns `true` when `error` carries the Node.js `ENOENT` error code. */
function isMissingFileError(error: unknown): boolean {
  return (
    typeof error === 'object' &&
    error !== null &&
    (error as { code?: unknown }).code === 'ENOENT'
  );
}

/**
 * Builds coverage totals and per-file coverage details from
 * `<projectPath>/coverage/coverage-final.json` and writes them to
 * `<resultsPath>/coverage/`.
 *
 * Two files are written on success:
 * - `coverage-summary.json`: `{ total }` with the overall percentages.
 * - `coverage-final.json`: the per-file details returned as `coverageFiles`
 *   (percentages, line hits, per-line branch counts and, when readable, the
 *   file's source text).
 *
 * This is best-effort: the function never throws. When the coverage report is
 * missing, both returned fields are `undefined`. Any other failure (malformed
 * JSON, unwritable results directory, ...) yields the same result but is
 * reported with `console.warn` so it is not mistaken for "no coverage".
 *
 * @param projectPath - Directory that contains the `coverage/` output.
 * @param resultsPath - Directory under which `coverage/` results are written.
 * @returns The overall percentages and the per-file details, or `undefined`
 *   for both when coverage could not be computed.
 */
export async function computeCoverage(
  projectPath: string,
  resultsPath: string,
): Promise<{
  coverage?: EvaluationSummary['coverage'];
  coverageFiles?: CoverageFiles;
}> {
  const finalCoveragePath = path.join(
    projectPath,
    'coverage',
    'coverage-final.json',
  );

  try {
    const coverageData = JSON.parse(
      await fs.readFile(finalCoveragePath, 'utf8'),
    ) as Parameters<typeof createCoverageMap>[0];

    // Derive everything from coverage-final using istanbul-lib-coverage.
    const coverageMap = createCoverageMap(coverageData);
    const summary = coverageMap.getCoverageSummary().toJSON();

    const coverageFiles: CoverageFiles = {};

    for (const filePath of coverageMap.files()) {
      // Defensive: skip a summary-style `total` key if one is present.
      if (filePath === 'total') continue;

      const fileCoverage = coverageMap.fileCoverageFor(filePath);
      const fileSummary = fileCoverage.toSummary().toJSON();

      // The source text is optional; an unreadable file must not fail the run.
      let source: string | undefined;
      try {
        source = await fs.readFile(filePath, 'utf8');
      } catch {
        source = undefined;
      }

      // Line and branch details are optional extras as well.
      let lineHits: Record<string, number> | undefined;
      let branchesByLine:
        | Record<string, { covered: number | null; total: number | null }>
        | undefined;
      try {
        lineHits = fileCoverage.getLineCoverage() as Record<string, number>;
        const branches = fileCoverage.getBranchCoverageByLine?.();
        if (branches && typeof branches === 'object') {
          branchesByLine = {};
          for (const [line, data] of Object.entries(
            branches as Record<
              string,
              { covered?: number | null; total?: number | null }
            >,
          )) {
            branchesByLine[line] = {
              covered: data.covered ?? null,
              total: data.total ?? null,
            };
          }
        }
      } catch {
        // Keep whatever was gathered before the failure; these fields are optional.
      }

      coverageFiles[filePath] = {
        branches: { pct: normalizePct(fileSummary.branches.pct) },
        functions: { pct: normalizePct(fileSummary.functions.pct) },
        lines: { pct: normalizePct(fileSummary.lines.pct) },
        statements: { pct: normalizePct(fileSummary.statements.pct) },
        lineHits,
        branchesByLine,
        source,
      };
    }

    const normalizedTotal: CoverageSummary = {
      branches: { pct: normalizePct(summary.branches.pct) },
      functions: { pct: normalizePct(summary.functions.pct) },
      lines: { pct: normalizePct(summary.lines.pct) },
      statements: { pct: normalizePct(summary.statements.pct) },
    };

    const coverage: EvaluationSummary['coverage'] = {
      branches: normalizedTotal.branches.pct,
      functions: normalizedTotal.functions.pct,
      lines: normalizedTotal.lines.pct,
      statements: normalizedTotal.statements.pct,
    };

    const targetCoverageDir = path.join(resultsPath, 'coverage');
    await fs.mkdir(targetCoverageDir, { recursive: true });
    await fs.writeFile(
      path.join(targetCoverageDir, 'coverage-summary.json'),
      JSON.stringify({ total: normalizedTotal }, null, 2),
    );
    await fs.writeFile(
      path.join(targetCoverageDir, 'coverage-final.json'),
      JSON.stringify(coverageFiles, null, 2),
    );

    return { coverage, coverageFiles };
  } catch (error: unknown) {
    // A missing report simply means coverage was not collected. Anything else
    // is unexpected, so surface it instead of hiding it behind "not collected".
    if (!isMissingFileError(error)) {
      console.warn(
        `Coverage could not be computed from ${finalCoveragePath}:`,
        error,
      );
    }
    return { coverage: undefined, coverageFiles: undefined };
  }
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import * as path from 'node:path';
2+
import * as fs from 'node:fs/promises';
3+
import type { JsonAssertionResult, JsonTestResults } from 'vitest/reporters';
4+
import type { A11yViolations, StoryResult, TestSummary } from './result-types';
5+
6+
/**
 * Reads the JSON test report at `<resultsPath>/tests.json` and summarizes it
 * per story.
 *
 * Only assertions whose `meta.storyId` is set are considered. For each story:
 * - its status is recorded as `'passed'` when the assertion passed and as
 *   `'failed'` for every other status, which matches how `failed` is counted
 *   in the summary (everything that did not pass);
 * - `a11y`-type reports with at least one violation are collected under the
 *   story id.
 *
 * As a side effect the report file is rewritten pretty-printed.
 *
 * The report is read from disk on every call (rather than loaded through a
 * module import), so repeated calls always see the current file contents and
 * absolute paths work on every platform.
 *
 * @param resultsPath - Directory that contains `tests.json`.
 * @returns Pass/fail counts, accessibility violations keyed by story id, and
 *   one result entry per story.
 * @throws If `tests.json` cannot be read, parsed, or rewritten.
 */
export async function parseTestResults(resultsPath: string): Promise<{
  testSummary: TestSummary;
  a11y: A11yViolations;
  storyResults: StoryResult[];
}> {
  // The parts of an assertion's `meta` that this function reads.
  type StoryReport = { type?: string; result?: { violations?: unknown[] } };
  type StoryMeta = { storyId?: string; reports?: StoryReport[] };

  const testResultsPath = path.join(resultsPath, 'tests.json');
  const jsonTestResults = JSON.parse(
    await fs.readFile(testResultsPath, 'utf8'),
  ) as JsonTestResults;

  // write the file again to pretty-print it
  await fs.writeFile(testResultsPath, JSON.stringify(jsonTestResults, null, 2));

  const a11yViolations: A11yViolations = {};
  // A Map keeps first-seen order; a later assertion for the same story
  // replaces the earlier status.
  const statusByStory = new Map<string, StoryResult['status']>();

  const testSuites = jsonTestResults.testResults
    ? Object.values(jsonTestResults.testResults)
    : [];

  for (const testSuite of testSuites) {
    for (const assertionResult of testSuite.assertionResults ?? []) {
      const meta = assertionResult.meta as StoryMeta | undefined;
      const storyId = meta?.storyId;
      if (!storyId) continue;

      // Collapse every non-passing status (failed, skipped, ...) to 'failed' so
      // the value really is a StoryResult status and agrees with the summary.
      statusByStory.set(
        storyId,
        assertionResult.status === 'passed' ? 'passed' : 'failed',
      );

      for (const report of meta?.reports ?? []) {
        const violations = report.result?.violations;
        if (report.type === 'a11y' && violations && violations.length > 0) {
          a11yViolations[storyId] = violations;
        }
      }
    }
  }

  const storyResults: StoryResult[] = Array.from(
    statusByStory,
    ([storyId, status]) => ({ storyId, status }),
  );

  const testsPassed = storyResults.filter((s) => s.status === 'passed').length;
  const testsFailed = storyResults.length - testsPassed;

  return {
    testSummary: {
      passed: testsPassed,
      failed: testsFailed,
    },
    a11y: a11yViolations,
    storyResults,
  };
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import type { EvaluationSummary } from '../../types';
2+
3+
/** Pass/fail counts, taken from the `test` section of the evaluation summary. */
export type TestSummary = Pick<EvaluationSummary['test'], 'passed' | 'failed'>;

/** Outcome of a single story, identified by its story id. */
export type StoryResult = {
  storyId: string;
  status: 'passed' | 'failed';
};

/** Accessibility violations, keyed by string id (one list of violations per key). */
export type A11yViolations = Record<string, any[]>;

/** A single coverage percentage; `null` when no value is available. */
type CoveragePct = { pct: number | null };

/** Coverage percentages for each of the four tracked metrics. */
export type CoverageSummary = Record<
  'branches' | 'functions' | 'lines' | 'statements',
  CoveragePct
>;

/** Optional per-file details that accompany a file's coverage percentages. */
type CoverageFileDetails = {
  /** Hit counts keyed by line. */
  lineHits?: Record<string, number>;
  /** Covered/total branch counts keyed by line. */
  branchesByLine?: Record<
    string,
    { covered: number | null; total: number | null }
  >;
  /** Text of the file, when available. */
  source?: string;
};

/** Coverage percentages plus optional details for each entry, keyed by string. */
export type CoverageFiles = Record<
  string,
  CoverageSummary & CoverageFileDetails
>;

eval/lib/evaluations/run-tests.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import * as path from 'node:path';
2+
import * as fs from 'node:fs/promises';
3+
import { x } from 'tinyexec';
4+
import { dedent } from 'ts-dedent';
5+
import type { ExperimentArgs } from '../../types';
6+
7+
/**
 * Runs `pnpm <testScript>` in the experiment's project directory and records
 * the outcome in `<resultsPath>/tests.md` (exit code, stdout and stderr).
 *
 * @param experimentArgs - Supplies `projectPath` (working directory for the
 *   command) and `resultsPath` (where `tests.md` is written).
 * @param testScript - Name of the pnpm script to run.
 * @returns The command's exit code. When no exit code is available the run is
 *   reported as failed (`1`) rather than silently treated as a success.
 */
export async function runTests(
  experimentArgs: ExperimentArgs,
  testScript: string,
): Promise<number> {
  const { projectPath, resultsPath } = experimentArgs;
  const result = await x('pnpm', [testScript], {
    nodeOptions: { cwd: projectPath },
  });

  // `exitCode` may be undefined (presumably when the process ended without an
  // exit code, e.g. it was terminated by a signal). Success cannot be claimed
  // in that case, so fall back to a non-zero code.
  const exitCode = result.exitCode ?? 1;

  await fs.writeFile(
    path.join(resultsPath, 'tests.md'),
    dedent`# Test Results

    **Exit Code:** ${result.exitCode ?? 'unknown'}

    ## stdout

    \`\`\`sh
    ${result.stdout}
    \`\`\`

    ## stderr

    \`\`\`
    ${result.stderr}
    \`\`\`
    `,
  );

  return exitCode;
}

0 commit comments

Comments
 (0)