 benchmarks:
   llama-3-8b:
     name: Llama 3.0 8B
-    step_time_lower_bound: 2.67793406
-    step_time_upper_bound: 2.788876
-    confidence_interval: 0.05547
-    average: 2.7334
-    sample_size: 169
+    step_time_lower_bound: 2.68109009
+    step_time_upper_bound: 2.789223
+    confidence_interval: 0.05407
+    average: 2.7352
+    sample_size: 427
   llama-3_1-8b-sa:
     name: Llama 3.1 8B (Splash Attention)
-    step_time_lower_bound: 2.35485746
-    step_time_upper_bound: 2.457598
-    confidence_interval: 0.05137
-    average: 2.4062
-    sample_size: 169
+    step_time_lower_bound: 2.34653077
+    step_time_upper_bound: 2.467111
+    confidence_interval: 0.06029
+    average: 2.4068
+    sample_size: 428
   llama-3_1-8b-scan-offload:
     name: Llama 3.1 8B (Scan + Offload)
-    step_time_lower_bound: 2.73963398
-    step_time_upper_bound: 2.85849
-    confidence_interval: 0.05943
-    average: 2.7991
-    sample_size: 169
+    step_time_lower_bound: 2.74099553
+    step_time_upper_bound: 2.860302
+    confidence_interval: 0.05965
+    average: 2.8006
+    sample_size: 428
   llama-3-8b-2d:
     name: Llama 3.0 8B (2D sharding)
-    step_time_lower_bound: 3.28863385
-    step_time_upper_bound: 3.38879529
+    step_time_lower_bound: 3.28827914
+    step_time_upper_bound: 3.38842977
     confidence_interval: 0.05008
-    average: 3.3387
-    sample_size: 169
+    average: 3.3384
+    sample_size: 428
   mixtral-8x7b:
     name: Mixtral 8x7B
-    step_time_lower_bound: 3.09891705
-    step_time_upper_bound: 3.19330031
+    step_time_lower_bound: 3.09900735
+    step_time_upper_bound: 3.19339336
     confidence_interval: 0.04719
-    average: 3.1461
-    sample_size: 169
+    average: 3.1462
+    sample_size: 427
   llama-3-8b-2-slice:
     name: Llama 3.0 8B (2 Slice)
-    step_time_lower_bound: 3.88681827
-    step_time_upper_bound: 4.026164
-    confidence_interval: 0.06967
-    average: 3.9565
-    sample_size: 169
+    step_time_lower_bound: 3.82985294
+    step_time_upper_bound: 4.087614
+    confidence_interval: 0.12888
+    average: 3.9587
+    sample_size: 416
 metadata:
-  query_start: 2025-05-29 17:52:00 America/Los_Angeles
-  query_end: 2025-06-01 20:00:00 America/Los_Angeles
+  query_start: '2025-05-26T18:37:58.674556-07:00'
+  query_end: '2025-06-05T18:37:58-07:00'
   confidence_level: 0.999
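In each benchmark entry the bounds appear to equal average ± confidence_interval (up to rounding of the average), i.e. confidence_interval looks like the half-width of a two-sided interval around the mean step time at the file's confidence_level of 0.999. The sketch below is a hypothetical reconstruction of that arithmetic, not the repository's actual tooling; the function name, the normal approximation, and the z value are assumptions.

# Hypothetical sketch (not from this repository) of how the bound fields
# could be derived from raw per-step times.  Assumptions: "confidence_interval"
# is the half-width of a two-sided interval around "average", computed with a
# normal approximation at confidence_level 0.999 (z ~= 3.2905).
import math
import statistics

def step_time_bounds(step_times, z=3.2905):
    """Return the bound fields for one benchmark entry from sampled step times."""
    n = len(step_times)
    avg = statistics.fmean(step_times)
    stderr = statistics.stdev(step_times) / math.sqrt(n)  # standard error of the mean
    half_width = z * stderr
    return {
        "step_time_lower_bound": avg - half_width,
        "step_time_upper_bound": avg + half_width,
        "confidence_interval": half_width,
        "average": avg,
        "sample_size": n,
    }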