@@ -375,135 +375,181 @@ TEST(CpuCacheTest, Metadata) {
 
   const int num_cpus = NumCPUs();
 
-  CpuCache cache;
-  cache.Activate();
-
-  cpu_cache_internal::SlabShiftBounds shift_bounds =
-      cache.GetPerCpuSlabShiftBounds();
-
-  PerCPUMetadataState r = cache.MetadataMemoryUsage();
-  size_t slabs_size = subtle::percpu::GetSlabsAllocSize(
-      subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus);
-  size_t resize_size = num_cpus * sizeof(bool);
-  size_t begins_size = kNumClasses * sizeof(std::atomic<uint16_t>);
-  EXPECT_EQ(r.virtual_size, slabs_size + resize_size + begins_size);
-  EXPECT_EQ(r.resident_size, 0);
-
-  auto count_cores = [&]() {
-    int populated_cores = 0;
-    for (int i = 0; i < num_cpus; i++) {
-      if (cache.HasPopulated(i)) {
-        populated_cores++;
+  const int kAttempts = 3;
+  for (int attempt = 1; attempt <= kAttempts; attempt++) {
+    SCOPED_TRACE(absl::StrCat("attempt=", attempt));
+
+    CpuCache cache;
+    cache.Activate();
+
+    cpu_cache_internal::SlabShiftBounds shift_bounds =
+        cache.GetPerCpuSlabShiftBounds();
+
+    PerCPUMetadataState r = cache.MetadataMemoryUsage();
+    size_t slabs_size = subtle::percpu::GetSlabsAllocSize(
+        subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus);
+    size_t resize_size = num_cpus * sizeof(bool);
+    size_t begins_size = kNumClasses * sizeof(std::atomic<uint16_t>);
+    EXPECT_EQ(r.virtual_size, slabs_size + resize_size + begins_size);
+    EXPECT_EQ(r.resident_size, 0);
+
+    auto count_cores = [&]() {
+      int populated_cores = 0;
+      for (int i = 0; i < num_cpus; i++) {
+        if (cache.HasPopulated(i)) {
+          populated_cores++;
+        }
+      }
+      return populated_cores;
+    };
+
+    EXPECT_EQ(0, count_cores());
+
+    int allowed_cpu_id;
+    const size_t kSizeClass = 2;
+    const size_t num_to_move =
+        cache.forwarder().num_objects_to_move(kSizeClass);
+
+    TransferCacheStats tc_stats =
+        cache.forwarder().transfer_cache().GetStats(kSizeClass);
+    EXPECT_EQ(tc_stats.remove_hits, 0);
+    EXPECT_EQ(tc_stats.remove_misses, 0);
+    EXPECT_EQ(tc_stats.remove_object_misses, 0);
+    EXPECT_EQ(tc_stats.insert_hits, 0);
+    EXPECT_EQ(tc_stats.insert_misses, 0);
+    EXPECT_EQ(tc_stats.insert_object_misses, 0);
+
+    void* ptr;
+    {
+      // Restrict this thread to a single core while allocating and processing
+      // the slow path.
+      //
+      // TODO(b/151313823): Without this restriction, we may access--for
+      // reading only--other slabs if we end up being migrated. These may cause
+      // huge pages to be faulted for those cores, leading to test flakiness.
+      tcmalloc_internal::ScopedAffinityMask mask(
+          tcmalloc_internal::AllowedCpus()[0]);
+      allowed_cpu_id = subtle::percpu::TcmallocTest::VirtualCpuSynchronize();
+
+      ptr = cache.Allocate(kSizeClass);
+
+      if (mask.Tampered() ||
+          allowed_cpu_id !=
+              subtle::percpu::TcmallocTest::VirtualCpuSynchronize()) {
+        return;
       }
     }
-    return populated_cores;
-  };
+    EXPECT_NE(ptr, nullptr);
+    EXPECT_EQ(1, count_cores());
+
+    // We don't care if the transfer cache hit or missed, but the CPU cache
+    // should have done the operation.
+    tc_stats = cache.forwarder().transfer_cache().GetStats(kSizeClass);
+    if ((tc_stats.remove_object_misses != num_to_move ||
+         tc_stats.insert_hits + tc_stats.insert_misses != 0) &&
+        attempt < kAttempts) {
+      // The operation didn't occur as expected, likely because we were
+      // preempted but returned to the same core (otherwise Tampered would have
+      // fired).
+      //
+      // The MSB of tcmalloc_slabs should be cleared to indicate we were
+      // preempted. As of December 2024, Refill and its callees do not invoke
+      // CacheCpuSlab. This check can spuriously pass if we're preempted
+      // between the end of Allocate and now, rather than within Allocate, but
+      // it ensures we do not silently break.
+      EXPECT_EQ(subtle::percpu::tcmalloc_slabs & TCMALLOC_CACHED_SLABS_MASK, 0);
+
+      cache.Deallocate(ptr, kSizeClass);
+      cache.Deactivate();
 
-  EXPECT_EQ(0, count_cores());
+      continue;
+    }
 
-  int allowed_cpu_id;
-  const size_t kSizeClass = 2;
-  const size_t num_to_move = cache.forwarder().num_objects_to_move(kSizeClass);
-  void* ptr;
-  {
-    // Restrict this thread to a single core while allocating and processing the
-    // slow path.
-    //
-    // TODO(b/151313823): Without this restriction, we may access--for reading
-    // only--other slabs if we end up being migrated. These may cause huge
-    // pages to be faulted for those cores, leading to test flakiness.
-    tcmalloc_internal::ScopedAffinityMask mask(
-        tcmalloc_internal::AllowedCpus()[0]);
-    allowed_cpu_id = subtle::percpu::TcmallocTest::VirtualCpuSynchronize();
+    EXPECT_EQ(tc_stats.remove_hits + tc_stats.remove_misses, 1);
+    EXPECT_EQ(tc_stats.remove_object_misses, num_to_move);
+    EXPECT_EQ(tc_stats.insert_hits, 0);
+    EXPECT_EQ(tc_stats.insert_misses, 0);
+    EXPECT_EQ(tc_stats.insert_object_misses, 0);
+
+    r = cache.MetadataMemoryUsage();
+    EXPECT_EQ(
+        r.virtual_size,
+        resize_size + begins_size +
+            subtle::percpu::GetSlabsAllocSize(
+                subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus));
+
+    // We expect to fault in a single core, but we may end up faulting an
+    // entire hugepage worth of memory when we touch that core and another when
+    // touching the header.
+    const size_t core_slab_size = r.virtual_size / num_cpus;
+    const size_t upper_bound =
+        ((core_slab_size + kHugePageSize - 1) & ~(kHugePageSize - 1)) +
+        kHugePageSize;
+
+    // A single core may be less than the full slab (core_slab_size), since we
+    // do not touch every page within the slab.
+    EXPECT_GT(r.resident_size, 0);
+    EXPECT_LE(r.resident_size, upper_bound)
+        << count_cores() << " " << core_slab_size << " " << kHugePageSize;
+
+    // This test is much more sensitive to implementation details of the per-CPU
+    // cache. It may need to be updated from time to time. These numbers were
+    // calculated by MADV_NOHUGEPAGE'ing the memory used for the slab and
+    // measuring the resident size.
+    switch (shift_bounds.max_shift) {
+      case 13:
+        EXPECT_GE(r.resident_size, 4096);
+        break;
+      case 19:
+        EXPECT_GE(r.resident_size, 8192);
+        break;
+      default:
+        ASSUME(false);
+        break;
+    }
 
-    ptr = cache.Allocate(kSizeClass);
+    // Read stats from the CPU caches. This should not impact resident_size.
+    const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size();
+    size_t total_used_bytes = 0;
+    for (int cpu = 0; cpu < num_cpus; ++cpu) {
+      size_t used_bytes = cache.UsedBytes(cpu);
+      total_used_bytes += used_bytes;
+
+      if (cpu == allowed_cpu_id) {
+        EXPECT_GT(used_bytes, 0);
+        EXPECT_TRUE(cache.HasPopulated(cpu));
+      } else {
+        EXPECT_EQ(used_bytes, 0);
+        EXPECT_FALSE(cache.HasPopulated(cpu));
+      }
 
-    if (mask.Tampered() ||
-        allowed_cpu_id !=
-            subtle::percpu::TcmallocTest::VirtualCpuSynchronize()) {
-      return;
+      EXPECT_LE(cache.Unallocated(cpu), max_cpu_cache_size);
+      EXPECT_EQ(cache.Capacity(cpu), max_cpu_cache_size);
+      EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu),
+                cache.Capacity(cpu));
     }
-  }
-  EXPECT_NE(ptr, nullptr);
-  EXPECT_EQ(1, count_cores());
-
-  r = cache.MetadataMemoryUsage();
-  EXPECT_EQ(
-      r.virtual_size,
-      resize_size + begins_size +
-          subtle::percpu::GetSlabsAllocSize(
-              subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus));
-
-  // We expect to fault in a single core, but we may end up faulting an
-  // entire hugepage worth of memory when we touch that core and another when
-  // touching the header.
-  const size_t core_slab_size = r.virtual_size / num_cpus;
-  const size_t upper_bound =
-      ((core_slab_size + kHugePageSize - 1) & ~(kHugePageSize - 1)) +
-      kHugePageSize;
-
-  // A single core may be less than the full slab (core_slab_size), since we
-  // do not touch every page within the slab.
-  EXPECT_GT(r.resident_size, 0);
-  EXPECT_LE(r.resident_size, upper_bound)
-      << count_cores() << " " << core_slab_size << " " << kHugePageSize;
-
-  // This test is much more sensitive to implementation details of the per-CPU
-  // cache. It may need to be updated from time to time. These numbers were
-  // calculated by MADV_NOHUGEPAGE'ing the memory used for the slab and
-  // measuring the resident size.
-  switch (shift_bounds.max_shift) {
-    case 13:
-      EXPECT_GE(r.resident_size, 4096);
-      break;
-    case 19:
-      EXPECT_GE(r.resident_size, 8192);
-      break;
-    default:
-      ASSUME(false);
-      break;
-  }
 
-  // Read stats from the CPU caches. This should not impact resident_size.
-  const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size();
-  size_t total_used_bytes = 0;
-  for (int cpu = 0; cpu < num_cpus; ++cpu) {
-    size_t used_bytes = cache.UsedBytes(cpu);
-    total_used_bytes += used_bytes;
-
-    if (cpu == allowed_cpu_id) {
-      EXPECT_GT(used_bytes, 0);
-      EXPECT_TRUE(cache.HasPopulated(cpu));
-    } else {
-      EXPECT_EQ(used_bytes, 0);
-      EXPECT_FALSE(cache.HasPopulated(cpu));
+    for (int size_class = 1; size_class < kNumClasses; ++size_class) {
+      // This is sensitive to the current growth policies of CpuCache. It may
+      // require updating from time-to-time.
+      EXPECT_EQ(cache.TotalObjectsOfClass(size_class),
+                (size_class == kSizeClass ? num_to_move - 1 : 0))
+          << size_class;
     }
+    EXPECT_EQ(cache.TotalUsedBytes(), total_used_bytes);
 
-    EXPECT_LE(cache.Unallocated(cpu), max_cpu_cache_size);
-    EXPECT_EQ(cache.Capacity(cpu), max_cpu_cache_size);
-    EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu),
-              cache.Capacity(cpu));
-  }
+    PerCPUMetadataState post_stats = cache.MetadataMemoryUsage();
+    // Confirm stats are within expected bounds.
+    EXPECT_GT(post_stats.resident_size, 0);
+    EXPECT_LE(post_stats.resident_size, upper_bound) << count_cores();
+    // Confirm stats are unchanged.
+    EXPECT_EQ(r.resident_size, post_stats.resident_size);
 
-  for (int size_class = 1; size_class < kNumClasses; ++size_class) {
-    // This is sensitive to the current growth policies of CpuCache. It may
-    // require updating from time-to-time.
-    EXPECT_EQ(cache.TotalObjectsOfClass(size_class),
-              (size_class == kSizeClass ? num_to_move - 1 : 0))
-        << size_class;
+    // Tear down.
+    cache.Deallocate(ptr, kSizeClass);
+    cache.Deactivate();
+    break;
   }
-  EXPECT_EQ(cache.TotalUsedBytes(), total_used_bytes);
-
-  PerCPUMetadataState post_stats = cache.MetadataMemoryUsage();
-  // Confirm stats are within expected bounds.
-  EXPECT_GT(post_stats.resident_size, 0);
-  EXPECT_LE(post_stats.resident_size, upper_bound) << count_cores();
-  // Confirm stats are unchanged.
-  EXPECT_EQ(r.resident_size, post_stats.resident_size);
-
-  // Tear down.
-  cache.Deallocate(ptr, kSizeClass);
-  cache.Deactivate();
 }
 
 TEST(CpuCacheTest, CacheMissStats) {
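For reference, the `upper_bound` used in this hunk rounds the per-core slab up to a whole huge page and then allows one additional huge page for the slab header. A minimal standalone sketch of that arithmetic, using an assumed 2 MiB huge page and an assumed 512 KiB per-core slab (the test derives these values from `kHugePageSize` and `r.virtual_size / num_cpus` at runtime):

```cpp
#include <cstddef>
#include <cstdio>

int main() {
  // Assumed values for illustration only; the test computes them at runtime.
  const std::size_t kHugePageSize = 2u << 20;     // 2 MiB huge page
  const std::size_t core_slab_size = 512u << 10;  // 512 KiB per-core slab

  // Round the per-core slab up to a whole huge page, then add one more huge
  // page to cover the header that may also be faulted in.
  const std::size_t upper_bound =
      ((core_slab_size + kHugePageSize - 1) & ~(kHugePageSize - 1)) +
      kHugePageSize;

  // With these numbers: 512 KiB rounds up to 2 MiB, plus 2 MiB for the
  // header, giving a 4 MiB bound on resident_size.
  std::printf("upper_bound = %zu bytes\n", upper_bound);
  return 0;
}
```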
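The structural change in this hunk is the retry loop around the whole test body: an attempt that looks disturbed (for example by preemption) is torn down and retried, and the first clean attempt ends the loop. A self-contained sketch of that control flow, with a hypothetical `RunOnce()` standing in for the allocate-and-check body of the test:

```cpp
#include <cstdio>

// Hypothetical stand-in for one allocate-and-check pass of the test; returns
// true when the pass ran without interference. Here the first pass is
// simulated as disturbed and the second as clean.
bool RunOnce(int attempt) { return attempt >= 2; }

int main() {
  const int kAttempts = 3;
  for (int attempt = 1; attempt <= kAttempts; attempt++) {
    const bool clean = RunOnce(attempt);
    if (!clean && attempt < kAttempts) {
      // Disturbed attempt: tear down and try again, as the test does with
      // Deallocate()/Deactivate() before its `continue`.
      std::printf("attempt %d disturbed, retrying\n", attempt);
      continue;
    }
    // Clean attempt (or the final allowed one): run the assertions, tear
    // down, and stop retrying.
    std::printf("attempt %d accepted\n", attempt);
    break;
  }
  return 0;
}
```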