@@ -375,135 +375,181 @@ TEST(CpuCacheTest, Metadata) {
 
   const int num_cpus = NumCPUs();
 
-  CpuCache cache;
-  cache.Activate();
-
-  cpu_cache_internal::SlabShiftBounds shift_bounds =
-      cache.GetPerCpuSlabShiftBounds();
-
-  PerCPUMetadataState r = cache.MetadataMemoryUsage();
-  size_t slabs_size = subtle::percpu::GetSlabsAllocSize(
-      subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus);
-  size_t resize_size = num_cpus * sizeof(bool);
-  size_t begins_size = kNumClasses * sizeof(std::atomic<uint16_t>);
-  EXPECT_EQ(r.virtual_size, slabs_size + resize_size + begins_size);
-  EXPECT_EQ(r.resident_size, 0);
-
-  auto count_cores = [&]() {
-    int populated_cores = 0;
-    for (int i = 0; i < num_cpus; i++) {
-      if (cache.HasPopulated(i)) {
-        populated_cores++;
+  const int kAttempts = 3;
+  for (int attempt = 1; attempt <= kAttempts; attempt++) {
+    SCOPED_TRACE(absl::StrCat("attempt=", attempt));
+
+    CpuCache cache;
+    cache.Activate();
+
+    cpu_cache_internal::SlabShiftBounds shift_bounds =
+        cache.GetPerCpuSlabShiftBounds();
+
+    PerCPUMetadataState r = cache.MetadataMemoryUsage();
+    size_t slabs_size = subtle::percpu::GetSlabsAllocSize(
+        subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus);
+    size_t resize_size = num_cpus * sizeof(bool);
+    size_t begins_size = kNumClasses * sizeof(std::atomic<uint16_t>);
+    EXPECT_EQ(r.virtual_size, slabs_size + resize_size + begins_size);
+    EXPECT_EQ(r.resident_size, 0);
+
+    auto count_cores = [&]() {
+      int populated_cores = 0;
+      for (int i = 0; i < num_cpus; i++) {
+        if (cache.HasPopulated(i)) {
+          populated_cores++;
+        }
+      }
+      return populated_cores;
+    };
+
+    EXPECT_EQ(0, count_cores());
+
+    int allowed_cpu_id;
+    const size_t kSizeClass = 2;
+    const size_t num_to_move =
+        cache.forwarder().num_objects_to_move(kSizeClass);
+
+    TransferCacheStats tc_stats =
+        cache.forwarder().transfer_cache().GetStats(kSizeClass);
+    EXPECT_EQ(tc_stats.remove_hits, 0);
+    EXPECT_EQ(tc_stats.remove_misses, 0);
+    EXPECT_EQ(tc_stats.remove_object_misses, 0);
+    EXPECT_EQ(tc_stats.insert_hits, 0);
+    EXPECT_EQ(tc_stats.insert_misses, 0);
+    EXPECT_EQ(tc_stats.insert_object_misses, 0);
+
+    void* ptr;
+    {
+      // Restrict this thread to a single core while allocating and processing
+      // the slow path.
+      //
+      // TODO(b/151313823): Without this restriction, we may access--for
+      // reading only--other slabs if we end up being migrated. These may cause
+      // huge pages to be faulted for those cores, leading to test flakiness.
+      tcmalloc_internal::ScopedAffinityMask mask(
+          tcmalloc_internal::AllowedCpus()[0]);
+      allowed_cpu_id = subtle::percpu::TcmallocTest::VirtualCpuSynchronize();
+
+      ptr = cache.Allocate(kSizeClass);
+
+      if (mask.Tampered() ||
+          allowed_cpu_id !=
+              subtle::percpu::TcmallocTest::VirtualCpuSynchronize()) {
+        return;
       }
     }
-    return populated_cores;
-  };
+    EXPECT_NE(ptr, nullptr);
+    EXPECT_EQ(1, count_cores());
+
+    // We don't care if the transfer cache hit or missed, but the CPU cache
+    // should have done the operation.
+    tc_stats = cache.forwarder().transfer_cache().GetStats(kSizeClass);
+    if ((tc_stats.remove_object_misses != num_to_move ||
+         tc_stats.insert_hits + tc_stats.insert_misses != 0) &&
+        attempt < kAttempts) {
+      // The operation didn't occur as expected, likely because we were
+      // preempted but returned to the same core (otherwise Tampered would have
+      // fired).
+      //
+      // The MSB of tcmalloc_slabs should be cleared to indicate we were
+      // preempted. As of December 2024, Refill and its callees do not invoke
+      // CacheCpuSlab. This check can spuriously pass if we're preempted
+      // between the end of Allocate and now, rather than within Allocate, but
+      // it ensures we do not silently break.
+      EXPECT_EQ(subtle::percpu::tcmalloc_slabs & TCMALLOC_CACHED_SLABS_MASK, 0);
+
+      cache.Deallocate(ptr, kSizeClass);
+      cache.Deactivate();
 
-  EXPECT_EQ(0, count_cores());
+      continue;
+    }
 
-  int allowed_cpu_id;
-  const size_t kSizeClass = 2;
-  const size_t num_to_move = cache.forwarder().num_objects_to_move(kSizeClass);
-  void* ptr;
-  {
-    // Restrict this thread to a single core while allocating and processing the
-    // slow path.
-    //
-    // TODO(b/151313823): Without this restriction, we may access--for reading
-    // only--other slabs if we end up being migrated. These may cause huge
-    // pages to be faulted for those cores, leading to test flakiness.
-    tcmalloc_internal::ScopedAffinityMask mask(
-        tcmalloc_internal::AllowedCpus()[0]);
-    allowed_cpu_id = subtle::percpu::TcmallocTest::VirtualCpuSynchronize();
+    EXPECT_EQ(tc_stats.remove_hits + tc_stats.remove_misses, 1);
+    EXPECT_EQ(tc_stats.remove_object_misses, num_to_move);
+    EXPECT_EQ(tc_stats.insert_hits, 0);
+    EXPECT_EQ(tc_stats.insert_misses, 0);
+    EXPECT_EQ(tc_stats.insert_object_misses, 0);
+
+    r = cache.MetadataMemoryUsage();
+    EXPECT_EQ(
+        r.virtual_size,
+        resize_size + begins_size +
+            subtle::percpu::GetSlabsAllocSize(
+                subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus));
+
+    // We expect to fault in a single core, but we may end up faulting an
+    // entire hugepage worth of memory when we touch that core and another when
+    // touching the header.
+    const size_t core_slab_size = r.virtual_size / num_cpus;
+    const size_t upper_bound =
+        ((core_slab_size + kHugePageSize - 1) & ~(kHugePageSize - 1)) +
+        kHugePageSize;
+
+    // A single core may be less than the full slab (core_slab_size), since we
+    // do not touch every page within the slab.
+    EXPECT_GT(r.resident_size, 0);
+    EXPECT_LE(r.resident_size, upper_bound)
+        << count_cores() << " " << core_slab_size << " " << kHugePageSize;
+
+    // This test is much more sensitive to implementation details of the per-CPU
+    // cache. It may need to be updated from time to time. These numbers were
+    // calculated by MADV_NOHUGEPAGE'ing the memory used for the slab and
+    // measuring the resident size.
+    switch (shift_bounds.max_shift) {
+      case 13:
+        EXPECT_GE(r.resident_size, 4096);
+        break;
+      case 19:
+        EXPECT_GE(r.resident_size, 8192);
+        break;
+      default:
+        ASSUME(false);
+        break;
+    }
 
-    ptr = cache.Allocate(kSizeClass);
+    // Read stats from the CPU caches. This should not impact resident_size.
+    const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size();
+    size_t total_used_bytes = 0;
+    for (int cpu = 0; cpu < num_cpus; ++cpu) {
+      size_t used_bytes = cache.UsedBytes(cpu);
+      total_used_bytes += used_bytes;
+
+      if (cpu == allowed_cpu_id) {
+        EXPECT_GT(used_bytes, 0);
+        EXPECT_TRUE(cache.HasPopulated(cpu));
+      } else {
+        EXPECT_EQ(used_bytes, 0);
+        EXPECT_FALSE(cache.HasPopulated(cpu));
+      }
 
-    if (mask.Tampered() ||
-        allowed_cpu_id !=
-            subtle::percpu::TcmallocTest::VirtualCpuSynchronize()) {
-      return;
+      EXPECT_LE(cache.Unallocated(cpu), max_cpu_cache_size);
+      EXPECT_EQ(cache.Capacity(cpu), max_cpu_cache_size);
+      EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu),
+                cache.Capacity(cpu));
     }
-  }
-  EXPECT_NE(ptr, nullptr);
-  EXPECT_EQ(1, count_cores());
-
-  r = cache.MetadataMemoryUsage();
-  EXPECT_EQ(
-      r.virtual_size,
-      resize_size + begins_size +
-          subtle::percpu::GetSlabsAllocSize(
-              subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus));
-
-  // We expect to fault in a single core, but we may end up faulting an
-  // entire hugepage worth of memory when we touch that core and another when
-  // touching the header.
-  const size_t core_slab_size = r.virtual_size / num_cpus;
-  const size_t upper_bound =
-      ((core_slab_size + kHugePageSize - 1) & ~(kHugePageSize - 1)) +
-      kHugePageSize;
-
-  // A single core may be less than the full slab (core_slab_size), since we
-  // do not touch every page within the slab.
-  EXPECT_GT(r.resident_size, 0);
-  EXPECT_LE(r.resident_size, upper_bound)
-      << count_cores() << " " << core_slab_size << " " << kHugePageSize;
-
-  // This test is much more sensitive to implementation details of the per-CPU
-  // cache. It may need to be updated from time to time. These numbers were
-  // calculated by MADV_NOHUGEPAGE'ing the memory used for the slab and
-  // measuring the resident size.
-  switch (shift_bounds.max_shift) {
-    case 13:
-      EXPECT_GE(r.resident_size, 4096);
-      break;
-    case 19:
-      EXPECT_GE(r.resident_size, 8192);
-      break;
-    default:
-      ASSUME(false);
-      break;
-  }
 
-  // Read stats from the CPU caches. This should not impact resident_size.
-  const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size();
-  size_t total_used_bytes = 0;
-  for (int cpu = 0; cpu < num_cpus; ++cpu) {
-    size_t used_bytes = cache.UsedBytes(cpu);
-    total_used_bytes += used_bytes;
-
-    if (cpu == allowed_cpu_id) {
-      EXPECT_GT(used_bytes, 0);
-      EXPECT_TRUE(cache.HasPopulated(cpu));
-    } else {
-      EXPECT_EQ(used_bytes, 0);
-      EXPECT_FALSE(cache.HasPopulated(cpu));
+    for (int size_class = 1; size_class < kNumClasses; ++size_class) {
+      // This is sensitive to the current growth policies of CpuCache. It may
+      // require updating from time-to-time.
+      EXPECT_EQ(cache.TotalObjectsOfClass(size_class),
+                (size_class == kSizeClass ? num_to_move - 1 : 0))
+          << size_class;
     }
+    EXPECT_EQ(cache.TotalUsedBytes(), total_used_bytes);
 
-    EXPECT_LE(cache.Unallocated(cpu), max_cpu_cache_size);
-    EXPECT_EQ(cache.Capacity(cpu), max_cpu_cache_size);
-    EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu),
-              cache.Capacity(cpu));
-  }
+    PerCPUMetadataState post_stats = cache.MetadataMemoryUsage();
+    // Confirm stats are within expected bounds.
+    EXPECT_GT(post_stats.resident_size, 0);
+    EXPECT_LE(post_stats.resident_size, upper_bound) << count_cores();
+    // Confirm stats are unchanged.
+    EXPECT_EQ(r.resident_size, post_stats.resident_size);
 
-  for (int size_class = 1; size_class < kNumClasses; ++size_class) {
-    // This is sensitive to the current growth policies of CpuCache. It may
-    // require updating from time-to-time.
-    EXPECT_EQ(cache.TotalObjectsOfClass(size_class),
-              (size_class == kSizeClass ? num_to_move - 1 : 0))
-        << size_class;
+    // Tear down.
+    cache.Deallocate(ptr, kSizeClass);
+    cache.Deactivate();
+    break;
   }
-  EXPECT_EQ(cache.TotalUsedBytes(), total_used_bytes);
-
-  PerCPUMetadataState post_stats = cache.MetadataMemoryUsage();
-  // Confirm stats are within expected bounds.
-  EXPECT_GT(post_stats.resident_size, 0);
-  EXPECT_LE(post_stats.resident_size, upper_bound) << count_cores();
-  // Confirm stats are unchanged.
-  EXPECT_EQ(r.resident_size, post_stats.resident_size);
-
-  // Tear down.
-  cache.Deallocate(ptr, kSizeClass);
-  cache.Deactivate();
 }
 
 TEST(CpuCacheTest, CacheMissStats) {