Deflake CpuCacheTest.Metadata.
If we are preempted during the allocation but return to the same core, our
CPU mask may be unchanged (!Tampered()), yet we fail to refill in full and
do not touch the slab.

To avoid this, make multiple attempts, and on each failed attempt confirm
that we were indeed preempted by verifying that the slab is in the uncached
state.

PiperOrigin-RevId: 703845482
Change-Id: I998a1015aa05d5a8ab1d7a468a9edb07e776d02b
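
For illustration, a minimal, self-contained sketch of the retry-and-verify pattern described above. The helper names (AttemptAllocationRefilledSlab, SlabIsCached) and the simulated preemption are stand-ins invented for this sketch, not tcmalloc APIs; the real test drives CpuCache under an affinity mask and judges the outcome from transfer-cache stats.

// Sketch only: simulates "preempted but returned to the same core" and the
// retry-with-verification loop. Not tcmalloc code.
#include <cstdio>

namespace {

int g_calls = 0;

// Stand-in for the allocation on a pinned CPU; pretend the first attempt is
// interrupted by preemption and later attempts refill the slab as expected.
bool AttemptAllocationRefilledSlab() { return ++g_calls > 1; }

// Stand-in for inspecting the slab state; after a preempted attempt the slab
// must be back in the uncached state.
bool SlabIsCached() { return false; }

}  // namespace

int main() {
  constexpr int kAttempts = 3;
  for (int attempt = 1; attempt <= kAttempts; attempt++) {
    if (AttemptAllocationRefilledSlab()) {
      std::printf("attempt %d: refill observed, run the remaining checks\n",
                  attempt);
      break;
    }
    if (attempt < kAttempts && !SlabIsCached()) {
      // The slab is uncached, confirming a preemption; clean up and retry.
      std::printf("attempt %d: preempted, retrying\n", attempt);
      continue;
    }
    std::printf("attempt %d: unexpected failure\n", attempt);
    return 1;
  }
  return 0;
}

In the actual change below, "the refill happened" is judged from TransferCacheStats, and the uncached-slab check is the EXPECT_EQ on subtle::percpu::tcmalloc_slabs & TCMALLOC_CACHED_SLABS_MASK.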
ckennelly authored and copybara-github committed Dec 7, 2024
1 parent 1e53d3f commit 253603d
Showing 1 changed file with 163 additions and 117 deletions.
tcmalloc/cpu_cache_test.cc
@@ -375,135 +375,181 @@ TEST(CpuCacheTest, Metadata) {

  const int num_cpus = NumCPUs();

-  CpuCache cache;
-  cache.Activate();
-
-  cpu_cache_internal::SlabShiftBounds shift_bounds =
-      cache.GetPerCpuSlabShiftBounds();
-
-  PerCPUMetadataState r = cache.MetadataMemoryUsage();
-  size_t slabs_size = subtle::percpu::GetSlabsAllocSize(
-      subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus);
-  size_t resize_size = num_cpus * sizeof(bool);
-  size_t begins_size = kNumClasses * sizeof(std::atomic<uint16_t>);
-  EXPECT_EQ(r.virtual_size, slabs_size + resize_size + begins_size);
-  EXPECT_EQ(r.resident_size, 0);
-
-  auto count_cores = [&]() {
-    int populated_cores = 0;
-    for (int i = 0; i < num_cpus; i++) {
-      if (cache.HasPopulated(i)) {
-        populated_cores++;
-      }
-    }
-    return populated_cores;
-  };
-
-  EXPECT_EQ(0, count_cores());
-
-  int allowed_cpu_id;
-  const size_t kSizeClass = 2;
-  const size_t num_to_move = cache.forwarder().num_objects_to_move(kSizeClass);
-  void* ptr;
-  {
-    // Restrict this thread to a single core while allocating and processing the
-    // slow path.
-    //
-    // TODO(b/151313823): Without this restriction, we may access--for reading
-    // only--other slabs if we end up being migrated. These may cause huge
-    // pages to be faulted for those cores, leading to test flakiness.
-    tcmalloc_internal::ScopedAffinityMask mask(
-        tcmalloc_internal::AllowedCpus()[0]);
-    allowed_cpu_id = subtle::percpu::TcmallocTest::VirtualCpuSynchronize();
-
-    ptr = cache.Allocate(kSizeClass);
-
-    if (mask.Tampered() ||
-        allowed_cpu_id !=
-            subtle::percpu::TcmallocTest::VirtualCpuSynchronize()) {
-      return;
-    }
-  }
-  EXPECT_NE(ptr, nullptr);
-  EXPECT_EQ(1, count_cores());
-
-  r = cache.MetadataMemoryUsage();
-  EXPECT_EQ(
-      r.virtual_size,
-      resize_size + begins_size +
-          subtle::percpu::GetSlabsAllocSize(
-              subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus));
-
-  // We expect to fault in a single core, but we may end up faulting an
-  // entire hugepage worth of memory when we touch that core and another when
-  // touching the header.
-  const size_t core_slab_size = r.virtual_size / num_cpus;
-  const size_t upper_bound =
-      ((core_slab_size + kHugePageSize - 1) & ~(kHugePageSize - 1)) +
-      kHugePageSize;
-
-  // A single core may be less than the full slab (core_slab_size), since we
-  // do not touch every page within the slab.
-  EXPECT_GT(r.resident_size, 0);
-  EXPECT_LE(r.resident_size, upper_bound)
-      << count_cores() << " " << core_slab_size << " " << kHugePageSize;
-
-  // This test is much more sensitive to implementation details of the per-CPU
-  // cache. It may need to be updated from time to time. These numbers were
-  // calculated by MADV_NOHUGEPAGE'ing the memory used for the slab and
-  // measuring the resident size.
-  switch (shift_bounds.max_shift) {
-    case 13:
-      EXPECT_GE(r.resident_size, 4096);
-      break;
-    case 19:
-      EXPECT_GE(r.resident_size, 8192);
-      break;
-    default:
-      ASSUME(false);
-      break;
-  }
-
-  // Read stats from the CPU caches. This should not impact resident_size.
-  const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size();
-  size_t total_used_bytes = 0;
-  for (int cpu = 0; cpu < num_cpus; ++cpu) {
-    size_t used_bytes = cache.UsedBytes(cpu);
-    total_used_bytes += used_bytes;
-
-    if (cpu == allowed_cpu_id) {
-      EXPECT_GT(used_bytes, 0);
-      EXPECT_TRUE(cache.HasPopulated(cpu));
-    } else {
-      EXPECT_EQ(used_bytes, 0);
-      EXPECT_FALSE(cache.HasPopulated(cpu));
-    }
-
-    EXPECT_LE(cache.Unallocated(cpu), max_cpu_cache_size);
-    EXPECT_EQ(cache.Capacity(cpu), max_cpu_cache_size);
-    EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu),
-              cache.Capacity(cpu));
-  }
-
-  for (int size_class = 1; size_class < kNumClasses; ++size_class) {
-    // This is sensitive to the current growth policies of CpuCache. It may
-    // require updating from time-to-time.
-    EXPECT_EQ(cache.TotalObjectsOfClass(size_class),
-              (size_class == kSizeClass ? num_to_move - 1 : 0))
-        << size_class;
-  }
-  EXPECT_EQ(cache.TotalUsedBytes(), total_used_bytes);
-
-  PerCPUMetadataState post_stats = cache.MetadataMemoryUsage();
-  // Confirm stats are within expected bounds.
-  EXPECT_GT(post_stats.resident_size, 0);
-  EXPECT_LE(post_stats.resident_size, upper_bound) << count_cores();
-  // Confirm stats are unchanged.
-  EXPECT_EQ(r.resident_size, post_stats.resident_size);
-
-  // Tear down.
-  cache.Deallocate(ptr, kSizeClass);
-  cache.Deactivate();
+  const int kAttempts = 3;
+  for (int attempt = 1; attempt <= kAttempts; attempt++) {
+    SCOPED_TRACE(absl::StrCat("attempt=", attempt));
+
+    CpuCache cache;
+    cache.Activate();
+
+    cpu_cache_internal::SlabShiftBounds shift_bounds =
+        cache.GetPerCpuSlabShiftBounds();
+
+    PerCPUMetadataState r = cache.MetadataMemoryUsage();
+    size_t slabs_size = subtle::percpu::GetSlabsAllocSize(
+        subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus);
+    size_t resize_size = num_cpus * sizeof(bool);
+    size_t begins_size = kNumClasses * sizeof(std::atomic<uint16_t>);
+    EXPECT_EQ(r.virtual_size, slabs_size + resize_size + begins_size);
+    EXPECT_EQ(r.resident_size, 0);
+
+    auto count_cores = [&]() {
+      int populated_cores = 0;
+      for (int i = 0; i < num_cpus; i++) {
+        if (cache.HasPopulated(i)) {
+          populated_cores++;
+        }
+      }
+      return populated_cores;
+    };
+
+    EXPECT_EQ(0, count_cores());
+
+    int allowed_cpu_id;
+    const size_t kSizeClass = 2;
+    const size_t num_to_move =
+        cache.forwarder().num_objects_to_move(kSizeClass);
+
+    TransferCacheStats tc_stats =
+        cache.forwarder().transfer_cache().GetStats(kSizeClass);
+    EXPECT_EQ(tc_stats.remove_hits, 0);
+    EXPECT_EQ(tc_stats.remove_misses, 0);
+    EXPECT_EQ(tc_stats.remove_object_misses, 0);
+    EXPECT_EQ(tc_stats.insert_hits, 0);
+    EXPECT_EQ(tc_stats.insert_misses, 0);
+    EXPECT_EQ(tc_stats.insert_object_misses, 0);
+
+    void* ptr;
+    {
+      // Restrict this thread to a single core while allocating and processing
+      // the slow path.
+      //
+      // TODO(b/151313823): Without this restriction, we may access--for
+      // reading only--other slabs if we end up being migrated. These may cause
+      // huge pages to be faulted for those cores, leading to test flakiness.
+      tcmalloc_internal::ScopedAffinityMask mask(
+          tcmalloc_internal::AllowedCpus()[0]);
+      allowed_cpu_id = subtle::percpu::TcmallocTest::VirtualCpuSynchronize();

+      ptr = cache.Allocate(kSizeClass);
+
+      if (mask.Tampered() ||
+          allowed_cpu_id !=
+              subtle::percpu::TcmallocTest::VirtualCpuSynchronize()) {
+        return;
+      }
+    }
+    EXPECT_NE(ptr, nullptr);
+    EXPECT_EQ(1, count_cores());
+
+    // We don't care if the transfer cache hit or missed, but the CPU cache
+    // should have done the operation.
+    tc_stats = cache.forwarder().transfer_cache().GetStats(kSizeClass);
+    if ((tc_stats.remove_object_misses != num_to_move ||
+         tc_stats.insert_hits + tc_stats.insert_misses != 0) &&
+        attempt < kAttempts) {
+      // The operation didn't occur as expected, likely because we were
+      // preempted but returned to the same core (otherwise Tampered would have
+      // fired).
+      //
+      // The MSB of tcmalloc_slabs should be cleared to indicate we were
+      // preempted. As of December 2024, Refill and its callees do not invoke
+      // CacheCpuSlab. This check can spuriously pass if we're preempted
+      // between the end of Allocate and now, rather than within Allocate, but
+      // it ensures we do not silently break.
+      EXPECT_EQ(subtle::percpu::tcmalloc_slabs & TCMALLOC_CACHED_SLABS_MASK, 0);
+
+      cache.Deallocate(ptr, kSizeClass);
+      cache.Deactivate();
+
+      EXPECT_EQ(0, count_cores());
+      continue;
+    }
+    EXPECT_EQ(tc_stats.remove_hits + tc_stats.remove_misses, 1);
+    EXPECT_EQ(tc_stats.remove_object_misses, num_to_move);
+    EXPECT_EQ(tc_stats.insert_hits, 0);
+    EXPECT_EQ(tc_stats.insert_misses, 0);
+    EXPECT_EQ(tc_stats.insert_object_misses, 0);
+
+    r = cache.MetadataMemoryUsage();
+    EXPECT_EQ(
+        r.virtual_size,
+        resize_size + begins_size +
+            subtle::percpu::GetSlabsAllocSize(
+                subtle::percpu::ToShiftType(shift_bounds.max_shift), num_cpus));
+
+    // We expect to fault in a single core, but we may end up faulting an
+    // entire hugepage worth of memory when we touch that core and another when
+    // touching the header.
+    const size_t core_slab_size = r.virtual_size / num_cpus;
+    const size_t upper_bound =
+        ((core_slab_size + kHugePageSize - 1) & ~(kHugePageSize - 1)) +
+        kHugePageSize;
+
+    // A single core may be less than the full slab (core_slab_size), since we
+    // do not touch every page within the slab.
+    EXPECT_GT(r.resident_size, 0);
+    EXPECT_LE(r.resident_size, upper_bound)
+        << count_cores() << " " << core_slab_size << " " << kHugePageSize;
+
+    // This test is much more sensitive to implementation details of the per-CPU
+    // cache. It may need to be updated from time to time. These numbers were
+    // calculated by MADV_NOHUGEPAGE'ing the memory used for the slab and
+    // measuring the resident size.
+    switch (shift_bounds.max_shift) {
+      case 13:
+        EXPECT_GE(r.resident_size, 4096);
+        break;
+      case 19:
+        EXPECT_GE(r.resident_size, 8192);
+        break;
+      default:
+        ASSUME(false);
+        break;
+    }
+
+    // Read stats from the CPU caches. This should not impact resident_size.
+    const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size();
+    size_t total_used_bytes = 0;
+    for (int cpu = 0; cpu < num_cpus; ++cpu) {
+      size_t used_bytes = cache.UsedBytes(cpu);
+      total_used_bytes += used_bytes;
+
+      if (cpu == allowed_cpu_id) {
+        EXPECT_GT(used_bytes, 0);
+        EXPECT_TRUE(cache.HasPopulated(cpu));
+      } else {
+        EXPECT_EQ(used_bytes, 0);
+        EXPECT_FALSE(cache.HasPopulated(cpu));
+      }
+
+      EXPECT_LE(cache.Unallocated(cpu), max_cpu_cache_size);
+      EXPECT_EQ(cache.Capacity(cpu), max_cpu_cache_size);
+      EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu),
+                cache.Capacity(cpu));
+    }
+
+    for (int size_class = 1; size_class < kNumClasses; ++size_class) {
+      // This is sensitive to the current growth policies of CpuCache. It may
+      // require updating from time-to-time.
+      EXPECT_EQ(cache.TotalObjectsOfClass(size_class),
+                (size_class == kSizeClass ? num_to_move - 1 : 0))
+          << size_class;
+    }
+    EXPECT_EQ(cache.TotalUsedBytes(), total_used_bytes);
+
+    PerCPUMetadataState post_stats = cache.MetadataMemoryUsage();
+    // Confirm stats are within expected bounds.
+    EXPECT_GT(post_stats.resident_size, 0);
+    EXPECT_LE(post_stats.resident_size, upper_bound) << count_cores();
+    // Confirm stats are unchanged.
+    EXPECT_EQ(r.resident_size, post_stats.resident_size);
+
+    // Tear down.
+    cache.Deallocate(ptr, kSizeClass);
+    cache.Deactivate();
+    break;
+  }
}

TEST(CpuCacheTest, CacheMissStats) {
