Skip to content

Commit 78191c9

Browse files
committed
WIP
1 parent 0ba96a0 commit 78191c9

File tree

4 files changed

+23
-2
lines changed

4 files changed

+23
-2
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,7 @@ if(BUILD_NVFUSER_BENCHMARK)
808808
target_link_libraries(nvfuser_bench PRIVATE
809809
benchmark::benchmark
810810
codegen_internal
811+
GTest::gtest
811812
)
812813
add_dependencies(nvfuser_bench flatc build_flatbuffer_config)
813814
@@ -845,6 +846,7 @@ if(BUILD_NVFUSER_BENCHMARK)
845846
target_link_libraries(nvfuser_multidevice_bench PRIVATE
846847
benchmark::benchmark
847848
codegen_internal
849+
GTest::gtest
848850
)
849851
add_dependencies(nvfuser_multidevice_bench flatc build_flatbuffer_config)
850852

tests/cpp/utils.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,13 @@ void NVFuserTest::SetUp() {
4646
EnableOptionsGuard::getCurOptions().set(EnableOption::IdModelExtraValidation);
4747

4848
resetPeakMemoryStats(0);
49-
NVF_ERROR_EQ(maxMemoryAllocated(0), 0);
49+
ASSERT_EQ(maxMemoryAllocated(0), 0);
5050
}
5151

5252
void NVFuserTest::TearDown() {
53+
at::cuda::clearCublasWorkspaces();
54+
55+
ASSERT_EQ(memoryAllocated(0), 0);
5356
if (::testing::Test::HasFailure()) {
5457
auto test_info = ::testing::UnitTest::GetInstance()->current_test_info();
5558
std::cerr << "To reproduce: NVFUSER_TEST_RANDOM_SEED=" << getCRandomSeed()
@@ -930,4 +933,17 @@ int64_t maxMemoryAllocated(const c10::DeviceIndex device) {
930933
.peak;
931934
}
932935

936+
int64_t memoryAllocated(const c10::DeviceIndex device) {
937+
c10::cuda::CUDACachingAllocator::CUDAAllocator* allocator =
938+
c10::cuda::CUDACachingAllocator::get();
939+
NVF_CHECK(allocator != nullptr);
940+
941+
c10::CachingDeviceAllocator::DeviceStats device_stats =
942+
allocator->getDeviceStats(device);
943+
944+
return device_stats.allocated_bytes
945+
.at(static_cast<uint64_t>(c10::CachingAllocator::StatType::AGGREGATE))
946+
.current;
947+
}
948+
933949
} // namespace nvfuser

tests/cpp/utils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -847,4 +847,7 @@ void resetPeakMemoryStats(c10::DeviceIndex device);
847847
// C++ implementation of torch.cuda.max_memory_allocated
848848
int64_t maxMemoryAllocated(const c10::DeviceIndex device);
849849

850+
// C++ implementation of torch.cuda.memory_allocated
851+
int64_t memoryAllocated(const c10::DeviceIndex device);
852+
850853
} // namespace nvfuser

tests/cpp/validator.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
*/
77
// clang-format on
88
#include <runtime/fusion_kernel_runtime.h>
9+
#include <tests/cpp/utils.h>
910
#include <tests/cpp/validator.h>
10-
1111
#include <validator_utils.h>
1212

1313
namespace nvfuser {

0 commit comments

Comments
 (0)