File tree Expand file tree Collapse file tree 4 files changed +23
-2
lines changed Expand file tree Collapse file tree 4 files changed +23
-2
lines changed Original file line number Diff line number Diff line change @@ -808,6 +808,7 @@ if(BUILD_NVFUSER_BENCHMARK)
808
808
target_link_libraries(nvfuser_bench PRIVATE
809
809
benchmark::benchmark
810
810
codegen_internal
811
+ GTest::gtest
811
812
)
812
813
add_dependencies(nvfuser_bench flatc build_flatbuffer_config)
813
814
@@ -845,6 +846,7 @@ if(BUILD_NVFUSER_BENCHMARK)
845
846
target_link_libraries(nvfuser_multidevice_bench PRIVATE
846
847
benchmark::benchmark
847
848
codegen_internal
849
+ GTest::gtest
848
850
)
849
851
add_dependencies(nvfuser_multidevice_bench flatc build_flatbuffer_config)
850
852
Original file line number Diff line number Diff line change @@ -46,10 +46,13 @@ void NVFuserTest::SetUp() {
46
46
EnableOptionsGuard::getCurOptions ().set (EnableOption::IdModelExtraValidation);
47
47
48
48
resetPeakMemoryStats (0 );
49
- NVF_ERROR_EQ (maxMemoryAllocated (0 ), 0 );
49
+ ASSERT_EQ (maxMemoryAllocated (0 ), 0 );
50
50
}
51
51
52
52
void NVFuserTest::TearDown () {
53
+ at::cuda::clearCublasWorkspaces ();
54
+
55
+ ASSERT_EQ (memoryAllocated (0 ), 0 );
53
56
if (::testing::Test::HasFailure ()) {
54
57
auto test_info = ::testing::UnitTest::GetInstance ()->current_test_info ();
55
58
std::cerr << " To reproduce: NVFUSER_TEST_RANDOM_SEED=" << getCRandomSeed ()
@@ -930,4 +933,17 @@ int64_t maxMemoryAllocated(const c10::DeviceIndex device) {
930
933
.peak ;
931
934
}
932
935
936
+ int64_t memoryAllocated (const c10::DeviceIndex device) {
937
+ c10::cuda::CUDACachingAllocator::CUDAAllocator* allocator =
938
+ c10::cuda::CUDACachingAllocator::get ();
939
+ NVF_CHECK (allocator != nullptr );
940
+
941
+ c10::CachingDeviceAllocator::DeviceStats device_stats =
942
+ allocator->getDeviceStats (device);
943
+
944
+ return device_stats.allocated_bytes
945
+ .at (static_cast <uint64_t >(c10::CachingAllocator::StatType::AGGREGATE))
946
+ .current ;
947
+ }
948
+
933
949
} // namespace nvfuser
Original file line number Diff line number Diff line change @@ -847,4 +847,7 @@ void resetPeakMemoryStats(c10::DeviceIndex device);
847
847
// C++ implementation of torch.cuda.max_memory_allocated
848
848
int64_t maxMemoryAllocated (const c10::DeviceIndex device);
849
849
850
+ // C++ implementation of torch.cuda.memory_allocated
851
+ int64_t memoryAllocated (const c10::DeviceIndex device);
852
+
850
853
} // namespace nvfuser
Original file line number Diff line number Diff line change 6
6
*/
7
7
// clang-format on
8
8
#include < runtime/fusion_kernel_runtime.h>
9
+ #include < tests/cpp/utils.h>
9
10
#include < tests/cpp/validator.h>
10
-
11
11
#include < validator_utils.h>
12
12
13
13
namespace nvfuser {
You can’t perform that action at this time.
0 commit comments