DTolm
diff --git a/‎README.md
Lines changed: 2 additions & 2 deletions b/‎README.md
Lines changed: 2 additions & 2 deletions
diff --git a/‎benchmark_scripts/cuFFT_scripts/src/precision_cuFFT_half.cu
Lines changed: 1 addition & 1 deletion b/‎benchmark_scripts/cuFFT_scripts/src/precision_cuFFT_half.cu
Lines changed: 1 addition & 1 deletion
diff --git a/‎benchmark_scripts/cuFFT_scripts/src/precision_cuFFT_r2c.cu
Lines changed: 1 addition & 1 deletion b/‎benchmark_scripts/cuFFT_scripts/src/precision_cuFFT_r2c.cu
Lines changed: 1 addition & 1 deletion
diff --git a/‎benchmark_scripts/rocFFT_scripts/src/precision_rocFFT_r2c.cpp
Lines changed: 1 addition & 1 deletion b/‎benchmark_scripts/rocFFT_scripts/src/precision_rocFFT_r2c.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp
Lines changed: 12 additions & 2 deletions b/‎benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp
Lines changed: 12 additions & 2 deletions
diff --git a/‎benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp
Lines changed: 15 additions & 3 deletions b/‎benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp
Lines changed: 15 additions & 3 deletions
@@ -6,10 +6,10 @@ VkFFT is an efficient GPU-accelerated multidimensional Fast Fourier Transform li
 ## Currently supported features:
   - 1D/2D/3D/ND systems - specify VKFFT_MAX_FFT_DIMENSIONS for arbitrary number of dimensions.
   - Forward and inverse directions of FFT.
-  - Support for big FFT dimension sizes. Current limits: C2C or even C2R/R2C - (2^32, 2^32, 2^32).  Odd C2R/R2C - (2^12, 2^32, 2^32). R2R - (2^12, 2^12, 2^12). Depends on the amount of shared memory on the device. (will be increased later).
+  - Support for big FFT dimension sizes. Current limits: approximately 2^32 in all dimensions for all types of transforms. The exact limits depend on the amount of shared memory available on the device.
   - Radix-2/3/4/5/7/8/11/13 FFT. Sequences using radix 3, 5, 7, 11 and 13 have comparable performance to that of powers of 2.
   - Rader's FFT algorithm for primes from 17 up to max shared memory length (~10000). Inlined and done without additional memory transfers.
-  - Bluestein's FFT algorithm for all other sequences. Full coverage of C2C range, single upload (2^12, 2^12, 2^12) for R2C/C2R/R2R. Optimized to have as few memory transfers as possible by using zero padding and merged convolution support of VkFFT.
+  - Bluestein's FFT algorithm for all other sequences. Optimized to have as few memory transfers as possible by using zero padding and merged convolution support of VkFFT.
   - Single, double, half and quad (double-double) precision support. Double and quad precisions use CPU-generated lookup tables (LUTs). Half precision still does all computations in single precision and only uses half precision to store data.
   - All transformations are performed in-place with no performance loss. Out-of-place transforms are supported by selecting different input/output buffers.
   - No additional transposition uploads. Note: Data can be reshuffled after the Four Step FFT algorithm with an additional buffer (for big sequences). This doesn't matter for convolutions - they return to the input ordering (saves memory).
 
@@ -51,7 +51,7 @@ void launch_precision_cuFFT_half(void* inputC, void* output_cuFFT, int device_id
 		break;
 	}
 	res = cufftXtMakePlanMany(
-		planHalf, dims[3], local_dims, NULL, 1, 1, CUDA_C_16F,
+		planHalf, dims[4], local_dims, NULL, 1, 1, CUDA_C_16F,
 		NULL, 1, 1, CUDA_C_16F, 1, &ws, CUDA_C_16F);
 
 	for (int i = 0; i < 1; i++) {
 
@@ -45,7 +45,7 @@ void launch_precision_cuFFT_r2c(void* inputC, void* output_cuFFT, int device_id,
 	}
 	cudaDeviceSynchronize();
 	cufftDestroy(planR2C);
-	switch (dims[3]) {
+	switch (dims[4]) {
 	case 1:
 		cufftPlan1d(&planR2C, dims[0], CUFFT_C2R, 1);
 		break;
 
@@ -44,7 +44,7 @@ void launch_precision_rocFFT_r2c(void* inputC, void* output_rocFFT, int device_i
 	}
 	hipDeviceSynchronize();
 	hipfftDestroy(planR2C);
-	switch (dims[3]) {
+	switch (dims[4]) {
 	case 1:
 		hipfftPlan1d(&planR2C, dims[0], HIPFFT_C2R, 1);
 		break;
 
@@ -75,7 +75,10 @@ VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_out
 	const int num_benchmark_samples = 277;
 	const int num_runs = 1;
 
-	uint64_t benchmark_dimensions[num_benchmark_samples][5] = { {1, 1, 1, 1, 3}, {(uint64_t)pow(2,5), 1, 1, 1, 1}, {(uint64_t)pow(2,6), 1, 1, 1, 1},{(uint64_t)pow(2,7), 1, 1, 1, 1},{(uint64_t)pow(2,8), 1, 1, 1, 1},{(uint64_t)pow(2,9), 1, 1, 1, 1},{(uint64_t)pow(2,10), 1, 1, 1, 1},
+	const int num_benchmark_samples = 318;
+	const int num_runs = 1;
+
+	uint64_t benchmark_dimensions[num_benchmark_samples][5] = { {1, 1, 1, 1, 1}, {(uint64_t)pow(2,5), 1, 1, 1, 1}, {(uint64_t)pow(2,6), 1, 1, 1, 1},{(uint64_t)pow(2,7), 1, 1, 1, 1},{(uint64_t)pow(2,8), 1, 1, 1, 1},{(uint64_t)pow(2,9), 1, 1, 1, 1},{(uint64_t)pow(2,10), 1, 1, 1, 1},
 		{(uint64_t)pow(2,11), 1, 1, 1, 1},{(uint64_t)pow(2,12), 1, 1, 1, 1},{(uint64_t)pow(2,13), 1, 1, 1, 1},{(uint64_t)pow(2,14), 1, 1, 1, 1},{(uint64_t)pow(2,15), 1, 1, 1, 1},{(uint64_t)pow(2,16), 1, 1, 1, 1},{(uint64_t)pow(2,17), 1, 1, 1, 1},{(uint64_t)pow(2,18), 1, 1, 1, 1},
 		{(uint64_t)pow(2,19), 1, 1, 1, 1},{(uint64_t)pow(2,20), 1, 1, 1, 1},{(uint64_t)pow(2,21), 1, 1, 1, 1},{(uint64_t)pow(2,22), 1, 1, 1, 1},{(uint64_t)pow(2,23), 1, 1, 1, 1},{(uint64_t)pow(2,24), 1, 1, 1, 1},{(uint64_t)pow(2,25), 1, 1, 1, 1},{(uint64_t)pow(2,26), 1, 1, 1, 1},
 
@@ -96,7 +99,14 @@ VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_out
 		{3, 8, 1, 1, 2},{5, 8, 1, 1, 2},{6, 8, 1, 1, 2},{7, 8, 1, 1, 2},{9, 8, 1, 1, 2},{10, 8, 1, 1, 2},{18, 8, 1, 1, 2},{12, 8, 1, 1, 2},{13, 8, 1, 1, 2},{14, 8, 1, 1, 2},
 		{15, 8, 1, 1, 2},{17, 8, 1, 1, 2},{19, 8, 1, 1, 2},{21, 8, 1, 1, 2},{22, 8, 1, 1, 2},{23, 8, 1, 1, 2},{24, 8, 1, 1, 2},{25, 8, 1, 1, 2},{26, 8, 1, 1, 2},{27, 8, 1, 1, 2},{28, 8, 1, 1, 2},{29, 8, 1, 1, 2},{30, 8, 1, 1, 2},{31, 8, 1, 1, 2},{33, 8, 1, 1, 2},{34, 8, 1, 1, 2},{35, 8, 1, 1, 2},{37, 8, 1, 1, 2},{39, 8, 1, 1, 2},{41, 8, 1, 1, 2},{42, 8, 1, 1, 2},{44, 8, 1, 1, 2},{45, 8, 1, 1, 2},{46, 8, 1, 1, 2},{49, 8, 1, 1, 2},{52, 8, 1, 1, 2},{55, 8, 1, 1, 2},{56, 8, 1, 1, 2},{60, 8, 1, 1, 2},{65, 8, 1, 1, 2},{66, 8, 1, 1, 2},{88, 8, 1, 1, 2},
 		{128, 8, 1, 1, 2},{125, 8, 1, 1, 2},{143, 8, 1, 1, 2},{149, 8, 1, 1, 2},{169, 8, 1, 1, 2},{229, 8, 1, 1, 2},{243, 8, 1, 1, 2},{286, 8, 1, 1, 2},{343, 8, 1, 1, 2},{429, 8, 1, 1, 2},{482, 1, 1, 1, 1},{572, 8, 1, 1, 2},{625, 8, 1, 1, 2},{720, 8, 1, 1, 2},{1080, 8, 1, 1, 2},{1008, 8, 1, 1, 2},{1181, 8, 1, 1, 2},{1287, 8, 1, 1, 2},{1400, 8, 1, 1, 2},{1440, 8, 1, 1, 2},{1920, 8, 1, 1, 2},{1951, 8, 1, 1, 2},{2160, 8, 1, 1, 2},{3024,1,1, 1, 1},{3500,1,1, 1, 1},
-		{3840, 8, 1, 1, 2},{4000 , 8, 1, 1, 2},{4050, 8, 1, 1, 2},{4320 , 8, 1, 1, 2},{4864 , 8, 1, 1, 2},{7000,1,1, 1, 1},{7680, 8, 1, 1, 2},{9000, 8, 1, 1, 2},{4864 * 5, 8, 1, 1, 2},{7680 * 5, 8, 1, 1, 2},{39829*2, 8, 1, 1, 2},
+		{3840, 8, 1, 1, 2},{4000 , 8, 1, 1, 2},{4050, 8, 1, 1, 2},{4320 , 8, 1, 1, 2},{4864 , 8, 1, 1, 2},{7000,1,1, 1, 1}, {9000, 1, 1, 1, 1},{11587, 1, 1, 1, 1},{7680 * 5, 1, 1, 1, 1},
+		{15319, 1, 1, 1, 1},{21269, 1, 1, 1, 1},{27283, 1, 1, 1, 1},{39829, 1, 1, 1, 1},{52733, 1, 1, 1, 1},{2000083, 1, 1, 1, 1},{4000067, 1, 1, 1, 1},{8003869, 1, 1, 1, 1},
+		{(uint64_t)pow(3,10), 1, 1, 1, 1},{(uint64_t)pow(3,11), 1, 1, 1, 1},{(uint64_t)pow(3,12), 1, 1, 1, 1},{(uint64_t)pow(3,13), 1, 1, 1, 1},{(uint64_t)pow(3,14), 1, 1, 1, 1},{(uint64_t)pow(3,15), 1, 1, 1, 1},
+		{(uint64_t)pow(5,5), 1, 1, 1, 1},{(uint64_t)pow(5,6), 1, 1, 1, 1},{(uint64_t)pow(5,7), 1, 1, 1, 1},{(uint64_t)pow(5,8), 1, 1, 1, 1},{(uint64_t)pow(5,9), 1, 1, 1, 1},
+		{(uint64_t)pow(7,4), 1, 1, 1, 1},{(uint64_t)pow(7,5), 1, 1, 1, 1},{(uint64_t)pow(7,6), 1, 1, 1, 1},{(uint64_t)pow(7,7), 1, 1, 1, 1},{(uint64_t)pow(7,8), 1, 1, 1, 1},
+		{(uint64_t)pow(11,3), 1, 1, 1, 1},{(uint64_t)pow(11,4), 1, 1, 1, 1},{(uint64_t)pow(11,5), 1, 1, 1, 1},{(uint64_t)pow(11,6), 1, 1, 1, 1},
+		{(uint64_t)pow(13,3), 1, 1, 1, 1},{(uint64_t)pow(13,4), 1, 1, 1, 1},{(uint64_t)pow(13,5), 1, 1, 1, 1},{(uint64_t)pow(13,6), 1, 1, 1, 1},
+		{7680, 8, 1, 1, 2},{9000, 8, 1, 1, 2},{4864 * 5, 8, 1, 1, 2},{7680 * 5, 8, 1, 1, 2},{39829*2, 8, 1, 1, 2}, {15319, 8, 1, 1, 2},{21269, 8, 1, 1, 2},{27283, 8, 1, 1, 2},{39829, 8, 1, 1, 2},{52733, 8, 1, 1, 2},{2000083, 4, 1, 1, 2},
 		{3, 3, 3, 3, 4},{5, 5, 5, 5, 4},{6, 6, 6, 6, 4},{7, 7, 7, 7, 4},{9, 9, 9, 9, 4},{10, 10, 10, 10, 4},{11, 11, 11, 11, 4},{12, 12, 12, 12, 4},{13, 13, 13, 13, 4},{14, 14, 14, 14, 4},
 		{15, 15, 15, 15, 4},{17, 17, 17, 17, 4},{21, 21, 21, 21, 4},{22, 22, 22, 22, 4},{23, 23, 23, 23, 4},{24, 24, 24, 24, 4},{25, 25, 25, 25, 4},{26, 26, 26, 26, 4},{27, 27, 27, 27, 4},{28, 28, 28, 28, 4},{29, 29, 29, 29, 4},{30, 30, 30, 30, 4},{31, 31, 31, 31, 4},{33, 33, 33, 33, 4},{35, 35, 35, 35, 4},{37, 37, 37, 37, 4},{39, 39, 39, 39, 4},{41, 41, 41, 41, 4},{42, 42, 42, 42, 4},{43, 43, 43, 43, 4},{44, 44, 44, 44, 4},{45, 45, 45, 45, 4},{47, 47, 47, 47, 4},{49, 49, 49, 49, 4},{52, 52, 52, 52, 4},{53, 53, 53, 53, 4},{56, 56, 56, 56, 4},{59, 59, 59, 59, 4},{60, 60, 60, 60, 4},{61, 61, 61, 61, 4},{81, 81, 81, 81, 4},
 		{3, 5, 7, 9, 4},{5, 3, 7, 9, 4},{9, 7, 5, 3, 4},{23, 25, 27, 29, 4},{25, 23, 27, 29, 4},{29, 27, 25, 23, 4},{123, 25, 127, 129, 4},{125, 123, 27, 129, 4},{129, 127, 125, 23, 4},
 
@@ -72,16 +72,28 @@ VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_out
 		fprintf(output, "16 - VkFFT/FFTW R2R DCT-I, II, III and IV precision test in single precision\n");
 	printf("16 - VkFFT/FFTW R2R DCT-I, II, III and IV precision test in single precision\n");
 
-	const int num_benchmark_samples = 235;
+	const int num_benchmark_samples = 318;
 	const int num_runs = 1;
 
 	uint64_t benchmark_dimensions[num_benchmark_samples][5] = { {2, 1, 1, 1, 1},{3, 1, 1, 1, 1},{5, 1, 1, 1, 1},{6, 1, 1, 1, 1},{7, 1, 1, 1, 1},{8, 1, 1, 1, 1},{9, 1, 1, 1, 1},{10, 1, 1, 1, 1},{11, 1, 1, 1, 1},{12, 1, 1, 1, 1},{13, 1, 1, 1, 1},{14, 1, 1, 1, 1},
 		{15, 1, 1, 1, 1},{16, 1, 1, 1, 1},{17, 1, 1, 1, 1},{21, 1, 1, 1, 1},{22, 1, 1, 1, 1},{23, 1, 1, 1, 1},{24, 1, 1, 1, 1},{25, 1, 1, 1, 1},{26, 1, 1, 1, 1},{27, 1, 1, 1, 1},{28, 1, 1, 1, 1},{29, 1, 1, 1, 1},{30, 1, 1, 1, 1},{31, 1, 1, 1, 1},{32, 1, 1, 1, 1},{33, 1, 1, 1, 1},{34, 1, 1, 1, 1},{35, 1, 1, 1, 1},{37, 1, 1, 1, 1},{38, 1, 1, 1, 1},{39, 1, 1, 1, 1},{41, 1, 1, 1, 1},{42, 1, 1, 1, 1},{44, 1, 1, 1, 1},{45, 1, 1, 1, 1},{46, 1, 1, 1, 1},{49, 1, 1, 1, 1},{52, 1, 1, 1, 1},{55, 1, 1, 1, 1},{56, 1, 1, 1, 1},{58, 1, 1, 1, 1},{60, 1, 1, 1, 1},{64, 1, 1, 1, 1},{65, 1, 1, 1, 1},{66, 1, 1, 1, 1},{81, 1, 1, 1, 1},
 		{121, 1, 1, 1, 1},{125, 1, 1, 1, 1},{128, 1, 1, 1, 1},{143, 1, 1, 1, 1},{146, 1, 1, 1, 1}, {169, 1, 1, 1, 1},{243, 1, 1, 1, 1},{256, 1, 1, 1, 1},{283, 1, 1, 1, 1},{286, 1, 1, 1, 1},{343, 1, 1, 1, 1},{429, 1, 1, 1, 1},{512, 1, 1, 1, 1},{572, 1, 1, 1, 1},{625, 1, 1, 1, 1},{720, 1, 1, 1, 1},{1024, 1, 1, 1, 1},{1080, 1, 1, 1, 1},{1001, 1, 1, 1, 1},{1229, 1, 1, 1, 1},{1287, 1, 1, 1, 1},{1400, 1, 1, 1, 1},{1440, 1, 1, 1, 1},{1676, 1, 1, 1, 1},{1920, 1, 1, 1, 1},{2048, 1, 1, 1, 1},{2160, 1, 1, 1, 1},{3024,1,1, 1, 1},{3500,1,1, 1, 1},
-		{3840, 1, 1, 1, 1},{4000 , 1, 1, 1, 1},{4050, 1, 1, 1, 1},{4096 , 1, 1, 1, 1},
+		{3840, 1, 1, 1, 1},{4000 , 1, 1, 1, 1},{4050, 1, 1, 1, 1},{4096 , 1, 1, 1, 1} ,{4391, 1, 1, 1, 1},{7000,1,1, 1, 1},{7680, 1, 1, 1, 1},{7879, 1, 1, 1, 1},{9000, 1, 1, 1, 1},{11587, 1, 1, 1, 1},{7680 * 5, 1, 1, 1, 1},
+		{15319, 1, 1, 1, 1},{21269, 1, 1, 1, 1},{27283, 1, 1, 1, 1},{39829, 1, 1, 1, 1},{52733, 1, 1, 1, 1},{2000083, 1, 1, 1, 1},{4000067, 1, 1, 1, 1},{8003869, 1, 1, 1, 1},
+		{(uint64_t)pow(3,10), 1, 1, 1, 1},{(uint64_t)pow(3,11), 1, 1, 1, 1},{(uint64_t)pow(3,12), 1, 1, 1, 1},{(uint64_t)pow(3,13), 1, 1, 1, 1},{(uint64_t)pow(3,14), 1, 1, 1, 1},{(uint64_t)pow(3,15), 1, 1, 1, 1},
+		{(uint64_t)pow(5,5), 1, 1, 1, 1},{(uint64_t)pow(5,6), 1, 1, 1, 1},{(uint64_t)pow(5,7), 1, 1, 1, 1},{(uint64_t)pow(5,8), 1, 1, 1, 1},{(uint64_t)pow(5,9), 1, 1, 1, 1},
+		{(uint64_t)pow(7,4), 1, 1, 1, 1},{(uint64_t)pow(7,5), 1, 1, 1, 1},{(uint64_t)pow(7,6), 1, 1, 1, 1},{(uint64_t)pow(7,7), 1, 1, 1, 1},{(uint64_t)pow(7,8), 1, 1, 1, 1},
+		{(uint64_t)pow(11,3), 1, 1, 1, 1},{(uint64_t)pow(11,4), 1, 1, 1, 1},{(uint64_t)pow(11,5), 1, 1, 1, 1},{(uint64_t)pow(11,6), 1, 1, 1, 1},
+		{(uint64_t)pow(13,3), 1, 1, 1, 1},{(uint64_t)pow(13,4), 1, 1, 1, 1},{(uint64_t)pow(13,5), 1, 1, 1, 1},{(uint64_t)pow(13,6), 1, 1, 1, 1},
 		{8, 3, 1, 1, 2},{8, 5, 1, 1, 2},{8, 6, 1, 1, 2},{8, 7, 1, 1, 2},{8, 8, 1, 1, 2},{8, 9, 1, 1, 2},{8, 10, 1, 1, 2},{8, 11, 1, 1, 2},{8, 12, 1, 1, 2},{8, 13, 1, 1, 2},{8, 14, 1, 1, 2},{8, 15, 1, 1, 2},{8, 16, 1, 1, 2},{8, 17, 1, 1, 2},{8, 21, 1, 1, 2},{8, 22, 1, 1, 2},{8, 23, 1, 1, 2},{8, 24, 1, 1, 2},
 		{8, 25, 1, 1, 2},{8, 26, 1, 1, 2},{8, 27, 1, 1, 2},{8, 28, 1, 1, 2},{8, 29, 1, 1, 2},{8, 30, 1, 1, 2},{8, 31, 1, 1, 2},{8, 32, 1, 1, 2},{8, 33, 1, 1, 2},{8, 34, 1, 1, 2},{8, 35, 1, 1, 2},{8, 37, 1, 1, 2},{8, 38, 1, 1, 2},{8, 39, 1, 1, 2},{8, 41, 1, 1, 2},{8, 44, 1, 1, 2},{8, 45, 1, 1, 2},{8, 46, 1, 1, 2},{8, 49, 1, 1, 2},{8, 52, 1, 1, 2},{8, 56, 1, 1, 2},{8, 58, 1, 1, 2},{8, 60, 1, 1, 2},{8, 64, 1, 1, 2},{8, 66, 1, 1, 2},{8, 81, 1, 1, 2},{8, 125, 1, 1, 2},{8, 128, 1, 1, 2},{8, 243, 1, 1, 2},{8, 256, 1, 1, 2},{8, 343, 1, 1, 2},{8, 358, 1, 1, 2},{8, 429, 1, 1, 2},{8, 512, 1, 1, 2},{8, 1024, 1, 1, 2},
-	    {720, 480, 1, 1, 2},{1280, 720, 1, 1, 2},
+	    {720, 480, 1, 1, 2},{1280, 720, 1, 1, 2}, {8, 4320, 1, 1, 2},{8, 4391, 1, 1, 2},{8, 7000, 1, 1, 2},{8, 7680, 1, 1, 2},{8, 4050 * 3, 1, 1, 2},{8, 7680 * 5, 1, 1, 2}, {720, 480, 1, 1, 2},{1280, 720, 1, 1, 2},{1920, 1080, 1, 1, 2}, {2560, 1440, 1, 1, 2},{3840, 2160, 1, 1, 2},{7680, 4320, 1, 1, 2},
+		{8,15319, 1, 1, 2},{8,21269, 1, 1, 2},{8,27283, 1, 1, 2},{8,39829, 1, 1, 2},{8,52733, 1, 1, 2},{8,2000083, 1, 1, 2},{8,4000067, 1, 1, 2},{8,8003869, 1, 1, 2},
+		{8, (uint64_t)pow(3,10), 1, 1, 2},	{8, (uint64_t)pow(3,11), 1, 1, 2}, {8, (uint64_t)pow(3,12), 1, 1, 2}, {8, (uint64_t)pow(3,13), 1, 1, 2}, {8, (uint64_t)pow(3,14), 1, 1, 2}, {8, (uint64_t)pow(3,15), 1, 1, 2},
+		{8, (uint64_t)pow(5,5), 1, 1, 2},	{8, (uint64_t)pow(5,6), 1, 1, 2}, {8, (uint64_t)pow(5,7), 1, 1, 2}, {8, (uint64_t)pow(5,8), 1, 1, 2}, {8, (uint64_t)pow(5,9), 1, 1, 2},
+		{8, (uint64_t)pow(7,4), 1, 1, 2},{8, (uint64_t)pow(7,5), 1, 1, 2},{8, (uint64_t)pow(7,6), 1, 1, 2},{8, (uint64_t)pow(7,7), 1, 1, 2},{8, (uint64_t)pow(7,8), 1, 1, 2},
+		{8, (uint64_t)pow(11,3), 1, 1, 2},{8, (uint64_t)pow(11,4), 1, 1, 2},{8, (uint64_t)pow(11,5), 1, 1, 2},{8, (uint64_t)pow(11,6), 1, 1, 2},
+		{8, (uint64_t)pow(13,3), 1, 1, 2},{8, (uint64_t)pow(13,4), 1, 1, 2},{8, (uint64_t)pow(13,5), 1, 1, 2},{8, (uint64_t)pow(13,6), 1, 1, 2},
 		{2, 2, 2, 1, 3},{3, 3, 3, 1, 3},{5, 5, 5, 1, 3},{6, 6, 6, 1, 3},{7, 7, 7, 1, 3},{8, 8, 8, 1, 3},{9, 9, 9, 1, 3},{10, 10, 10, 1, 3},{11, 11, 11, 1, 3},{12, 12, 12, 1, 3},{13, 13, 13, 1, 3},{14, 14, 14, 1, 3},
 		{15, 15, 15, 1, 3},{16, 16, 16, 1, 3},{17, 17, 17, 1, 3},{21, 21, 21, 1, 3},{22, 22, 22, 1, 3},{23, 23, 23, 1, 3},{24, 24, 24, 1, 3},{25, 25, 25, 1, 3},{26, 26, 26, 1, 3},{27, 27, 27, 1, 3},{28, 28, 28, 1, 3},{29, 29, 29, 1, 3},{30, 30, 30, 1, 3},{31, 31, 31, 1, 3},{32, 32, 32, 1, 3},{33, 33, 33, 1, 3},{34, 34, 34, 1, 3},{35, 35, 35, 1, 3},{39, 39, 39, 1, 3},{42, 42, 42, 1, 3},{44, 44, 44, 1, 3},{45, 45, 45, 1, 3},{46, 46, 46, 1, 3},{49, 49, 49, 1, 3},{52, 52, 52, 1, 3},{56, 56, 56, 1, 3},{60, 60, 60, 1, 3},{64, 64, 64, 1, 3},{81, 81, 81, 1, 3},
 		{121, 121, 121, 1, 3},{128, 128, 128, 1, 3},{125, 125, 125, 1, 3},{143, 143, 143, 1, 3},{169, 169, 169, 1, 3},{243, 243, 243, 1, 3},{256, 256, 256, 1, 3},
Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@ void launch_precision_cuFFT_half(void* inputC, void* output_cuFFT, int device_id`
`51`	`51`	`break;`
`52`	`52`	`}`
`53`	`53`	`res = cufftXtMakePlanMany(`
`54`		`- planHalf, dims[3], local_dims, NULL, 1, 1, CUDA_C_16F,`
	`54`	`+ planHalf, dims[4], local_dims, NULL, 1, 1, CUDA_C_16F,`
`55`	`55`	`NULL, 1, 1, CUDA_C_16F, 1, &ws, CUDA_C_16F);`
`56`	`56`
`57`	`57`	`for (int i = 0; i < 1; i++) {`
Original file line number	Diff line number	Diff line change
`@@ -45,7 +45,7 @@ void launch_precision_cuFFT_r2c(void* inputC, void* output_cuFFT, int device_id,`
`45`	`45`	`}`
`46`	`46`	`cudaDeviceSynchronize();`
`47`	`47`	`cufftDestroy(planR2C);`
`48`		`- switch (dims[3]) {`
	`48`	`+ switch (dims[4]) {`
`49`	`49`	`case 1:`
`50`	`50`	`cufftPlan1d(&planR2C, dims[0], CUFFT_C2R, 1);`
`51`	`51`	`break;`
Original file line number	Diff line number	Diff line change
`@@ -44,7 +44,7 @@ void launch_precision_rocFFT_r2c(void* inputC, void* output_rocFFT, int device_i`
`44`	`44`	`}`
`45`	`45`	`hipDeviceSynchronize();`
`46`	`46`	`hipfftDestroy(planR2C);`
`47`		`- switch (dims[3]) {`
	`47`	`+ switch (dims[4]) {`
`48`	`48`	`case 1:`
`49`	`49`	`hipfftPlan1d(&planR2C, dims[0], HIPFFT_C2R, 1);`
`50`	`50`	`break;`