Skip to content

Commit bc4f3e0

Browse files
committed
Added DST I-IV support
1 parent 3c60386 commit bc4f3e0

22 files changed

+1288
-488
lines changed

VkFFT_TestSuite.cpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -562,7 +562,7 @@ int main(int argc, char* argv[])
562562
version_decomposed[0] = version / 10000;
563563
version_decomposed[1] = (version - version_decomposed[0] * 10000) / 100;
564564
version_decomposed[2] = (version - version_decomposed[0] * 10000 - version_decomposed[1] * 100);
565-
printf("VkFFT v%d.%d.%d (25-09-2023). Author: Tolmachev Dmitrii\n", version_decomposed[0], version_decomposed[1], version_decomposed[2]);
565+
printf("VkFFT v%d.%d.%d (22-10-2023). Author: Tolmachev Dmitrii\n", version_decomposed[0], version_decomposed[1], version_decomposed[2]);
566566
#if (VKFFT_BACKEND==0)
567567
printf("Vulkan backend\n");
568568
#elif (VKFFT_BACKEND==1)
@@ -591,7 +591,7 @@ int main(int argc, char* argv[])
591591
printf(" 7 - FFT + iFFT C2C Bluestein benchmark in single precision\n");
592592
printf(" 8 - FFT + iFFT C2C Bluestein benchmark in double precision\n");
593593
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
594-
printf(" 9 - FFT + iFFT C2C benchmark 1D batched in quad double-double precision LUT\n");
594+
printf(" 9 - FFT + iFFT C2C benchmark 1D batched in double-double emulation of quad precision LUT\n");
595595
#endif
596596
#if (VKFFT_BACKEND==0)
597597
printf(" 10 - multiple buffer(4 by default) split version of benchmark 0\n");
@@ -607,7 +607,7 @@ int main(int argc, char* argv[])
607607
printf(" 17 - VkFFT / FFTW R2R DCT-I, II, III and IV precision test in double precision\n");
608608
printf(" 18 - VkFFT / FFTW C2C radix 3 / 5 / 7 / 11 / 13 / Bluestein precision test in double precision\n");
609609
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
610-
printf(" 19 - VkFFT / FFTW C2C radix 3 / 5 / 7 / 11 / 13 / Bluestein precision test in quad double-double precision\n");
610+
printf(" 19 - VkFFT / FFTW C2C precision test in double-double emulation of quad precision\n");
611611
#endif
612612
#elif USE_rocFFT
613613
printf(" 11 - VkFFT / rocFFT / FFTW C2C precision test in single precision\n");
@@ -619,7 +619,7 @@ int main(int argc, char* argv[])
619619
printf(" 17 - VkFFT / FFTW R2R DCT-I, II, III and IV precision test in double precision\n");
620620
printf(" 18 - VkFFT / FFTW C2C radix 3 / 5 / 7 / 11 / 13 / Bluestein precision test in double precision\n");
621621
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
622-
printf(" 19 - VkFFT / FFTW C2C radix 3 / 5 / 7 / 11 / 13 / Bluestein precision test in quad double-double precision\n");
622+
printf(" 19 - VkFFT / FFTW C2C precision test in double-double emulation of quad precision\n");
623623
#endif
624624
#else
625625
printf(" 11 - VkFFT / FFTW C2C precision test in single precision\n");
@@ -631,7 +631,7 @@ int main(int argc, char* argv[])
631631
printf(" 17 - VkFFT / FFTW R2R DCT-I, II, III and IV precision test in double precision\n");
632632
printf(" 18 - VkFFT / FFTW C2C radix 3 / 5 / 7 / 11 / 13 / Bluestein precision test in double precision\n");
633633
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
634-
printf(" 19 - VkFFT / FFTW C2C radix 3 / 5 / 7 / 11 / 13 / Bluestein precision test in quad double-double precision\n");
634+
printf(" 19 - VkFFT / FFTW C2C precision test in double-double emulation of quad precision\n");
635635
#endif
636636
#endif
637637
#endif
@@ -652,12 +652,12 @@ int main(int argc, char* argv[])
652652
printf(" 1002 - FFT + iFFT C2C benchmark 1D batched in half precision: all supported systems from 2 to 4096\n");
653653
printf(" 1003 - FFT + iFFT C2C multidimensional benchmark in single precision: all supported cubes from 2 to 512\n");
654654
#ifdef VKFFT_USE_DOUBLEDOUBLE_FP128
655-
printf(" 1004 - FFT + iFFT C2C benchmark 1D batched in quad double-double precision: all supported systems from 2 to 4096\n");
655+
printf(" 1004 - FFT + iFFT C2C benchmark 1D batched in double-double emulation of quad precision: all supported systems from 2 to 4096\n");
656656
#endif
657657
printf(" -benchmark_vkfft: run VkFFT benchmark on a user-defined system:\n\
658658
-X uint, -Y uint, -Z uint - FFT dimensions (default Y and Z are 1)\n");
659659
printf("\
660-
-P uint - precision (0 - single, 1 - double, 2 - half, 3 - quad double-double) (default 0)\n");
660+
-P uint - precision (0 - single, 1 - double, 2 - half, 3 - double-double) (default 0)\n");
661661
printf("\
662662
-B uint - number of batched systems (default 1)\n\
663663
-N uint - number of consecutive FFT+iFFT iterations (default 1)\n\

benchmark_scripts/vkFFT_scripts/src/sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ VkFFTResult sample_1004_benchmark_VkFFT_quadDoubleDouble_2_4096(VkGPU* vkGPU, ui
6363
#elif(VKFFT_BACKEND==5)
6464
#endif
6565
if (file_output)
66-
fprintf(output, "1004 - VkFFT FFT + iFFT C2C benchmark 1D batched in quad double-double precision: all supported systems from 2 to 4096\n");
67-
printf("1004 - VkFFT FFT + iFFT C2C benchmark 1D batched in quad double-double precision: all supported systems from 2 to 4096\n");
66+
fprintf(output, "1004 - VkFFT FFT + iFFT C2C benchmark 1D batched in double-double emulation of quad precision: all supported systems from 2 to 4096\n");
67+
printf("1004 - VkFFT FFT + iFFT C2C benchmark 1D batched in double-double emulation of quad precision: all supported systems from 2 to 4096\n");
6868
const int num_runs = 3;
6969
double benchmark_result = 0;//averaged result = sum(system_size/iteration_time)/num_benchmark_samples
7070
//memory allocated on the CPU once, makes benchmark completion faster + avoids performance issues connected to frequent allocation/deallocation.

benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ VkFFTResult sample_100_benchmark_VkFFT_single_nd_dct(VkGPU* vkGPU, uint64_t file
195195
}
196196
//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.
197197
resFFT = initializeVkFFT(&app, configuration);
198-
if (resFFT == VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_DCT) {
198+
if (resFFT == VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_R2R) {
199199
if (r == num_runs - 1) {
200200
omitted_systems++;
201201
if (file_output)

benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ VkFFTResult sample_101_benchmark_VkFFT_double_nd_dct(VkGPU* vkGPU, uint64_t file
196196
}
197197
//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.
198198
resFFT = initializeVkFFT(&app, configuration);
199-
if (resFFT == VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_DCT) {
199+
if (resFFT == VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_R2R) {
200200
if (r == num_runs - 1) {
201201
omitted_systems++;
202202
if (file_output)

benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_out
304304
}
305305
//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.
306306
resFFT = initializeVkFFT(&app, configuration);
307-
if (resFFT == VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_DCT) {
307+
if (resFFT == VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_R2R) {
308308
if (file_output)
309309
fprintf(output, "VkFFT DCT-%d System: %" PRIu64 "x%" PRIu64 "x%" PRIu64 "x%" PRIu64 " - UNSUPPORTED\n", t, dims[0], dims[1], dims[2], dims[3]);
310310
printf("VkFFT DCT-%d System: %" PRIu64 "x%" PRIu64 "x%" PRIu64 "x%" PRIu64 " - UNSUPPORTED\n", t, dims[0], dims[1], dims[2], dims[3]);

benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ VkFFTResult sample_17_precision_VkFFT_double_dct(VkGPU* vkGPU, uint64_t file_out
303303
}
304304
//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.
305305
resFFT = initializeVkFFT(&app, configuration);
306-
if (resFFT == VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_DCT) {
306+
if (resFFT == VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_R2R) {
307307
if (file_output)
308308
fprintf(output, "VkFFT DCT-%d System: %" PRIu64 "x%" PRIu64 "x%" PRIu64 "x%" PRIu64 " - UNSUPPORTED\n", t, dims[0], dims[1], dims[2], dims[3]);
309309
printf("VkFFT DCT-%d System: %" PRIu64 "x%" PRIu64 "x%" PRIu64 "x%" PRIu64 " - UNSUPPORTED\n", t, dims[0], dims[1], dims[2], dims[3]);

benchmark_scripts/vkFFT_scripts/src/sample_19_precision_VkFFT_quadDoubleDouble_nonPow2.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ VkFFTResult sample_19_precision_VkFFT_quadDoubleDouble_nonPow2(VkGPU* vkGPU, uin
7474
#elif(VKFFT_BACKEND==5)
7575
#endif
7676
if (file_output)
77-
fprintf(output, "19 - VkFFT/FFTW C2C precision test in quad double-double precision\n");
78-
printf("19 - VkFFT/FFTW C2C precision test in quad double-double precision\n");
77+
fprintf(output, "19 - VkFFT/FFTW C2C precision test in double-double emulation of quad precision\n");
78+
printf("19 - VkFFT/FFTW C2C precision test in double-double emulation of quad precision\n");
7979

8080
const int num_benchmark_samples = 349;
8181
const int num_runs = 1;

benchmark_scripts/vkFFT_scripts/src/sample_9_benchmark_VkFFT_quadDoubleDouble.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ VkFFTResult sample_9_benchmark_VkFFT_quadDoubleDouble(VkGPU* vkGPU, uint64_t fil
6363
#elif(VKFFT_BACKEND==5)
6464
#endif
6565
if (file_output)
66-
fprintf(output, "9 - VkFFT FFT + iFFT C2C benchmark 1D batched in quad double-double precision LUT\n");
67-
printf("9 - VkFFT FFT + iFFT C2C benchmark 1D batched in quad double-double precision LUT\n");
66+
fprintf(output, "9 - VkFFT FFT + iFFT C2C benchmark 1D batched in double-double emulation of quad precision LUT\n");
67+
printf("9 - VkFFT FFT + iFFT C2C benchmark 1D batched in double-double emulation of quad precision LUT\n");
6868
const int num_runs = 3;
6969
double benchmark_result = 0;//averaged result = sum(system_size/iteration_time)/num_benchmark_samples
7070
//memory allocated on the CPU once, makes benchmark completion faster + avoids performance issues connected to frequent allocation/deallocation.

vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1301,6 +1301,9 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf
13011301
if (inputLaunchConfiguration.performDCT != 0) {
13021302
app->configuration.performDCT = inputLaunchConfiguration.performDCT;
13031303
}
1304+
if (inputLaunchConfiguration.performDST != 0) {
1305+
app->configuration.performDST = inputLaunchConfiguration.performDST;
1306+
}
13041307
if (inputLaunchConfiguration.disableMergeSequencesR2C != 0) {
13051308
app->configuration.disableMergeSequencesR2C = inputLaunchConfiguration.disableMergeSequencesR2C;
13061309
}
@@ -1412,7 +1415,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf
14121415
if (inputLaunchConfiguration.halfThreads != 0) app->configuration.halfThreads = inputLaunchConfiguration.halfThreads;
14131416
if (inputLaunchConfiguration.swapTo2Stage4Step != 0) app->configuration.swapTo2Stage4Step = inputLaunchConfiguration.swapTo2Stage4Step;
14141417
if (inputLaunchConfiguration.swapTo3Stage4Step != 0) app->configuration.swapTo3Stage4Step = inputLaunchConfiguration.swapTo3Stage4Step;
1415-
if (app->configuration.performDCT > 0) app->configuration.performBandwidthBoost = -1;
1418+
if ((app->configuration.performDCT > 0) || (app->configuration.performDST > 0)) app->configuration.performBandwidthBoost = -1;
14161419
if (inputLaunchConfiguration.performBandwidthBoost != 0) app->configuration.performBandwidthBoost = inputLaunchConfiguration.performBandwidthBoost;
14171420
#if(VKFFT_BACKEND==0)
14181421
if (inputLaunchConfiguration.stagingBuffer != 0) app->configuration.stagingBuffer = inputLaunchConfiguration.stagingBuffer;

vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_Registers.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ static inline void appendRegisterInitialization(VkFFTSpecializationConstantsLayo
333333
PfDefine(sc, &sc->angle, name);
334334
PfSetToZero(sc, &sc->angle);
335335
}
336-
if (((sc->stageStartSize.data.i > 1) && (!((sc->stageStartSize.data.i > 1) && (!sc->reorderFourStep) && (sc->inverse)))) || (((sc->stageStartSize.data.i > 1) && (!sc->reorderFourStep) && (sc->inverse))) || (sc->performDCT)) {
336+
if (((sc->stageStartSize.data.i > 1) && (!((sc->stageStartSize.data.i > 1) && (!sc->reorderFourStep) && (sc->inverse)))) || (((sc->stageStartSize.data.i > 1) && (!sc->reorderFourStep) && (sc->inverse))) || (sc->performDCT) || (sc->performDST)) {
337337
sc->mult.type = 100 + sc->vecTypeCode;
338338
PfAllocateContainerFlexible(sc, &sc->mult, 50);
339339
sprintf(name, "mult");
@@ -622,7 +622,7 @@ static inline void freeRegisterInitialization(VkFFTSpecializationConstantsLayout
622622
else {
623623
PfDeallocateContainer(sc, &sc->angle);
624624
}
625-
if (((sc->stageStartSize.data.i > 1) && (!((sc->stageStartSize.data.i > 1) && (!sc->reorderFourStep) && (sc->inverse)))) || (((sc->stageStartSize.data.i > 1) && (!sc->reorderFourStep) && (sc->inverse))) || (sc->performDCT)) {
625+
if (((sc->stageStartSize.data.i > 1) && (!((sc->stageStartSize.data.i > 1) && (!sc->reorderFourStep) && (sc->inverse)))) || (((sc->stageStartSize.data.i > 1) && (!sc->reorderFourStep) && (sc->inverse))) || (sc->performDCT) || (sc->performDST)) {
626626
PfDeallocateContainer(sc, &sc->mult);
627627
}
628628
return;

0 commit comments

Comments
 (0)