Adding devcontainer

spectralDNS · Aug 22, 2024 · 96a7692 · 96a7692
1 parent e9e30a4
commit 96a7692
Show file tree

Hide file tree

Showing 19 changed files with 666 additions and 608 deletions.
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -0,0 +1,19 @@
+{
+    "image": "mcr.microsoft.com/devcontainers/universal:2",
+    "hostRequirements": {
+      "cpus": 4
+    },
+    "waitFor": "onCreateCommand",
+    "updateContentCommand": ["/bin/bash", "-c", "conda env create --file l2copenblas.yml && source activate l2copenblas && cd src && ./build_meson.sh"],
+    "customizations": {
+      "codespaces": {
+        "openFiles": []
+      },
+      "vscode": {
+        "extensions": [
+          "ms-python.python"
+        ]
+      }
+    },
+    "postCreateCommand": "conda init"
+  }
diff --git a/.github/workflows/l2c.yml b/.github/workflows/l2c.yml
@@ -22,9 +22,9 @@ jobs:
     - uses: actions/checkout@v4
     - uses: conda-incubator/setup-miniconda@v3
       with:
-          activate-environment: l2c
+          activate-environment: l2copenblas
           channels: conda-forge
-          environment-file: environment.yml
+          environment-file: l2copenblas.yml
           python-version: ${{ matrix.python-version }}
           auto-activate-base: true
     - name: install and test 

diff --git a/environment.yml → l2cacc.yml b/environment.yml → l2cacc.yml
@@ -1,4 +1,4 @@
-name: l2c
+name: l2cacc
 
 channels:
   - conda-forge
@@ -7,16 +7,18 @@ channels:
 dependencies:
   - pip
   - numpy
-  - numba
   - cython
   - fftw
-  - mpi4py-fft
-  - openblas
   - meson
   - ninja
   - pkg-config
   - mpmath
   - boost
+  - mpi4py-fft
   - pytest
   - scipy
+  - numba
   - sympy
+
+variables:
+  USE_ACCELERATE : "1"
diff --git a/l2copenblas.yml b/l2copenblas.yml
@@ -0,0 +1,26 @@
+name: l2copenblas
+
+channels:
+  - conda-forge
+  - defaults
+
+dependencies:
+  - pip
+  - numpy
+  - cython
+  - fftw
+  - meson
+  - ninja
+  - openblas
+  - compilers
+  - pkg-config
+  - mpmath
+  - boost
+  - mpi4py-fft
+  - pytest
+  - scipy
+  - numba
+  - sympy
+
+variables:
+  USE_ACCELERATE: "0"
diff --git a/src/C/leg2cheb.h b/src/C/leg2cheb.h
@@ -28,12 +28,21 @@ enum { L2C = 0, C2L = 1, BOTH = 2 };
 
 #ifdef CLOCK_UPTIME_RAW
 #define tic clock_gettime_nsec_np(CLOCK_UPTIME_RAW)
-#define dtics(a, b) (double)(b - a) / 1.0E9
-#define toc(a) (double)(tic - a) / 1.0E9
+#define dtics(a, b) (double)(b - a) * 1.0E-9
+#define toc(a) (double)(tic - a) * 1.0E-9
 #else
-#define tic clock()
-#define dtics(a, b) (double)(b - a) / (double)CLOCKS_PER_SEC
-#define toc(a) (double)(tic - a) / (double)CLOCKS_PER_SEC
+#define tic ({ \
+    struct timespec ts; \
+    clock_gettime(CLOCK_MONOTONIC, &ts); \
+    (ts.tv_sec * 1e9 + ts.tv_nsec); \
+})
+#define toc(start) ({ \
+    struct timespec ts; \
+    clock_gettime(CLOCK_MONOTONIC, &ts); \
+    uint64_t end = (ts.tv_sec * 1.0E9 + ts.tv_nsec); \
+    (end - (start)) * 1.0E-9; \
+})
+#define dtics(a, b) (double)(b - a) * 1.0E-9
 #endif
 
 typedef struct {

diff --git a/src/bin/l2c.c b/src/bin/l2c.c
@@ -414,14 +414,24 @@ void test_directM(size_t N, size_t repeat, size_t verbose, size_t s, size_t M,
 void test_dct0(size_t N, size_t repeat) {
   double *fun = (double *)fftw_malloc(N * sizeof(double));
   double *fun_hat = (double *)fftw_malloc(N * sizeof(double));
+  char *useacc = getenv("USE_ACCELERATE");
+  char filename[128]; 
+  const char *prefix = "fftw_wisdom_acc_";
+  const char *suffix = ".dat";
+  snprintf(filename, sizeof(filename), "%s%s%s", prefix, useacc, suffix);
 
 #ifdef OMP
   fftw_init_threads();
   fftw_plan_with_nthreads(omp_get_max_threads());
 #endif
 
+  // Load the wisdom from a file if it exists
+  if (fftw_import_wisdom_from_filename(filename) == 0) {
+    fprintf(stderr, "Failed to load wisdom from file\n");
+  }
+
   fftw_plan plan =
-      fftw_plan_r2r_1d(N, fun, fun_hat, FFTW_REDFT10, FFTW_ESTIMATE);
+      fftw_plan_r2r_1d(N, fun, fun_hat, FFTW_REDFT10, FFTW_MEASURE);
 
   double min_time = 1e8;
   for (size_t i = 0; i < N; i++) {
@@ -437,9 +447,15 @@ void test_dct0(size_t N, size_t repeat) {
   }
   printf("Time N = %ld avg / min = %2.6e / %2.6e \n", N, toc(t0) / repeat,
          min_time);
+
+  // Save the wisdom to a file
+  if (fftw_export_wisdom_to_filename(filename) == 0) {
+    fprintf(stderr, "Failed to save wisdom to file\n");
+  }
   fftw_free(fun);
   fftw_free(fun_hat);
   fftw_destroy_plan(plan);
+  fftw_cleanup();
 #ifdef OMP
   fftw_cleanup_threads();
 #endif
@@ -741,7 +757,7 @@ void test_openmp2(size_t N, size_t repeat, size_t lagrange, size_t verbose) {
 int main(int argc, char *argv[]) {
   int opt;
   size_t N = 512;
-  size_t maxs = 64;
+  size_t maxs = 32;
   size_t verbose = 2;
   size_t num_threads = 1;
   size_t lagrange = 0;

diff --git a/src/build_meson.sh b/src/build_meson.sh
@@ -1,20 +1,9 @@
 #!/bin/bash
 
-#export LD_LIBRARY_PATH=$PWD/build-install/lib  
-#export DYLD_LIBRARY_PATH=$PWD/build-install/lib  
-#export PYTHONPATH=$PYTHONPATH:$PWD/build-install/lib/python3.12/site-packages
-
-export USE_ACCELERATE=0
-
 rm -rf build
 rm -rf build-install
 
-if [ "$USE_ACCELERATE" -eq 1 ]; then
-  export PATH="/opt/homebrew/opt/cython/bin:$PATH"
-  meson setup build --prefix=$PWD/build-install --includedir=/opt/homebrew/include --libdir=/opt/homebrew/lib
-else
-  meson setup build --prefix=$PWD/build-install --includedir=$CONDA_PREFIX/include --libdir=$CONDA_PREFIX/lib
-fi
+meson setup build --prefix=$PWD/build-install --includedir=$CONDA_PREFIX/include --libdir=$CONDA_PREFIX/lib
 meson configure -Dbuildtype=release -Doptimization=3 -Dc_args="-march=native -Ofast -fPIC" -Dc_link_args="-lm -fPIC"  build
 meson compile -v -C build
 meson install -C build
diff --git a/src/meson.build b/src/meson.build
@@ -23,12 +23,10 @@ if acc == '1'
   add_global_link_arguments(['-framework', 'Accelerate'], language : 'c')
   add_global_arguments(['-DUSE_ACCELERATE'], language : 'c')
   add_global_arguments(['-DUSE_ACCELERATE'], language : 'cpp') 
-  #boost = dependency('boost')
   deps = [fftw]
 else
   omp = dependency('openmp', required : false)
   cblas = dependency('openblas')
-  message(get_option('libdir'))
   fftwt = c.find_library('fftw3_threads', dirs : get_option('libdir'))
   deps = [cblas, fftw, fftwt, omp]
   if omp.found()

diff --git a/src/multiprec/C++/L2Cprec.cpp b/src/multiprec/C++/L2Cprec.cpp
@@ -7,10 +7,10 @@ extern "C" {
 #include "leg2cheb.h"
 }
 
-using boost::multiprecision::number;
 using boost::multiprecision::cpp_dec_float;
+using boost::multiprecision::number;
 
-typedef number<cpp_dec_float<32> > cpp_dec_float_32;
+typedef number<cpp_dec_float<32>> cpp_dec_float_32;
 
 template <class T> T Lambda(T z) {
   return boost::math::tgamma(z + T(0.5)) / boost::math::tgamma(z + 1);
@@ -68,27 +68,29 @@ template <class T> void cheb2leg(T *u, T *b, size_t N) {
   free(un);
 }
 
-void test_accuracy_C(size_t N, double m, size_t direction, size_t norm, size_t random) {
+void test_accuracy_C(size_t N, double m, size_t direction, size_t norm,
+                     size_t random) {
 
-  //typedef boost::multiprecision::cpp_dec_float_100 T;
-  //typedef boost::multiprecision::cpp_dec_float_50 T;
+  // typedef boost::multiprecision::cpp_dec_float_100 T;
+  // typedef boost::multiprecision::cpp_dec_float_50 T;
   typedef cpp_dec_float_32 T;
 
   //srand(time(NULL));   // Initialization, should only be called once.
   srand(1);
+
   T *u = (T *)malloc(N * sizeof(T));
   T *b = (T *)calloc(N, sizeof(T));
-  switch (random)
-  {
+  switch (random) {
   case 0:
     for (size_t i = 0; i < N; i++)
       u[i] = T(1) / (boost::multiprecision::pow(T(i + 1), m));
     break;
 
   case 1:
     for (size_t i = 0; i < N; i++)
-      //u[i] =  (T(rand()) / T(RAND_MAX)) / boost::multiprecision::pow(T(i + 1), m);
-      u[i] =  (2 * T(rand()) / T(RAND_MAX) - 1) / boost::multiprecision::pow(T(i + 1), m);
+      u[i] =  (T(rand()) / T(RAND_MAX)) / boost::multiprecision::pow(T(i + 1), m);
+      //u[i] = (2 * T(rand()) / T(RAND_MAX) - 1) /
+      //       boost::multiprecision::pow(T(i + 1), m);
     break;
   }
 
@@ -106,8 +108,8 @@ void test_accuracy_C(size_t N, double m, size_t direction, size_t norm, size_t r
   double *output_arrayM = (double *)calloc(N, sizeof(double));
   double *output_arrayN = (double *)calloc(N, sizeof(double));
 
-  fmm_plan *fmmplanM = create_fmm(N, 64, 18, direction, 0, 0, 1);
-  fmm_plan *fmmplanN = create_fmm(N, 64, 18, direction, 1, 0, 1);
+  fmm_plan *fmmplanM = create_fmm(N, 32, 18, direction, 0, 0, 1);
+  fmm_plan *fmmplanN = create_fmm(N, 32, 18, direction, 1, 0, 1);
 
   for (size_t i = 0; i < N; i++)
     input_array[i] = (double)u[i];
@@ -119,51 +121,71 @@ void test_accuracy_C(size_t N, double m, size_t direction, size_t norm, size_t r
   double errorN = 0;
   double max_output;
   double ulp;
-  switch (norm)
-  {
+  max_output = 0;
+  for (size_t i = 0; i < N; i++) {
+    max_output = fmax((double)b[i], max_output);
+  }
+  //std::cout << max_output << std::endl;
+  ulp = nextafter(max_output, 1e8) - max_output;
+  switch (norm) {
   case 0: // L2 norm
-    {
-      for (size_t i = 0; i < N; i++) {
-        errorM += pow(output_arrayM[i] - (double)b[i], 2);
-        errorN += pow(output_arrayN[i] - (double)b[i], 2);
-      }
-      errorM = sqrt(errorM);
-      errorN = sqrt(errorN);
+  {
+    for (size_t i = 0; i < N; i++) {
+      errorM += pow(output_arrayM[i] - (double)b[i], 2);
+      errorN += pow(output_arrayN[i] - (double)b[i], 2);
     }
-    break;
+    errorM = sqrt(errorM);
+    errorN = sqrt(errorN);
+  } break;
 
   case 1: // inf norm
-    {
-      max_output = 0;
-      for (size_t i = 0; i < N; i++) {
-        max_output = fmax((double)b[i], max_output);
-      }
-      ulp = nextafter(max_output, 1e8) - max_output;
+  {
 
-      for (size_t i = 0; i < N; i++) {
-        errorM = fmax(fabs(output_arrayM[i] - (double)b[i]), errorM);
-        errorN = fmax(fabs(output_arrayN[i] - (double)b[i]), errorN);
-        //std::cout << std::setprecision(16) << errorM << " " << fabs(output_arrayM[i] - (double)b[i]) << " " << (double)b[i] <<  " " << ulp << " " << errorM/ulp << std::endl;
+    for (size_t i = 0; i < N; i++) {
+      double e0 = fabs(output_arrayM[i] - (double)b[i]);
+      double e1 = fabs(output_arrayN[i] - (double)b[i]);
+      if (e0 > errorM) {
+        errorM = e0;
+        //std::cout << "Max for i=" <<  i << std::endl;;
+      }
+      if (e1 > errorN) {
+        errorN = e1;
+        //std::cout << "Max for i=" <<  i << std::endl;;
       }
-      errorM /= max_output;
-      errorN /= max_output;
+
+      //errorM = fmax(fabs(output_arrayM[i] - (double)b[i]), errorM);
+      //errorN = fmax(fabs(output_arrayN[i] - (double)b[i]), errorN);
+      // std::cout << std::setprecision(16) << errorM << " " <<
+      // fabs(output_arrayM[i] - (double)b[i]) << " " << (double)b[i] <<  " " <<
+      // ulp << " " << errorM/ulp << std::endl;
     }
+    errorM /= max_output;
+    errorN /= max_output;
+  }
   default:
     break;
   }
+  //for (size_t i = 0; i < 20; i++)
+  //{
+  //  std::cout << (double)b[i] << std::endl;
+  //}
+
   free(input_array);
   free(output_arrayM);
   free(output_arrayN);
   free(u);
   free(b);
 
-  std::cout << std::setprecision(16) << errorM*max_output << " " << errorM*max_output / ulp << " " << errorN*max_output << " " << errorN*max_output / ulp  << " " << ulp  << std::endl;
-  //std::cout << nextafter(1.0, 1e8)-1.0 << " " << nextafter(2.0, 1e8) -2.0 << " " << nextafter(4.0, 1e8) - 4.0 << std::endl;
+  std::cout << std::setprecision(16) << errorM * max_output << " "
+            << errorM * max_output / ulp << " " << errorN * max_output << " "
+            << errorN * max_output / ulp << " " << ulp << std::endl;
+  // std::cout << nextafter(1.0, 1e8)-1.0 << " " << nextafter(2.0, 1e8) -2.0 <<
+  // " " << nextafter(4.0, 1e8) - 4.0 << std::endl;
   return;
 }
 
 void test_accuracy(size_t N, size_t m) {
-  //typedef boost::multiprecision::cpp_dec_float_50 T;
+  // typedef boost::multiprecision::cpp_dec_float_50 T;
   typedef boost::multiprecision::cpp_dec_float_100 T;
   T *u = (T *)malloc(N * sizeof(T));
   T *b = (T *)calloc(N, sizeof(T));
@@ -217,8 +239,7 @@ int main(int argc, char *argv[]) {
       exit(-1);
     }
   }
-  switch (a)
-  {
+  switch (a) {
   case 0:
     test_accuracy(N, m);
     break;

diff --git a/src/timing/roundtrip.py → src/results/figure3.py b/src/timing/roundtrip.py → src/results/figure3.py
@@ -1,3 +1,7 @@
+"""This file is used to compute figure 3 in the paper
+
+It also creates a LaTeX table of the same data; the table itself is not used.
+"""
 import subprocess
 import re
 import numpy as np
@@ -38,5 +42,5 @@
 plt.legend(['r = 0', 'r = 1/2'])
 plt.ylabel('$E_{\\infty}$')
 plt.xlabel('N')
-plt.savefig('accuracy_roundtrip.png')
+plt.savefig('figure3.png')
 plt.show()