Updates - Frobenius automorphism and multithread reader for PSI I/O

faberga · web-flow · commit d7be6f0dca6f · 2022-12-06T08:56:53.000-05:00
Optimizations of Frobenius automorphisms
Multi-threaded reader for PSI I/O
diff --git a/include/helib/EncryptedArray.h b/include/helib/EncryptedArray.h
@@ -2651,7 +2651,7 @@ inline void totalSums(Ctxt& ctxt)
 
 //! @brief Map all non-zero slots to 1, leaving zero slots as zero.
 //! Assumes that r=1, and that all the slots contain elements from GF(p^d).
-void mapTo01(const EncryptedArray& ea, Ctxt& ctxt);
+void mapTo01(const EncryptedArray& ea, Ctxt& ctxt, bool multithread = true);
 // Implemented in eqtesting.cpp. We compute
 //             x^{p^d-1} = x^{(1+p+...+p^{d-1})*(p-1)}
 // by setting y=x^{p-1} and then outputting y * y^p * ... * y^{p^{d-1}},
diff --git a/misc/psi/io/io.h b/misc/psi/io/io.h
@@ -76,11 +76,14 @@ helib::Database<TXT> readDbFromFile(const std::string& databaseFilePath,
       }
     }
   } else { // Ctxt query
-    for (long i = 0; i < nrow; ++i) {
-      for (long j = 0; j < ncol; ++j) {
-        reader.value().readDatum(data(i, j), i, j);
-      }
+    NTL_EXEC_RANGE(nrow * ncol, first, last)
+    Reader<TXT> threadReader(reader.value());
+    for (long i = first; i < last; ++i) {
+      long row = i / ncol;
+      long col = i % ncol;
+      threadReader.readDatum(data(row, col), row, col);
     }
+    NTL_EXEC_RANGE_END
   }
 
   return helib::Database<TXT>(data, contextp);
@@ -128,11 +131,14 @@ helib::Matrix<TXT> readQueryFromFile(const std::string& queryFilePath,
     }
   } else { // Ctxt query
     // Read in ctxts
-    for (long i = 0; i < nrow; ++i) {
-      for (long j = 0; j < ncol; ++j) {
-        reader.value().readDatum(query(i, j), i, j);
-      }
+    NTL_EXEC_RANGE(nrow * ncol, first, last)
+    Reader<TXT> threadReader(reader.value());
+    for (long i = first; i < last; ++i) {
+      long row = i / ncol;
+      long col = i % ncol;
+      threadReader.readDatum(query(row, col), row, col);
     }
+    NTL_EXEC_RANGE_END
     if (ncol == 1) { // Transpose to make row vector
       query.transpose();
     }
diff --git a/src/eqtesting.cpp b/src/eqtesting.cpp
@@ -9,6 +9,21 @@
  * See the License for the specific language governing permissions and
  * limitations under the License. See accompanying LICENSE file.
  */
+
+/* Copyright (C) 2022 Intel Corporation
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Modifying HElib to optimize the 01 map.
+ * Contributions include
+ * Modified:
+ *   mapTo01
+ *     added parallelism to the existing logic for the norm calculation
+ *     added alternative logic for the norm calculation which uses O(log d)
+ *     automorphisms on a single core
+ *     added an optional argument `multithread` which determines
+ *     which version to run
+ *
+ */
 /**
  * @file eqtesting.cpp
  * @brief Useful functions for equality testing...
@@ -17,6 +32,7 @@
 #include <helib/timing.h>
 #include <helib/EncryptedArray.h>
 #include <helib/Ptxt.h>
+#include <NTL/BasicThreadPool.h>
 
 #include <cstdio>
 
@@ -29,24 +45,47 @@ namespace helib {
 // and then outputting y * y^p * ... * y^{p^{d-1}}, with exponentiation to
 // powers of p done via Frobenius.
 
-// FIXME: the computation of the "norm" y * y^p * ... * y^{p^{d-1}}
-// can be done using O(log d) automorphisms, rather than O(d).
-
-void mapTo01(const EncryptedArray& ea, Ctxt& ctxt)
+void mapTo01(const EncryptedArray& ea, Ctxt& ctxt, bool multithread)
 {
   long p = ctxt.getPtxtSpace();
   if (p != ea.getPAlgebra().getP()) // ptxt space is p^r for r>1
     throw LogicError("mapTo01 not implemented for r>1");
 
   if (p > 2)
     ctxt.power(p - 1); // set y = x^{p-1}
-
   long d = ea.getDegree();
-  if (d > 1) { // compute the product of the d automorphisms
-    std::vector<Ctxt> v(d, ctxt);
-    for (long i = 1; i < d; i++)
-      v[i].frobeniusAutomorph(i);
-    totalProduct(ctxt, v);
+  // TODO: investigate this trade-off more thoroughly.
+  // The parallel path performs d - 1 automorphisms split over t threads
+  // (about (d - 1)/t per thread); the single-threaded path performs O(log d).
+  if ((NTL::AvailableThreads() > 1) && multithread) {
+    // Parallel path: O(d) Frobenius automorphisms followed by one product.
+    if (d > 1) {
+      // v starts as d copies of y; v[i] gets frobeniusAutomorph(i), i = 1..d-1
+      std::vector<Ctxt> v(d, ctxt);
+      NTL_EXEC_RANGE(d - 1, first, last)
+      for (long i = first; i < last; i++)
+        v[i + 1].frobeniusAutomorph(i + 1); // range index i maps to slot i + 1
+      NTL_EXEC_RANGE_END
+      // then multiply the d entries of v together into ctxt
+      totalProduct(ctxt, v);
+    }
+  } else {
+    // Compute the "norm" y * y^p * ... * y^{p^{d-1}} using O(log d)
+    // automorphisms; invariant: ctxt = y * y^p * ... * y^{p^{e-1}}.
+    long e = 1;
+    long b = NTL::NumBits(d);
+    Ctxt orig = ctxt; // keep y for the "+1" steps below
+    for (long i = b - 2; i >= 0; i--) { // bits of d below the top one
+      Ctxt tmp = ctxt;
+      tmp.frobeniusAutomorph(e); // tmp = y^{p^e} * ... * y^{p^{2e-1}}
+      ctxt *= tmp;               // ctxt = y * ... * y^{p^{2e-1}}
+      e *= 2;
+      if (NTL::bit(d, i)) { // bit set: extend the product by one factor
+        ctxt.frobeniusAutomorph(1); // ctxt = y^p * ... * y^{p^e}
+        ctxt *= orig;               // ctxt = y * y^p * ... * y^{p^e}
+        e++;
+      }
+    }
   }
 }
 

Original file line number	Diff line number	Diff line change
`@@ -76,11 +76,14 @@ helib::Database<TXT> readDbFromFile(const std::string& databaseFilePath,`
`76`	`76`	`}`
`77`	`77`	`}`
`78`	`78`	`} else { // Ctxt query`
`79`		`- for (long i = 0; i < nrow; ++i) {`
`80`		`- for (long j = 0; j < ncol; ++j) {`
`81`		`- reader.value().readDatum(data(i, j), i, j);`
`82`		`- }`
	`79`	`+ NTL_EXEC_RANGE(nrow * ncol, first, last)`
	`80`	`+ Reader<TXT> threadReader(reader.value());`
	`81`	`+ for (long i = first; i < last; ++i) {`
	`82`	`+ long row = i / ncol;`
	`83`	`+ long col = i % ncol;`
	`84`	`+ threadReader.readDatum(data(row, col), row, col);`
`83`	`85`	`}`
	`86`	`+ NTL_EXEC_RANGE_END`
`84`	`87`	`}`
`85`	`88`
`86`	`89`	`return helib::Database<TXT>(data, contextp);`
`@@ -128,11 +131,14 @@ helib::Matrix<TXT> readQueryFromFile(const std::string& queryFilePath,`
`128`	`131`	`}`
`129`	`132`	`} else { // Ctxt query`
`130`	`133`	`// Read in ctxts`
`131`		`- for (long i = 0; i < nrow; ++i) {`
`132`		`- for (long j = 0; j < ncol; ++j) {`
`133`		`- reader.value().readDatum(query(i, j), i, j);`
`134`		`- }`
	`134`	`+ NTL_EXEC_RANGE(nrow * ncol, first, last)`
	`135`	`+ Reader<TXT> threadReader(reader.value());`
	`136`	`+ for (long i = first; i < last; ++i) {`
	`137`	`+ long row = i / ncol;`
	`138`	`+ long col = i % ncol;`
	`139`	`+ threadReader.readDatum(query(row, col), row, col);`
`135`	`140`	`}`
	`141`	`+ NTL_EXEC_RANGE_END`
`136`	`142`	`if (ncol == 1) { // Transpose to make row vector`
`137`	`143`	`query.transpose();`
`138`	`144`	`}`