Skip to content

Commit d7be6f0

Browse files
authored
Updates - Frobenius automorphism and multithread reader for PSI I/O
Optimizations of Frobenius automorphisms. Multi-threaded reader for PSI I/O.
2 parents 4ae5466 + fc1dc72 commit d7be6f0

File tree

3 files changed

+64
-19
lines changed

3 files changed

+64
-19
lines changed

include/helib/EncryptedArray.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2651,7 +2651,7 @@ inline void totalSums(Ctxt& ctxt)
26512651

26522652
//! @brief Map all non-zero slots to 1, leaving zero slots as zero.
26532653
//! Assumes that r=1, and that all the slots contain elements from GF(p^d).
2654-
void mapTo01(const EncryptedArray& ea, Ctxt& ctxt);
2654+
void mapTo01(const EncryptedArray& ea, Ctxt& ctxt, bool multithread = true);
26552655
// Implemented in eqtesting.cpp. We compute
26562656
// x^{p^d-1} = x^{(1+p+...+p^{d-1})*(p-1)}
26572657
// by setting y=x^{p-1} and then outputting y * y^p * ... * y^{p^{d-1}},

misc/psi/io/io.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,14 @@ helib::Database<TXT> readDbFromFile(const std::string& databaseFilePath,
7676
}
7777
}
7878
} else { // Ctxt query
79-
for (long i = 0; i < nrow; ++i) {
80-
for (long j = 0; j < ncol; ++j) {
81-
reader.value().readDatum(data(i, j), i, j);
82-
}
79+
NTL_EXEC_RANGE(nrow * ncol, first, last)
80+
Reader<TXT> threadReader(reader.value());
81+
for (long i = first; i < last; ++i) {
82+
long row = i / ncol;
83+
long col = i % ncol;
84+
threadReader.readDatum(data(row, col), row, col);
8385
}
86+
NTL_EXEC_RANGE_END
8487
}
8588

8689
return helib::Database<TXT>(data, contextp);
@@ -128,11 +131,14 @@ helib::Matrix<TXT> readQueryFromFile(const std::string& queryFilePath,
128131
}
129132
} else { // Ctxt query
130133
// Read in ctxts
131-
for (long i = 0; i < nrow; ++i) {
132-
for (long j = 0; j < ncol; ++j) {
133-
reader.value().readDatum(query(i, j), i, j);
134-
}
134+
NTL_EXEC_RANGE(nrow * ncol, first, last)
135+
Reader<TXT> threadReader(reader.value());
136+
for (long i = first; i < last; ++i) {
137+
long row = i / ncol;
138+
long col = i % ncol;
139+
threadReader.readDatum(query(row, col), row, col);
135140
}
141+
NTL_EXEC_RANGE_END
136142
if (ncol == 1) { // Transpose to make row vector
137143
query.transpose();
138144
}

src/eqtesting.cpp

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,21 @@
99
* See the License for the specific language governing permissions and
1010
* limitations under the License. See accompanying LICENSE file.
1111
*/
12+
13+
/* Copyright (C) 2022 Intel Corporation
14+
* SPDX-License-Identifier: Apache-2.0
15+
*
16+
* Modifying HElib to optimize the 01 map.
17+
* Contributions include
18+
* Modified:
19+
* mapTo01
20+
* added parallelism to existing logic for norm calculation
21+
* added alternative logic for norm calculation which uses log(d)
22+
* automorphisms on a single core
23+
* added an additional optional argument `multithread` which determines
24+
* which version to run
25+
*
26+
*/
1227
/**
1328
* @file eqtesting.cpp
1429
* @brief Useful functions for equality testing...
@@ -17,6 +32,7 @@
1732
#include <helib/timing.h>
1833
#include <helib/EncryptedArray.h>
1934
#include <helib/Ptxt.h>
35+
#include <NTL/BasicThreadPool.h>
2036

2137
#include <cstdio>
2238

@@ -29,24 +45,47 @@ namespace helib {
2945
// and then outputting y * y^p * ... * y^{p^{d-1}}, with exponentiation to
3046
// powers of p done via Frobenius.
3147

32-
// FIXME: the computation of the "norm" y * y^p * ... * y^{p^{d-1}}
33-
// can be done using O(log d) automorphisms, rather than O(d).
34-
35-
void mapTo01(const EncryptedArray& ea, Ctxt& ctxt)
48+
void mapTo01(const EncryptedArray& ea, Ctxt& ctxt, bool multithread)
3649
{
3750
long p = ctxt.getPtxtSpace();
3851
if (p != ea.getPAlgebra().getP()) // ptxt space is p^r for r>1
3952
throw LogicError("mapTo01 not implemented for r>1");
4053

4154
if (p > 2)
4255
ctxt.power(p - 1); // set y = x^{p-1}
43-
4456
long d = ea.getDegree();
45-
if (d > 1) { // compute the product of the d automorphisms
46-
std::vector<Ctxt> v(d, ctxt);
47-
for (long i = 1; i < d; i++)
48-
v[i].frobeniusAutomorph(i);
49-
totalProduct(ctxt, v);
57+
// TODO: investigate this trade-off more thoroughly
58+
// Computing in parallel over t threads has runtime approximately
59+
// (d - 1)/t, whereas single thread has runtime approx log(d)
60+
if ((NTL::AvailableThreads() > 1) && multithread) {
61+
// Compute O(d) Frobenius automorphisms in parallel
62+
if (d > 1) {
63+
// compute the d - 1 automorphisms in parallel
64+
std::vector<Ctxt> v(d, ctxt);
65+
NTL_EXEC_RANGE(d - 1, first, last)
66+
for (long i = first; i < last; i++)
67+
v[i + 1].frobeniusAutomorph(i + 1);
68+
NTL_EXEC_RANGE_END
69+
// and compute the product of the d automorphisms
70+
totalProduct(ctxt, v);
71+
}
72+
} else {
73+
// Compute the "norm" y * y^p * ... * y^{p^{d-1}}
74+
// using O(log d) automorphisms, rather than O(d).
75+
long e = 1;
76+
long b = NTL::NumBits(d);
77+
Ctxt orig = ctxt;
78+
for (long i = b - 2; i >= 0; i--) {
79+
Ctxt tmp = ctxt;
80+
tmp.frobeniusAutomorph(e);
81+
ctxt *= tmp;
82+
e *= 2;
83+
if (NTL::bit(d, i)) {
84+
ctxt.frobeniusAutomorph(1);
85+
ctxt *= orig;
86+
e++;
87+
}
88+
}
5089
}
5190
}
5291

0 commit comments

Comments
 (0)