@@ -129,13 +129,6 @@ void FileBgen::read_all() {
129
129
}
130
130
131
131
void FileBgen::read_block_initial (uint64 start_idx, uint64 stop_idx, bool standardize) {
132
- read_bgen_block (G, F, bg, dosages.data (), frequency_was_estimated, nsamples, nsnps, blocksize, start_idx,
133
- stop_idx, standardize);
134
- }
135
-
136
- void read_bgen_block (Mat2D &G, Mat1D &F, bgen::CppBgenReader *bg, float *dosages,
137
- bool &frequency_was_estimated, uint64 nsamples, uint64 nsnps, uint blocksize,
138
- uint64 start_idx, uint64 stop_idx, bool standardize) {
139
132
uint actual_block_size = stop_idx - start_idx + 1 ;
140
133
uint i, j, snp_idx;
141
134
if (G.cols () < blocksize || (actual_block_size < blocksize)) {
@@ -145,7 +138,7 @@ void read_bgen_block(Mat2D &G, Mat1D &F, bgen::CppBgenReader *bg, float *dosages
145
138
for (i = 0 ; i < actual_block_size; ++i) {
146
139
snp_idx = start_idx + i;
147
140
auto var = bg->next_var ();
148
- var.minor_allele_dosage (dosages);
141
+ var.minor_allele_dosage (dosages. data () );
149
142
#pragma omp parallel for
150
143
for (j = 0 ; j < nsamples; j++) {
151
144
if (std::isnan (dosages[j])) {
@@ -163,7 +156,7 @@ void read_bgen_block(Mat2D &G, Mat1D &F, bgen::CppBgenReader *bg, float *dosages
163
156
for (i = 0 ; i < actual_block_size; ++i) {
164
157
snp_idx = start_idx + i;
165
158
auto var = bg->next_var ();
166
- var.minor_allele_dosage (dosages);
159
+ var.minor_allele_dosage (dosages. data () );
167
160
gc = 0 ;
168
161
gs = 0.0 ;
169
162
#pragma omp parallel for reduction(+ : gc) reduction(+ : gs)
@@ -194,53 +187,11 @@ void read_bgen_block(Mat2D &G, Mat1D &F, bgen::CppBgenReader *bg, float *dosages
194
187
}
195
188
}
196
189
197
- // this would be fast
198
- int shuffle_bgen_to_bin (std::string &fin, std::string fout, uint gb, bool standardize) {
199
- cao.print (tick.date (), " begin to permute BGEN into BINARY file" );
200
- bgen::CppBgenReader *bg = new bgen::CppBgenReader (fin, " " , true );
201
- uint nsamples = bg->header .nsamples ;
202
- uint nsnps = bg->header .nvariants ;
203
- const uint ibyte = 4 ;
204
- uint64 bytes_per_snp = nsamples * ibyte;
205
- uint blocksize = 1073741824 * gb / bytes_per_snp;
206
- uint nblocks = (nsnps + blocksize - 1 ) / blocksize;
207
- std::ofstream ofs (fout + " .perm.bin" , std::ios::binary);
208
- std::ofstream ofs2 (fout + " .perm.txt" );
209
- ofs.write ((char *)&nsnps, ibyte);
210
- ofs.write ((char *)&nsamples, ibyte);
211
- uint magic = ibyte * 2 ;
212
- std::vector<float > dosages (nsamples);
213
-
214
- bool frequency_was_estimated = false ;
215
- std::vector<uint> perm (nsnps);
216
- std::iota (perm.begin (), perm.end (), 0 );
217
- auto rng = std::default_random_engine{};
218
- std::shuffle (perm.begin (), perm.end (), rng);
219
- Mat2D G;
220
- Mat1D F (nsnps);
221
- uint64 idx, cur = 0 ;
222
- for (uint i = 0 ; i < nblocks; i++) {
223
- auto start_idx = i * blocksize;
224
- auto stop_idx = start_idx + blocksize - 1 ;
225
- stop_idx = stop_idx >= nsnps ? nsnps - 1 : stop_idx;
226
- read_bgen_block (G, F, bg, dosages.data (), frequency_was_estimated, nsamples, nsnps, blocksize,
227
- start_idx, stop_idx, standardize);
228
- for (Eigen::Index p = 0 ; p < G.cols (); p++, cur++) {
229
- ofs2 << perm[cur] << " \n " ;
230
- idx = magic + perm[cur] * bytes_per_snp;
231
- ofs.seekp (idx, std::ios_base::beg);
232
- ofs.write ((char *)G.col (p).data (), bytes_per_snp);
233
- }
234
- }
235
- delete bg;
236
- fin = fout + " .perm.bin" ;
237
- return (nsnps == cur);
238
- }
239
-
240
190
void permute_bgen_thread (std::vector<int > idx, std::string fin, std::string fout, int ithread) {
241
191
fout = fout + " .perm." + to_string (ithread) + " .bgen" ;
242
- bgen::CppBgenReader br (fin, " " , false ); // will call parse_all_variants();
243
- bgen::CppBgenWriter bw (fout, br.header .nsamples , br.header .extra , br.header .compression , br.header .layout , br.samples .samples );
192
+ bgen::CppBgenReader br (fin, " " , false ); // will call parse_all_variants();
193
+ bgen::CppBgenWriter bw (fout, br.header .nsamples , br.header .extra , br.header .compression , br.header .layout ,
194
+ br.samples .samples );
244
195
for (auto i : idx) {
245
196
auto var = br.variants [i];
246
197
std::vector<std::uint8_t > data = var.copy_data ();
@@ -253,7 +204,8 @@ PermMat permute_bgen(std::string &fin, std::string fout, int nthreads) {
253
204
bgen::CppBgenReader br (fin, " " , true );
254
205
uint nsnps = br.header .nvariants ;
255
206
std::string out = fout + " .perm.bgen" ;
256
- bgen::CppBgenWriter bw (out, br.header .nsamples , br.header .extra , br.header .compression , br.header .layout , br.samples .samples );
207
+ bgen::CppBgenWriter bw (out, br.header .nsamples , br.header .extra , br.header .compression , br.header .layout ,
208
+ br.samples .samples );
257
209
vector<int > perm (nsnps);
258
210
std::iota (perm.begin (), perm.end (), 0 );
259
211
auto rng = std::default_random_engine{};
0 commit comments