@@ -193,9 +193,9 @@ void PairFLAREKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
193
193
+ 2 // evdwls, B2_norm2s
194
194
+ 0.5 // numneigh_short
195
195
+ max_neighs * (
196
- n_max*4 // g and gT
197
- + n_harmonics*4 // Y and YT
198
- // + n_max*n_harmonics*3 // single_bond_grad
196
+ n_max*4 // g
197
+ + n_harmonics*4 // Y
198
+ + n_max*n_harmonics*3 // single_bond_grad
199
199
+ 3 // partial_forces
200
200
+ 0.5 // neighs_short
201
201
)
@@ -258,12 +258,8 @@ void PairFLAREKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
258
258
g_ra = g;
259
259
Y_ra = Y;
260
260
261
- // gT = View4D(); YT = View4D();
262
- // gT = View4D(Kokkos::ViewAllocateWithoutInitializing("FLARE: gT"), batch_size, max_neighs, 4, n_max);
263
- // YT = View4D(Kokkos::ViewAllocateWithoutInitializing("FLARE: YT"), batch_size, max_neighs, 4, n_harmonics);
264
-
265
- // single_bond_grad = View5D();
266
- // single_bond_grad = View5D(Kokkos::ViewAllocateWithoutInitializing("FLARE: single_bond_grad"), batch_size, max_neighs, 3, n_max, n_harmonics);
261
+ single_bond_grad = View5D ();
262
+ single_bond_grad = View5D (Kokkos::ViewAllocateWithoutInitializing (" FLARE: single_bond_grad" ), batch_size, max_neighs, 3 , n_max, n_harmonics);
267
263
partial_forces = View3D ();
268
264
partial_forces = View3D (Kokkos::ViewAllocateWithoutInitializing (" FLARE: partial forces" ), batch_size, max_neighs, 3 );
269
265
@@ -281,25 +277,14 @@ void PairFLAREKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
281
277
*this
282
278
);
283
279
284
- // transpose R and Y for later use
285
- int g_size = ScratchView3D::shmem_size (n_max, 4 , max_neighs);
286
- int Y_size = ScratchView3D::shmem_size (n_harmonics, 4 , max_neighs);
287
- /*
288
- auto transpolicy = Kokkos::TeamPolicy<DeviceType, TagTransposeRY>(batch_size, SINGLE_BOND_TEAM_SIZE, vector_length).set_scratch_size(
289
- 0, Kokkos::PerTeam(g_size + Y_size));
290
- Kokkos::parallel_for("FLARE: transpose R and Y",
291
- transpolicy,
292
- *this
293
- );
294
- */
295
-
296
280
// compute single bond and its gradient
297
281
// dnlm, dnlmj
298
- g_size = ScratchView1D ::shmem_size (n_max);
299
- Y_size = ScratchView1D ::shmem_size (n_harmonics);
282
+ int g_size = ScratchView2D ::shmem_size (n_max, 4 );
283
+ int Y_size = ScratchView2D ::shmem_size (n_harmonics, 4 );
300
284
auto policy = Kokkos::TeamPolicy<DeviceType, TagSingleBond>(batch_size, SINGLE_BOND_TEAM_SIZE, vector_length).set_scratch_size (
301
285
0 , Kokkos::PerThread (g_size + Y_size));
302
286
Kokkos::deep_copy (single_bond, 0.0 );
287
+ // Kokkos::deep_copy(single_bond_grad, 0.0);
303
288
Kokkos::parallel_for (" FLARE: single bond" ,
304
289
policy,
305
290
*this
@@ -343,15 +328,11 @@ void PairFLAREKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
343
328
);
344
329
345
330
// compute partial forces
346
- g_size = ScratchView2D::shmem_size (4 , n_max);
347
- Y_size = ScratchView2D::shmem_size (4 , n_harmonics);
348
331
int u_size = ScratchView2D::shmem_size (n_radial, n_harmonics);
349
332
Kokkos::parallel_for (" FLARE: partial forces" ,
350
333
Kokkos::TeamPolicy<DeviceType, TagF>(batch_size, TEAM_SIZE, vector_length).set_scratch_size (
351
334
0 , Kokkos::PerTeam (u_size)
352
- )/* .set_scratch_size(
353
- 0, Kokkos::PerThread(g_size + Y_size)
354
- )*/ ,
335
+ ),
355
336
*this
356
337
);
357
338
@@ -430,113 +411,63 @@ void PairFLAREKokkos<DeviceType>::operator()(const int ii, const int jj) const {
430
411
431
412
template <class DeviceType >
432
413
KOKKOS_INLINE_FUNCTION
433
- void PairFLAREKokkos<DeviceType>::operator ()(TagTransposeRY , const MemberType team_member) const {
414
+ void PairFLAREKokkos<DeviceType>::operator ()(TagSingleBond , const MemberType team_member) const {
434
415
int ii = team_member.league_rank ();
435
416
436
417
const int jnum = d_numneigh_short (ii);
437
418
438
- ScratchView3D gscratch (team_member.team_scratch (0 ), 4 , n_max, max_neighs );
439
- ScratchView3D Yscratch (team_member.team_scratch (0 ), 4 , n_harmonics, max_neighs );
419
+ ScratchView2D gscratch (team_member.thread_scratch (0 ), 4 , n_max);
420
+ ScratchView2D Yscratch (team_member.thread_scratch (0 ), 4 , n_harmonics);
440
421
422
+ Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, jnum), [&] (int jj){
441
423
442
- Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, 4 *n_max), [&] (int nc){
443
- Kokkos::parallel_for (Kokkos::ThreadVectorRange (team_member, jnum), [&] (int jj){
444
- // int n = nc / 4;
445
- // int c = nc -4*n;
446
- int c = nc / n_max;
447
- int n = nc - c*n_max;
448
- gscratch (c, n, jj) = g (ii, jj, n, c);
449
- });
450
- });
451
- Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, 4 *n_harmonics), [&] (int lmc){
452
- Kokkos::parallel_for (Kokkos::ThreadVectorRange (team_member, jnum), [&] (int jj){
453
- // int lm = lmc / 4;
454
- // int c = lmc - 4 * lm;
455
- int c = lmc / n_harmonics;
456
- int lm = lmc - c*n_harmonics;
457
- Yscratch (c, lm, jj) = Y (ii, jj, lm, c);
458
- });
459
- });
460
- team_member.team_barrier ();
424
+ int j = d_neighbors_short (ii,jj);
425
+ j &= NEIGHMASK;
426
+ int s = type[j] - 1 ;
461
427
462
- Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, jnum), [&] (int jj){
463
428
464
429
Kokkos::parallel_for (Kokkos::ThreadVectorRange (team_member, 4 *n_max), [&] (int nc){
465
430
// int n = nc / 4;
466
431
// int c = nc -4*n;
467
432
int c = nc / n_max;
468
433
int n = nc - c*n_max;
469
- gT (ii, jj, c, n) = gscratch (c, n, jj );
434
+ gscratch ( c, n) = g_ra (ii, jj, n, c );
470
435
});
471
436
Kokkos::parallel_for (Kokkos::ThreadVectorRange (team_member, 4 *n_harmonics), [&] (int lmc){
472
437
// int lm = lmc / 4;
473
438
// int c = lmc - 4 * lm;
474
439
int c = lmc / n_harmonics;
475
440
int lm = lmc - c*n_harmonics;
476
- YT (ii, jj, c, lm) = Yscratch (c, lm, jj);
477
- });
478
- });
479
- }
480
-
481
- template <class DeviceType >
482
- KOKKOS_INLINE_FUNCTION
483
- void PairFLAREKokkos<DeviceType>::operator ()(TagSingleBond, const MemberType team_member) const {
484
- int ii = team_member.league_rank ();
485
-
486
- const int jnum = d_numneigh_short (ii);
487
-
488
- ScratchView1D gscratch (team_member.thread_scratch (0 ), n_max);
489
- ScratchView1D Yscratch (team_member.thread_scratch (0 ), n_harmonics);
490
-
491
- Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, jnum), [&] (int jj){
492
-
493
- int j = d_neighbors_short (ii,jj);
494
- j &= NEIGHMASK;
495
- int s = type[j] - 1 ;
496
-
497
-
498
- Kokkos::parallel_for (Kokkos::ThreadVectorRange (team_member, n_max), [&] (int n){
499
- // int n = nc / 4;
500
- // int c = nc -4*n;
501
- // int c = nc / n_max;
502
- // int n = nc - c*n_max;
503
- gscratch (n) = g (ii, jj, n, 0 );
504
- });
505
- Kokkos::parallel_for (Kokkos::ThreadVectorRange (team_member, n_harmonics), [&] (int lm){
506
- // int lm = lmc / 4;
507
- // int c = lmc - 4 * lm;
508
- // int c = lmc / n_harmonics;
509
- // int lm = lmc - c*n_harmonics;
510
- Yscratch (lm) = Y (ii, jj, lm, 0 );
441
+ Yscratch (c, lm) = Y_ra (ii, jj, lm, c);
511
442
});
512
443
513
444
Kokkos::parallel_for (Kokkos::ThreadVectorRange (team_member, n_max*n_harmonics), [&] (int nlm){
514
445
int n = nlm / n_harmonics;
515
446
int lm = nlm - n_harmonics*n;
516
447
517
448
int radial_index = s*n_max + n;
518
- double g_val = gscratch (n);
519
- // double gx_val = gscratch(1,n);
520
- // double gy_val = gscratch(2,n);
521
- // double gz_val = gscratch(3,n);
449
+ double g_val = gscratch (0 , n);
450
+ double gx_val = gscratch (1 ,n);
451
+ double gy_val = gscratch (2 ,n);
452
+ double gz_val = gscratch (3 ,n);
522
453
523
454
524
- double h_val = Yscratch (lm);
525
- // double hx_val = Yscratch(1,lm);
526
- // double hy_val = Yscratch(2,lm);
527
- // double hz_val = Yscratch(3,lm);
455
+ double h_val = Yscratch (0 , lm);
456
+ double hx_val = Yscratch (1 ,lm);
457
+ double hy_val = Yscratch (2 ,lm);
458
+ double hz_val = Yscratch (3 ,lm);
528
459
529
460
double bond = g_val * h_val;
530
- // double bond_x = gx_val * h_val + g_val * hx_val;
531
- // double bond_y = gy_val * h_val + g_val * hy_val;
532
- // double bond_z = gz_val * h_val + g_val * hz_val;
461
+ double bond_x = gx_val * h_val + g_val * hx_val;
462
+ double bond_y = gy_val * h_val + g_val * hy_val;
463
+ double bond_z = gz_val * h_val + g_val * hz_val;
533
464
534
465
// Update single bond basis arrays.
535
466
Kokkos::atomic_add (&single_bond (ii, radial_index, lm),bond); // TODO: bad?
536
467
537
- // single_bond_grad(ii,jj,0,n,lm) = bond_x;
538
- // single_bond_grad(ii,jj,1,n,lm) = bond_y;
539
- // single_bond_grad(ii,jj,2,n,lm) = bond_z;
468
+ single_bond_grad (ii,jj,0 ,n,lm) = bond_x;
469
+ single_bond_grad (ii,jj,1 ,n,lm) = bond_y;
470
+ single_bond_grad (ii,jj,2 ,n,lm) = bond_z;
540
471
});
541
472
});
542
473
}
@@ -659,9 +590,6 @@ void PairFLAREKokkos<DeviceType>::operator()(TagF, const MemberType team_member)
659
590
const int i = ilist_curr_type[ii+startatom];
660
591
const int jnum = d_numneigh_short (ii);
661
592
662
- // ScratchView2D gscratch(team_member.thread_scratch(0), 4, n_max);
663
- // ScratchView2D Yscratch(team_member.thread_scratch(0), 4, n_harmonics);
664
-
665
593
ScratchView2D uscratch (team_member.team_scratch (0 ), n_radial, n_harmonics);
666
594
Kokkos::parallel_for (Kokkos::TeamVectorRange (team_member, n_bond), [&] (int nlm){
667
595
int n = nlm / n_harmonics;
@@ -683,16 +611,7 @@ void PairFLAREKokkos<DeviceType>::operator()(TagF, const MemberType team_member)
683
611
int n = nlm / n_harmonics;
684
612
int lm = nlm - n*n_harmonics;
685
613
int radial_index = s*n_max + n;
686
-
687
- double gval = g (ii, jj, n, 0 );
688
- double gg = g (ii, jj, n, c+1 );
689
-
690
- double Yval = Y (ii, jj, lm, 0 );
691
- double Yg = Y (ii, jj, lm, c+1 );
692
-
693
- tmp += (gg*Yval + gval*Yg) * uscratch (radial_index, lm);
694
-
695
- // tmp += single_bond_grad(ii, jj, c, n, lm)*uscratch(radial_index, lm);
614
+ tmp += single_bond_grad (ii, jj, c, n, lm)*uscratch (radial_index, lm);
696
615
}, tmp);
697
616
partial_forces (ii,jj,c) = tmp;
698
617
});
0 commit comments