Skip to content

Commit

Permalink
assertion
Browse files Browse the repository at this point in the history
  • Loading branch information
mklarqvist committed Aug 18, 2019
1 parent bc0d45a commit c150197
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 32 deletions.
58 changes: 31 additions & 27 deletions benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,32 @@ uint64_t* generate_random_data(uint32_t n_bitmaps) {
return mem;
}

// Scalar reference implementation: sums STORM_popcnt_lookup8bit[b] over every
// byte b in data[0 .. len). Presumably the table holds the number of set bits
// for each possible byte value, making the result the population count of the
// buffer -- confirm against the table's definition.
//
// On compilers other than clang and MSVC the optimize("no-tree-vectorize")
// attribute is applied so that this function is not auto-vectorized.
//
// @param data  Pointer to the input bytes; must be readable for len bytes.
// @param len   Number of bytes to process. Any length is accepted; a length
//              that is not a multiple of 8 is handled by a byte-wise tail loop.
// @return      Sum of the per-byte table entries over the whole buffer.
#if !defined(__clang__) && !defined(_MSC_VER)
__attribute__((optimize("no-tree-vectorize")))
#endif
uint64_t popcount_scalar_naive_nosimd(const uint8_t* data, size_t len) {
    uint64_t total = 0;

    // Largest multiple of 8 that does not exceed len: the manually unrolled
    // loop below must never index past the end of the buffer.
    const size_t len_unrolled = len - (len % 8);

    // size_t index: avoids the signed/unsigned comparison against len and
    // signed overflow for buffers larger than INT_MAX bytes.
    size_t j = 0;
    for (; j < len_unrolled; j += 8) {
        total += STORM_popcnt_lookup8bit[data[j+0]];
        total += STORM_popcnt_lookup8bit[data[j+1]];
        total += STORM_popcnt_lookup8bit[data[j+2]];
        total += STORM_popcnt_lookup8bit[data[j+3]];
        total += STORM_popcnt_lookup8bit[data[j+4]];
        total += STORM_popcnt_lookup8bit[data[j+5]];
        total += STORM_popcnt_lookup8bit[data[j+6]];
        total += STORM_popcnt_lookup8bit[data[j+7]];
    }

    // Remaining 0..7 bytes when len is not a multiple of 8.
    for (; j < len; ++j) {
        total += STORM_popcnt_lookup8bit[data[j]];
    }

    return total;
}

#ifdef __linux__

#include <asm/unistd.h> // for __NR_perf_event_open
Expand Down Expand Up @@ -266,10 +292,14 @@ int linux_popcount_wrapper(std::string name,

unified.start();
// Call argument subroutine pointer.
total += (*f)((uint8_t*)mem1, n_bitmaps*8);
uint64_t a = (*f)((uint8_t*)mem1, n_bitmaps*8);
unified.end(results);
allresults.push_back(results);

uint64_t b = popcount_scalar_naive_nosimd((uint8_t*)mem1, n_bitmaps*8);
assert(a == b);
total += a;

STORM_aligned_free(mem1);
}

Expand Down Expand Up @@ -332,32 +362,6 @@ uint64_t get_cpu_cycles() {
return result;
};

// Scalar reference implementation: sums STORM_popcnt_lookup8bit[b] over every
// byte b in data[0 .. len). Presumably the table holds the number of set bits
// for each possible byte value, making the result the population count of the
// buffer -- confirm against the table's definition.
//
// On compilers other than clang and MSVC the optimize("no-tree-vectorize")
// attribute is applied so that this function is not auto-vectorized.
//
// @param data  Pointer to the input bytes; must be readable for len bytes.
// @param len   Number of bytes to process. Any length is accepted; a length
//              that is not a multiple of 8 is handled by a byte-wise tail loop.
// @return      Sum of the per-byte table entries over the whole buffer.
#if !defined(__clang__) && !defined(_MSC_VER)
__attribute__((optimize("no-tree-vectorize")))
#endif
uint64_t popcount_scalar_naive_nosimd(const uint8_t* data, size_t len) {
    uint64_t total = 0;

    // Largest multiple of 8 that does not exceed len: the manually unrolled
    // loop below must never index past the end of the buffer.
    const size_t len_unrolled = len - (len % 8);

    // size_t index: avoids the signed/unsigned comparison against len and
    // signed overflow for buffers larger than INT_MAX bytes.
    size_t j = 0;
    for (; j < len_unrolled; j += 8) {
        total += STORM_popcnt_lookup8bit[data[j+0]];
        total += STORM_popcnt_lookup8bit[data[j+1]];
        total += STORM_popcnt_lookup8bit[data[j+2]];
        total += STORM_popcnt_lookup8bit[data[j+3]];
        total += STORM_popcnt_lookup8bit[data[j+4]];
        total += STORM_popcnt_lookup8bit[data[j+5]];
        total += STORM_popcnt_lookup8bit[data[j+6]];
        total += STORM_popcnt_lookup8bit[data[j+7]];
    }

    // Remaining 0..7 bytes when len is not a multiple of 8.
    for (; j < len; ++j) {
        total += STORM_popcnt_lookup8bit[data[j]];
    }

    return total;
}

#if !defined(__clang__) && !defined(_MSC_VER)
__attribute__((optimize("no-tree-vectorize")))
#endif
Expand Down
9 changes: 4 additions & 5 deletions libalgebra.h
Original file line number Diff line number Diff line change
Expand Up @@ -2935,10 +2935,9 @@ uint64_t STORM_popcnt_avx512(const uint64_t* data,
const size_t n_ints)
{
uint64_t count = 0;
const uint64_t n_64b = n_ints / 64;
const uint32_t n_cycles = n_64b / 8;
const uint32_t n_cycles_avx2 = (n_64b % 8) / 4;
const uint32_t n_cycles_sse = ((n_64b % 8) % 4) / 2;
const uint32_t n_cycles = n_ints / 8;
const uint32_t n_cycles_avx2 = (n_ints % 8) / 4;
const uint32_t n_cycles_sse = ((n_ints % 8) % 4) / 2;

const __m512i* r1 = (__m512i*)&data[0];
const __m256i* r2 = (__m256i*)&data[n_cycles*8];
Expand All @@ -2948,7 +2947,7 @@ uint64_t STORM_popcnt_avx512(const uint64_t* data,
count += STORM_popcnt_csa_avx2(r2, n_cycles_avx2);
count += STORM_popcnt_csa_sse4(r3, n_cycles_sse);

for (int i = (8*n_cycles + 4*n_cycles + 2*n_cycles_sse); i < n_64b; ++i) {
for (int i = (8*n_cycles + 4*n_cycles + 2*n_cycles_sse); i < n_ints; ++i) {
count += STORM_POPCOUNT(data[i]);
}

Expand Down

0 comments on commit c150197

Please sign in to comment.