Skip to content

Commit

Permalink
added bench/LUT6
Browse files Browse the repository at this point in the history
  • Loading branch information
camel-cdr committed Oct 27, 2024
1 parent 260b1f7 commit 30621e9
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 2 deletions.
101 changes: 101 additions & 0 deletions bench/LUT6.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#if MX_N == 4

# LUT6_rvv_vloxei8_m4: a0 = lut, a1 = ptr, a2 = len
# Replaces each of the len bytes at ptr in place with lut[byte & 63], doing
# the table lookup with an ordered indexed load (vloxei8) from memory.
.global LUT6_rvv_vloxei8_m4
LUT6_rvv_vloxei8_m4:
# Splat the 6-bit index mask (63) over the v24 group at VLMAX.
vsetvli t0, x0, e8, m4, ta, ma
li t0, 63
vmv.v.x v24, t0
1:
vsetvli a3, a2, e8, m4, ta, ma   # a3 = number of bytes handled this pass
vle8.v v8, (a1)
vand.vv v8, v8, v24              # keep the low 6 bits as byte offsets
vloxei8.v v8, (a0), v8           # v8[i] = byte at a0 + v8[i]
vse8.v v8, (a1)                  # write the results back over the input
sub a2, a2, a3
add a1, a1, a3
bnez a2, 1b
ret

# LUT6_rvv_vluxei8_m4: a0 = lut, a1 = ptr, a2 = len
# Replaces each of the len bytes at ptr in place with lut[byte & 63], doing
# the table lookup with an unordered indexed load (vluxei8) from memory.
.global LUT6_rvv_vluxei8_m4
LUT6_rvv_vluxei8_m4:
# Splat the 6-bit index mask (63) over the v24 group at VLMAX.
vsetvli t0, x0, e8, m4, ta, ma
li t0, 63
vmv.v.x v24, t0
1:
vsetvli a3, a2, e8, m4, ta, ma   # a3 = number of bytes handled this pass
vle8.v v8, (a1)
vand.vv v8, v8, v24              # keep the low 6 bits as byte offsets
vluxei8.v v8, (a0), v8           # v8[i] = byte at a0 + v8[i]
vse8.v v8, (a1)                  # write the results back over the input
sub a2, a2, a3
add a1, a1, a3
bnez a2, 1b
ret

# a0 = lut, a1 = ptr, a2 = len
# Replaces each of the len bytes at ptr in place with lut[byte & 63]. The
# table is loaded once into the v0 register group and looked up with a single
# m4 vrgather per pass, so the loop itself only touches ptr memory.
# NOTE(review): loading 64 table bytes at e8/m4 assumes VLMAX >= 64 there
# (VLEN >= 128) -- confirm for the targets this is built for.
.global LUT6_rvv_gather_m4
LUT6_rvv_gather_m4:
li t0, 64
vsetvli zero, t0, e8, m4, ta, ma
vle8.v v0, (a0)                  # v0 group = lut[0..63]

# Splat the 6-bit index mask (63) over the v24 group at VLMAX.
vsetvli t0, x0, e8, m4, ta, ma
li t0, 63
vmv.v.x v24, t0
1:
vsetvli a0, a2, e8, m4, ta, ma   # a0 is reused as vl; the table pointer is no longer needed
vle8.v v8, (a1)
vand.vv v8, v8, v24              # indices in 0..63
vrgather.vv v16, v0, v8          # v16[i] = table[v8[i]]
vse8.v v16, (a1)                 # write the results back over the input
sub a2, a2, a0
add a1, a1, a0
bnez a2, 1b
ret

# LUT6_rvv_m1m2m4_gathers_m4: a0 = lut, a1 = ptr, a2 = len
#
# Replaces each of the len bytes at ptr in place with lut[byte & 63].
# The 64-byte table stays in the v0 register group and the lookup is done
# with vrgather at the smallest LMUL whose group still holds all 64 entries:
#   vlenb >= 64 : four m1 gathers (table fits in v0)
#   vlenb >= 32 : two m2 gathers  (table fits in v0-v1)
#   otherwise   : one m4 gather
# Loads, masking and stores always run at m4.
#
# Selector flags, computed once and kept live across the whole loop:
#   t1 = 1 when vlenb < 64 (m1 path not usable)
#   t0 = 1 when vlenb < 32 (m2 path not usable)
# t2 is only a scratch destination for the VLMAX vsetvli in the m1/m2 paths,
# so that the selector flags are never overwritten inside the loop.
# NOTE(review): loading 64 table bytes at e8/m4 assumes VLEN >= 128 -- confirm.
.global LUT6_rvv_m1m2m4_gathers_m4
LUT6_rvv_m1m2m4_gathers_m4:
	li t0, 64
	vsetvli zero, t0, e8, m4, ta, ma
	vle8.v v0, (a0)                  # v0 group = lut[0..63]

	vsetvli t0, x0, e8, m4, ta, ma
	li t0, 63
	vmv.v.x v24, t0                  # 6-bit index mask in every element

	csrr t0, vlenb
	srli t0, t0, 4                   # vlenb / 16
	sltiu t1, t0, 4                  # t1 = (vlenb < 64)
	sltiu t0, t0, 2                  # t0 = (vlenb < 32)
	j 0f
1:	# m1 path: gather each register of the index group separately
	vsetvli t2, x0, e8, m1, ta, ma   # vl = VLMAX; t2 is scratch, flags stay intact
	vrgather.vv v16, v0, v8
	vrgather.vv v17, v0, v9
	vrgather.vv v18, v0, v10
	vrgather.vv v19, v0, v11
8:	# common tail: restore the m4 configuration with this pass's vl and store
	vsetvli x0, a0, e8, m4, ta, ma
	vse8.v v16, (a1)
	sub a2, a2, a0
	add a1, a1, a0
	beqz a2, 9f
0:	# loop head: load and mask the next chunk, then dispatch on the flags
	vsetvli a0, a2, e8, m4, ta, ma   # a0 is reused as vl; the table pointer is no longer needed
	vle8.v v8, (a1)
	vand.vv v8, v8, v24              # indices in 0..63
	beqz t1, 1b                      # table fits in one register
	beqz t0, 2f                      # table fits in two registers
	vrgather.vv v16, v0, v8          # m4 fallback
	j 8b
2:	# m2 path: gather each half of the index group separately
	vsetvli t2, x0, e8, m2, ta, ma   # vl = VLMAX; t2 is scratch, flags stay intact
	vrgather.vv v16, v0, v8
	vrgather.vv v18, v0, v10
	j 8b
9:
	ret

#endif


58 changes: 58 additions & 0 deletions bench/LUT6.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include "bench.h"

/*
 * Scalar reference implementation: maps every byte of ptr[0..n) in place
 * through lut, indexing with the low 6 bits of the byte.
 * BENCH_CLOBBER() is invoked after each element.
 */
void
LUT6_scalar(uint8_t lut[64], uint8_t *ptr, size_t n)
{
	uint8_t *const end = ptr + n;

	while (ptr != end) {
		*ptr = lut[*ptr & 63];
		BENCH_CLOBBER();
		++ptr;
	}
}

/*
 * Plain scalar loop: maps every byte of ptr[0..n) in place through lut,
 * indexing with the low 6 bits of the byte. The loop body contains nothing
 * but the lookup, so the compiler is free to vectorize it.
 */
void
LUT6_scalar_autovec(uint8_t lut[64], uint8_t *ptr, size_t n)
{
	for (size_t i = 0; i < n; ++i) {
		ptr[i] = lut[ptr[i] & 63];
	}
}


/*
 * X-macro listing every LUT6 implementation by suffix; f is expanded once
 * per entry. The trailing backslash on the last entry continues the macro
 * onto the following blank line, which must therefore stay blank.
 */
#define IMPLS(f) \
f(scalar) \
f(scalar_autovec) \
f(rvv_gather_m4) \
f(rvv_m1m2m4_gathers_m4) \
f(rvv_vluxei8_m4) \
f(rvv_vloxei8_m4) \

/* Common signature of all implementations: table, buffer, element count. */
typedef void Func(uint8_t lut[64], uint8_t *ptr, size_t n);

/* Declares LUT6_<name> as an external Func for every entry of IMPLS. */
#define DECLARE(f) extern Func LUT6_##f;
IMPLS(DECLARE)

/* Builds one { "<name>", &LUT6_<name> } initializer per entry of IMPLS.
 * Impl is declared outside this file (presumably in bench.h). */
#define EXTRACT(f) { #f, &LUT6_##f },
Impl impls[] = { IMPLS(EXTRACT) };

/* Byte view of the benchmark buffer; set by init(). */
uint8_t *ptr;

/* Points ptr at the start of mem, which is declared outside this file. */
void
init(void)
{
	ptr = (uint8_t *)mem;
}

/*
 * Folds the first n bytes of ptr into a running hash: for each byte, the
 * accumulator is passed through uhash() and the byte is added to the result.
 * Returns the final accumulator (0 when n is 0).
 */
ux
checksum(size_t n)
{
	const uint8_t *cur = ptr;
	const uint8_t *const end = ptr + n;
	ux acc = 0;

	while (cur != end) {
		acc = uhash(acc) + *cur;
		++cur;
	}
	return acc;
}

/*
 * Benchmark body "base". BENCH_BEG/BENCH_END, TIME, f and n come from macros
 * defined outside this file; presumably f is the implementation under test
 * and n the current input size -- confirm against bench.h.
 */
BENCH_BEG(base) {
/* Lookup table: A-Z, a-z, 0-9, '+', '/' (the standard Base64 alphabet).
 * The string literal adds a trailing NUL as a 65th element; it is never
 * selected because every implementation masks the index with 63. */
static uint8_t lut[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789"
"+/";
/* presumably refills the first n bytes of ptr with random data before each run */
bench_memrand(ptr, n * sizeof *ptr);
/* presumably times this single in-place call */
TIME f(lut, ptr, n);
} BENCH_END

/*
 * Benchmark table with a single entry labelled "LUT6" that ties the impls
 * array to bench_base with MAX_MEM as its size argument. Bench, BENCH and
 * BENCH_MAIN are defined outside this file; BENCH_MAIN presumably provides
 * the program entry point -- confirm against bench.h.
 */
Bench benches[] = {
BENCH( impls, MAX_MEM, "LUT6", bench_base )
}; BENCH_MAIN(benches)

2 changes: 1 addition & 1 deletion bench/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

include ../config.mk

EXECS=memcpy memset utf8_count strlen mergelines mandelbrot chacha20 poly1305 ascii_to_utf16 ascii_to_utf32 byteswap LUT4
EXECS=memcpy memset utf8_count strlen mergelines mandelbrot chacha20 poly1305 ascii_to_utf16 ascii_to_utf32 byteswap LUT4 LUT6

all: ${EXECS}

Expand Down
2 changes: 1 addition & 1 deletion bench/bench.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ main(void)

size_t x;
randState.x ^= rv_cycles()*7;
randState.y += rv_cycles() ^ (uintptr_t)&x + 666*(uintptr_t)mem;
randState.y += rv_cycles() ^ ((uintptr_t)&x + 666*(uintptr_t)mem);

/* initialize memory */
bench_memrand(mem, MAX_MEM);
Expand Down

0 comments on commit 30621e9

Please sign in to comment.