diff --git a/.gitmodules b/.gitmodules index e69de29..cb63c95 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "thirdparty/veclibm"] + path = thirdparty/veclibm + url = https://github.com/rivosinc/veclibm diff --git a/bench/bench.h b/bench/bench.h index af1c839..453f260 100644 --- a/bench/bench.h +++ b/bench/bench.h @@ -15,8 +15,6 @@ # define BENCH_VOLATILE(x) ({__asm volatile("" : "+g"(x) : "g"(x) : "memory");}) # define BENCH_VOLATILE_REG(x) ({__asm volatile("" : "+r"(x) : "r"(x) : "memory");}) # define BENCH_VOLATILE_MEM(x) ({__asm volatile("" : "+m"(x) : "m"(x) : "memory");}) -# define BENCH_FENCE() ({__asm volatile("fence.i");}) - #define BENCH_MAY_ALIAS __attribute__((__may_alias__)) @@ -164,7 +162,7 @@ bench_run(Bench *benches, size_t nBenches) #define TIME \ for (ux beg = rv_cycles(), _once = 1; _once; \ - BENCH_FENCE(), \ + rv_fencei(), \ _cycles += rv_cycles() - beg, _once = 0) #define BENCH_BEG(name) \ diff --git a/nolibc.h b/nolibc.h index 94d4235..4e561a9 100644 --- a/nolibc.h +++ b/nolibc.h @@ -224,6 +224,8 @@ qsort(void *base, size_t len, size_t size, int (*cmp)(const void *, const void * } #endif +static void +rv_fencei() { __asm volatile("fence.i"); } /* fence.i wrapper; the TIME macro in bench/bench.h calls it */ static ux usqrt(ux y) diff --git a/single/Makefile b/single/Makefile new file mode 100644 index 0000000..5738467 --- /dev/null +++ b/single/Makefile @@ -0,0 +1,14 @@ +.POSIX: + +include ../config.mk + +EXECS=veclibm + +all: ${EXECS} + +veclibm: veclibm.c + ${CC} ${CFLAGS} -o $@ $< ../thirdparty/veclibm/src/*.c -I ../thirdparty/veclibm/include -lm -Wno-unused -Wno-maybe-uninitialized + +clean: + rm -f ${EXECS} + diff --git a/single/veclibm.c b/single/veclibm.c new file mode 100644 index 0000000..8278db4 --- /dev/null +++ b/single/veclibm.c @@ -0,0 +1,98 @@ +#include + +#include +#include +#include + +#ifndef N +#define N (1024*128) /* doubles per timed call; edit here or build with -DN=<count> */ +#endif + +static void +rvvlm_sqrt(size_t x_len, const double *x, double *y) /* y[i] = sqrt(x[i]) with RVV intrinsics, vl elements per pass; defined here although APPLY(DECLARE) also declares it, presumably because veclibm ships no sqrt - TODO confirm */ +{ + for (size_t vl; x_len > 0; x_len -= vl, x += vl, y += 
vl) { + vl = __riscv_vsetvl_e64m8(x_len); + vfloat64m8_t v = __riscv_vle64_v_f64m8(x, vl); + __riscv_vse64(y, __riscv_vfsqrt(v, vl), vl); + } +} + +#define APPLY(X) \ +X(exp) X(exp2) X(expm1) X(log) X(log10) X(log2) X(log1p) \ +X(sqrt) X(cbrt) \ +X(sin) X(cos) X(tan) X(asin) X(acos) X(atan) \ +X(sinh) X(cosh) X(tanh) X(asinh) X(acosh) X(atanh) \ +X(erf) X(erfc) X(tgamma) X(lgamma) + +#define DECLARE(f) void rvvlm_##f(size_t x_len, const double *x, double *y); +APPLY(DECLARE) + +#define DEFINE(f) \ + static void lm_##f(size_t x_len, const double *x, double *y) { \ + for (size_t i = 0; i < x_len; ++i) y[i] = f(x[i]); \ + } +APPLY(DEFINE) +struct Func { + void (*rvvlm)(size_t, const double*, double*); /* rvvlm_<name> vector routine */ + void (*lm)(size_t, const double*, double*); /* lm_<name>: scalar loop calling the libm function per element */ + const char *name; /* function name printed in the report */ +}; + +struct Func funcs[] = { +#define ENTRY(f) { rvvlm_##f, lm_##f, #f }, +APPLY(ENTRY) +}; + +typedef struct { uint64_t x, y, z; } URand; + +/* RomuTrio (three 64-bit state words, rotations 12 and 44; this is not the two-word RomuDuoJr), see https://romu-random.org/ */ +static inline uint64_t +urand(URand *r) +{ +#define ROTL(x,n) (((x) << (n)) | ((x) >> (8*sizeof(x) - (n)))) + uint64_t xp = r->x, yp = r->y, zp = r->z; + r->x = 15241094284759029579u * zp; + r->y = ROTL(yp - xp, 12); + r->z = ROTL(zp - yp, 44); + return xp; +} + + +int +main(void) +{ + double *in = malloc(N*sizeof *in), *out = malloc(N*sizeof *out); /* NOTE(review): neither malloc result is checked before use */ + URand r = {123, (uintptr_t)&in, (uintptr_t)&out}; /* y and z are seeded with the addresses of the local pointers, so the inputs can differ between runs */ + + for (size_t i = 0; i < N; ++i) + in[i] = (urand(&r) >> (64 - 53)) * (1.0 / (1ull << 53)); /* 53-bit draw scaled by 2^-53, i.e. a double in [0, 1); NOTE(review): acosh therefore only sees out-of-domain arguments (it needs x >= 1) */ + + for (size_t i = 0; i < sizeof funcs / sizeof *funcs; ++i) { + size_t beg, end; + struct Func f = funcs[i]; + printf("%s libm: ", f.name); + for (size_t i = 0; i < 3; ++i) { /* three timed runs; this i shadows the outer loop index */ + __asm volatile("fence.i"); + __asm volatile ("csrr %0, cycle" : "=r"(beg)); + f.lm(N, in, out); + __asm volatile("fence.i"); + __asm volatile ("csrr %0, cycle" : "=r"(end)); + printf(" %f", ((double)N) / (end-beg)); /* throughput: N elements divided by the elapsed cycle count */ + } + printf(" elements/cycle\n%s rvvlm:", f.name); + for (size_t i = 0; i < 3; ++i) { + __asm volatile("fence.i"); + __asm volatile ("csrr 
%0, cycle" : "=r"(beg)); + f.rvvlm(N, in, out); + __asm volatile("fence.i"); + __asm volatile ("csrr %0, cycle" : "=r"(end)); + printf(" %f", ((double)N) / (end-beg)); + } + printf(" elements/cycle\n"); + } + free(in); + free(out); + return 0; +} + diff --git a/thirdparty/veclibm b/thirdparty/veclibm new file mode 160000 index 0000000..deba355 --- /dev/null +++ b/thirdparty/veclibm @@ -0,0 +1 @@ +Subproject commit deba3559034ea8363c660620b3163b46aabc6267