From 73c77722f09439f390b0de74cee4a50e1daac451 Mon Sep 17 00:00:00 2001 From: HyukWoo Park Date: Thu, 13 Jul 2023 20:49:52 +0900 Subject: [PATCH] Update SIMD replace lane instructions Signed-off-by: HyukWoo Park --- src/interpreter/ByteCode.h | 68 ++++++++++++++++++++++++++------- src/interpreter/Interpreter.cpp | 19 ++++++++- src/parser/WASMParser.cpp | 35 +++++++++++------ 3 files changed, 95 insertions(+), 27 deletions(-) diff --git a/src/interpreter/ByteCode.h b/src/interpreter/ByteCode.h index fcb2decf5..907d5be67 100644 --- a/src/interpreter/ByteCode.h +++ b/src/interpreter/ByteCode.h @@ -473,16 +473,24 @@ class FunctionType; F(V128Store32Lane, uint32_t) \ F(V128Store64Lane, uint64_t) -#define FOR_EACH_BYTECODE_SIMD_LANE_OP(F) \ - F(I8X16ExtractLaneS, int8_t, int32_t) \ - F(I8X16ExtractLaneU, uint8_t, uint32_t) \ - F(I16X8ExtractLaneS, int16_t, int32_t) \ - F(I16X8ExtractLaneU, uint16_t, uint32_t) \ - F(I32X4ExtractLane, int32_t, uint32_t) \ - F(I64X2ExtractLane, uint64_t, uint64_t) \ - F(F32X4ExtractLane, float, float) \ +#define FOR_EACH_BYTECODE_SIMD_EXTRACT_LANE_OP(F) \ + F(I8X16ExtractLaneS, int8_t, int32_t) \ + F(I8X16ExtractLaneU, uint8_t, uint32_t) \ + F(I16X8ExtractLaneS, int16_t, int32_t) \ + F(I16X8ExtractLaneU, uint16_t, uint32_t) \ + F(I32X4ExtractLane, int32_t, uint32_t) \ + F(I64X2ExtractLane, uint64_t, uint64_t) \ + F(F32X4ExtractLane, float, float) \ F(F64X2ExtractLane, double, double) +#define FOR_EACH_BYTECODE_SIMD_REPLACE_LANE_OP(F) \ + F(I8X16ReplaceLane, uint32_t, uint8_t) \ + F(I16X8ReplaceLane, uint32_t, uint16_t) \ + F(I32X4ReplaceLane, uint32_t, uint32_t) \ + F(I64X2ReplaceLane, uint64_t, uint64_t) \ + F(F32X4ReplaceLane, float, float) \ + F(F64X2ReplaceLane, double, double) + #define FOR_EACH_BYTECODE_SIMD_ETC_OP(F) \ F(V128BitSelect) \ F(V128Load32Zero) \ @@ -505,7 +513,8 @@ class FunctionType; FOR_EACH_BYTECODE_SIMD_LOAD_EXTEND_OP(F) \ FOR_EACH_BYTECODE_SIMD_LOAD_LANE_OP(F) \ FOR_EACH_BYTECODE_SIMD_STORE_LANE_OP(F) \ - FOR_EACH_BYTECODE_SIMD_LANE_OP(F) \ + FOR_EACH_BYTECODE_SIMD_EXTRACT_LANE_OP(F) \ + FOR_EACH_BYTECODE_SIMD_REPLACE_LANE_OP(F) \ FOR_EACH_BYTECODE_SIMD_ETC_OP(F) class ByteCode { @@ -1545,16 +1554,16 @@ class SIMDMemoryStore : public ByteCode { #if !defined(NDEBUG) -#define DEFINE_SIMD_LANE_BYTECODE_DUMP(name) \ +#define DEFINE_SIMD_EXTRACT_LANE_BYTECODE_DUMP(name) \ void dump(size_t pos) \ { \ printf(#name " idx: %" PRIu32 " src: %" PRIu32 " dst: %" PRIu32, (uint32_t)m_index, (uint32_t)m_srcOffset, (uint32_t)m_dstOffset); \ } #else -#define DEFINE_SIMD_LANE_BYTECODE_DUMP(name) +#define DEFINE_SIMD_EXTRACT_LANE_BYTECODE_DUMP(name) #endif -#define DEFINE_SIMD_LANE_BYTECODE(name, ...) \ +#define DEFINE_SIMD_EXTRACT_LANE_BYTECODE(name, ...) \ class name : public ByteCode { \ public: \ name(uint8_t index, ByteCodeStackOffset src, ByteCodeStackOffset dst) \ @@ -1567,13 +1576,43 @@ class SIMDMemoryStore : public ByteCode { uint32_t index() const { return m_index; } \ ByteCodeStackOffset srcOffset() const { return m_srcOffset; } \ ByteCodeStackOffset dstOffset() const { return m_dstOffset; } \ - DEFINE_SIMD_LANE_BYTECODE_DUMP(name) \ + DEFINE_SIMD_EXTRACT_LANE_BYTECODE_DUMP(name) \ protected: \ uint8_t m_index; \ ByteCodeStackOffset m_srcOffset; \ ByteCodeStackOffset m_dstOffset; \ }; +#if !defined(NDEBUG) +#define DEFINE_SIMD_REPLACE_LANE_BYTECODE_DUMP(name) \ + void dump(size_t pos) \ + { \ + printf(#name " idx: %" PRIu32 " src0: %" PRIu32 " src1: %" PRIu32 " dst: %" PRIu32, (uint32_t)m_index, (uint32_t)m_srcOffsets[0], (uint32_t)m_srcOffsets[1], (uint32_t)m_dstOffset); \ + } +#else +#define DEFINE_SIMD_REPLACE_LANE_BYTECODE_DUMP(name) +#endif + +#define DEFINE_SIMD_REPLACE_LANE_BYTECODE(name, ...) \ + class name : public ByteCode { \ + public: \ + name(uint8_t index, ByteCodeStackOffset src0, ByteCodeStackOffset src1, ByteCodeStackOffset dst) \ + : ByteCode(Opcode::name##Opcode) \ + , m_index(index) \ + , m_srcOffsets{ src0, src1 } \ + , m_dstOffset(dst) \ + { \ + } \ + uint32_t index() const { return m_index; } \ + const ByteCodeStackOffset* srcOffsets() const { return m_srcOffsets; } \ + ByteCodeStackOffset dstOffset() const { return m_dstOffset; } \ + DEFINE_SIMD_REPLACE_LANE_BYTECODE_DUMP(name) \ + protected: \ + uint8_t m_index; \ + ByteCodeStackOffset m_srcOffsets[2]; \ + ByteCodeStackOffset m_dstOffset; \ + }; + FOR_EACH_BYTECODE_LOAD_OP(DEFINE_LOAD_BYTECODE) FOR_EACH_BYTECODE_STORE_OP(DEFINE_STORE_BYTECODE) @@ -1581,7 +1620,8 @@ FOR_EACH_BYTECODE_SIMD_LOAD_SPLAT_OP(DEFINE_LOAD_BYTECODE) FOR_EACH_BYTECODE_SIMD_LOAD_EXTEND_OP(DEFINE_LOAD_BYTECODE) FOR_EACH_BYTECODE_SIMD_LOAD_LANE_OP(DEFINE_SIMD_LOAD_LANE_BYTECODE) FOR_EACH_BYTECODE_SIMD_STORE_LANE_OP(DEFINE_SIMD_STORE_LANE_BYTECODE) -FOR_EACH_BYTECODE_SIMD_LANE_OP(DEFINE_SIMD_LANE_BYTECODE) +FOR_EACH_BYTECODE_SIMD_EXTRACT_LANE_OP(DEFINE_SIMD_EXTRACT_LANE_BYTECODE) +FOR_EACH_BYTECODE_SIMD_REPLACE_LANE_OP(DEFINE_SIMD_REPLACE_LANE_BYTECODE) #undef DEFINE_LOAD_BYTECODE_DUMP #undef DEFINE_LOAD_BYTECODE #undef DEFINE_STORE_BYTECODE_DUMP diff --git a/src/interpreter/Interpreter.cpp b/src/interpreter/Interpreter.cpp index 1e8503fde..20b7f0f10 100644 --- a/src/interpreter/Interpreter.cpp +++ b/src/interpreter/Interpreter.cpp @@ -691,7 +691,7 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, NEXT_INSTRUCTION(); \ } -#define SIMD_LANE_OPERATION(opcodeName, readType, writeType) \ +#define SIMD_EXTRACT_LANE_OPERATION(opcodeName, readType, writeType) \ DEFINE_OPCODE(opcodeName) \ : \ { \ @@ -703,6 +703,20 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, NEXT_INSTRUCTION(); \ } +#define SIMD_REPLACE_LANE_OPERATION(opcodeName, readType, writeType) \ + DEFINE_OPCODE(opcodeName) \ + : \ + { \ + using ResultType = typename SIMDType::Type; \ + opcodeName* code = (opcodeName*)programCounter; \ + auto val = readValue(bp, code->srcOffsets()[1]); \ + ResultType result = readValue(bp, code->srcOffsets()[0]); \ + result[code->index()] = val; \ + writeValue(bp, code->dstOffset(), result); \ + ADD_PROGRAM_COUNTER(opcodeName); \ + NEXT_INSTRUCTION(); \ + } + #if defined(WALRUS_ENABLE_COMPUTED_GOTO) #if defined(WALRUS_COMPUTED_GOTO_INTERPRETER_INIT_WITH_NULL) if (UNLIKELY((((ByteCode*)programCounter)->m_opcodeInAddress) == NULL)) { @@ -983,7 +997,8 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, FOR_EACH_BYTECODE_SIMD_LOAD_EXTEND_OP(SIMD_MEMORY_LOAD_EXTEND_OPERATION) FOR_EACH_BYTECODE_SIMD_LOAD_LANE_OP(SIMD_MEMORY_LOAD_LANE_OPERATION) FOR_EACH_BYTECODE_SIMD_STORE_LANE_OP(SIMD_MEMORY_STORE_LANE_OPERATION) - FOR_EACH_BYTECODE_SIMD_LANE_OP(SIMD_LANE_OPERATION) + FOR_EACH_BYTECODE_SIMD_EXTRACT_LANE_OP(SIMD_EXTRACT_LANE_OPERATION) + FOR_EACH_BYTECODE_SIMD_REPLACE_LANE_OP(SIMD_REPLACE_LANE_OPERATION) // FOR_EACH_BYTECODE_SIMD_ETC_OP DEFINE_OPCODE(V128BitSelect) diff --git a/src/parser/WASMParser.cpp b/src/parser/WASMParser.cpp index 552de7d4d..84cb4a15d 100644 --- a/src/parser/WASMParser.cpp +++ b/src/parser/WASMParser.cpp @@ -1864,18 +1864,31 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { virtual void OnSimdLaneOpExpr(int opcode, uint64_t value) override { auto code = static_cast(opcode); - ASSERT(peekVMStackSize() == Walrus::valueSizeInStack(toValueKind(Type::V128))); - auto src = popVMStack(); - auto dst = pushVMStack(WASMCodeInfo::codeTypeToMemorySize(g_wasmCodeInfo[opcode].m_resultType)); switch (code) { -#define GENERATE_SIMD_LANE_CODE_CASE(name, ...) \ - case WASMOpcode::name##Opcode: { \ - pushByteCode(Walrus::name(static_cast(value), src, dst), code); \ - break; \ - } - - FOR_EACH_BYTECODE_SIMD_LANE_OP(GENERATE_SIMD_LANE_CODE_CASE) -#undef GENERATE_SIMD_LANE_CODE_CASE +#define GENERATE_SIMD_EXTRACT_LANE_CODE_CASE(name, ...) \ + case WASMOpcode::name##Opcode: { \ + ASSERT(WASMCodeInfo::codeTypeToMemorySize(g_wasmCodeInfo[opcode].m_paramTypes[0]) == peekVMStackSize()); \ + auto src = popVMStack(); \ + auto dst = pushVMStack(WASMCodeInfo::codeTypeToMemorySize(g_wasmCodeInfo[opcode].m_resultType)); \ + pushByteCode(Walrus::name(static_cast(value), src, dst), code); \ + break; \ + } + +#define GENERATE_SIMD_REPLACE_LANE_CODE_CASE(name, ...) \ + case WASMOpcode::name##Opcode: { \ + ASSERT(WASMCodeInfo::codeTypeToMemorySize(g_wasmCodeInfo[opcode].m_paramTypes[1]) == peekVMStackSize()); \ + auto src1 = popVMStack(); \ + ASSERT(WASMCodeInfo::codeTypeToMemorySize(g_wasmCodeInfo[opcode].m_paramTypes[0]) == peekVMStackSize()); \ + auto src0 = popVMStack(); \ + auto dst = pushVMStack(WASMCodeInfo::codeTypeToMemorySize(g_wasmCodeInfo[opcode].m_resultType)); \ + pushByteCode(Walrus::name(static_cast(value), src0, src1, dst), code); \ + break; \ + } + + FOR_EACH_BYTECODE_SIMD_EXTRACT_LANE_OP(GENERATE_SIMD_EXTRACT_LANE_CODE_CASE) + FOR_EACH_BYTECODE_SIMD_REPLACE_LANE_OP(GENERATE_SIMD_REPLACE_LANE_CODE_CASE) +#undef GENERATE_SIMD_EXTRACT_LANE_CODE_CASE +#undef GENERATE_SIMD_REPLACE_LANE_CODE_CASE default: ASSERT_NOT_REACHED(); break;