diff --git a/sim/simx/Makefile b/sim/simx/Makefile index d3e726bbe..83054edc4 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -17,12 +17,12 @@ CXXFLAGS += $(CONFIGS) LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator -SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp +SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp # Add V extension sources ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),) - SRCS += $(SRC_DIR)/execute_v.cpp + SRCS += $(SRC_DIR)/vpu.cpp endif # Debugging diff --git a/sim/simx/arch.h b/sim/simx/arch.h index d68345db6..6becf5c91 100644 --- a/sim/simx/arch.h +++ b/sim/simx/arch.h @@ -29,7 +29,6 @@ class Arch { uint16_t num_cores_; uint16_t num_clusters_; uint16_t socket_size_; - uint16_t vsize_; uint16_t num_barriers_; uint64_t local_mem_base_; @@ -40,7 +39,6 @@ class Arch { , num_cores_(num_cores) , num_clusters_(NUM_CLUSTERS) , socket_size_(SOCKET_SIZE) - , vsize_(VLEN / 8) , num_barriers_(NUM_BARRIERS) , local_mem_base_(LMEM_BASE_ADDR) {} @@ -73,10 +71,6 @@ class Arch { return socket_size_; } - uint16_t vsize() const { - return vsize_; - } - }; } \ No newline at end of file diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index a4c0bb2ad..b57893daa 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -453,6 +453,7 @@ std::ostream &operator<<(std::ostream &os, const Instr &instr) { if (sep++ != 0) { os << ", "; } else { os << " "; } os << "0x" << std::hex << instr.getImm() << std::dec; } +#ifdef EXT_V_ENABLE if (instr.getOpcode() == Opcode::SYS && instr.getFunc3() >= 5) { // CSRs with immediate values if (sep++ != 0) { os << ", "; } else { os << " "; } @@ -462,6 +463,7 @@ std::ostream &operator<<(std::ostream &os, const Instr &instr) { if (instr.getVattrMask() != 0) { print_vec_attr(os, instr); } +#endif return os; } } diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index b834a87f2..7abec98c5 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -33,7 +33,9 @@ using namespace vortex; Emulator::warp_t::warp_t(const Arch& arch) : ireg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) , freg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) - , vreg_file(MAX_NUM_REGS, std::vector(arch.vsize())) +#ifdef EXT_V_ENABLE + , vreg_file(MAX_NUM_REGS, std::vector(MAX_NUM_REGS)) +#endif , uuid(0) {} @@ -43,9 +45,11 @@ void Emulator::warp_t::clear(uint64_t startup_addr) { this->uuid = 0; this->fcsr = 0; +#ifdef EXT_V_ENABLE this->vtype = {0, 0, 0, 0, 0}; this->vl = 0; this->vlmax = 0; +#endif for (auto& reg_file : this->ireg_file) { for (auto& reg : reg_file) { @@ -68,6 +72,7 @@ void Emulator::warp_t::clear(uint64_t startup_addr) { } } +#ifdef EXT_V_ENABLE for (auto& reg_file : this->vreg_file) { for (auto& reg : reg_file) { #ifndef NDEBUG @@ -77,16 +82,7 @@ void Emulator::warp_t::clear(uint64_t startup_addr) { #endif } } - - for (auto& reg_file : this->vreg_file) { - for (auto& reg : reg_file) { - #ifndef NDEBUG - reg = 0; - #else - reg = std::rand(); - #endif - } - } +#endif } /////////////////////////////////////////////////////////////////////////////// @@ -102,13 +98,17 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) // considered to be big enough to hold input tiles for one output tile. // In future versions, scratchpad size should be fixed to an appropriate value. , scratchpad(std::vector(32 * 32 * 32768)) + #ifdef EXT_V_ENABLE , csrs_(arch.num_warps()) + #endif { std::srand(50); +#ifdef EXT_V_ENABLE for (uint32_t i = 0; i < arch_.num_warps(); ++i) { csrs_.at(i).resize(arch.num_threads()); } +#endif this->clear(); } @@ -490,6 +490,7 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { case VX_CSR_FRM: return (warps_.at(wid).fcsr >> 5); case VX_CSR_FCSR: return warps_.at(wid).fcsr; +#ifdef EXT_V_ENABLE // Vector CRSs case VX_CSR_VSTART: return csrs_.at(wid).at(tid)[VX_CSR_VSTART]; @@ -514,6 +515,7 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { return csrs_.at(wid).at(tid)[VX_CSR_VTIME]; case VX_CSR_VINSTRET: return csrs_.at(wid).at(tid)[VX_CSR_VINSTRET]; +#endif case VX_CSR_MHARTID: return (core_->id() * arch_.num_warps() + wid) * arch_.num_threads() + tid; case VX_CSR_THREAD_ID: return tid; @@ -631,6 +633,7 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { csr_mscratch_ = value; break; +#ifdef EXT_V_ENABLE // Vector CRSs case VX_CSR_VSTART: csrs_.at(wid).at(tid)[VX_CSR_VSTART] = value; @@ -652,6 +655,7 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { csrs_.at(wid).at(tid)[VX_CSR_VTYPE] = value; break; case VX_CSR_VLENB: // read only, set to VLEN / 8 +#endif case VX_CSR_SATP: #ifdef VM_ENABLE diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index 436d43486..aae018fc5 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -25,7 +25,9 @@ #include "emulator.h" #include "instr.h" #include "core.h" +#ifdef EXT_V_ENABLE #include "processor_impl.h" +#endif #include "VX_types.h" using namespace vortex; @@ -117,8 +119,10 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } DPN(2, "}" << std::endl); break; + #ifdef EXT_V_ENABLE case RegType::Vector: break; + #endif default: break; } @@ -707,11 +711,12 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } } rd_write = true; - } else { - #ifdef EXT_V_ENABLE + } + #ifdef EXT_V_ENABLE + else { this->loadVector(instr, wid, rsdata); - #endif } + #endif break; } case Opcode::S: @@ -744,11 +749,12 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { std::abort(); } } - } else { - #ifdef EXT_V_ENABLE + } + #ifdef EXT_V_ENABLE + else { this->storeVector(instr, wid, rsdata); - #endif } + #endif break; } case Opcode::AMO: { @@ -932,7 +938,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { for (uint32_t t = thread_start; t < num_threads; ++t) { if (!warp.tmask.test(t)) continue; - uint32_t frm = (func3 == 0x7) ? this->get_csr(VX_CSR_FRM, t, wid) : func3; + uint32_t frm = this->get_fpu_rm(func3, t, wid); uint32_t fflags = 0; switch (func7) { case 0x00: { // RV32F: FADD.S @@ -1247,10 +1253,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { break; } } - if (fflags) { - this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, t, wid) | fflags, t, wid); - this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, t, wid) | fflags, t, wid); - } + this->update_fcrs(fflags, t, wid); } rd_write = true; break; @@ -1304,10 +1307,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { default: break; } - if (fflags) { - this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, t, wid) | fflags, t, wid); - this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, t, wid) | fflags, t, wid); - } + this->update_fcrs(fflags, t, wid); } rd_write = true; break; diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index 3df8b0e1a..d6ed15a25 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -120,7 +120,9 @@ int main(int argc, char **argv) { #endif // run simulation // vector test exitcode is a special case + #ifdef EXT_V_ENABLE if (vector_test) return processor.run(); + #endif // else continue as normal processor.run(); diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 96fc49df9..a11351d03 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -127,7 +127,9 @@ int ProcessorImpl::run() { done = false; continue; } + #ifdef EXT_V_ENABLE exitcode |= cluster->get_exitcode(); + #endif } perf_mem_latency_ += perf_mem_pending_reads_; } while (!done); diff --git a/sim/simx/execute_v.cpp b/sim/simx/vpu.cpp similarity index 55% rename from sim/simx/execute_v.cpp rename to sim/simx/vpu.cpp index d14338024..3a70560ec 100644 --- a/sim/simx/execute_v.cpp +++ b/sim/simx/vpu.cpp @@ -1,7 +1,8 @@ // This is a fork of https://github.com/troibe/vortex/tree/simx-v2-vector -// The purpose of this fork is to make the simx-v2-vector up to date with master +// The purpose of this fork is to make simx-v2-vector up to date with master // Thanks to Troibe for his amazing work +#ifdef EXT_V_ENABLE #include "emulator.h" #include "instr.h" #include "processor_impl.h" @@ -10,2397 +11,10 @@ #include #include #include +#include "vpu.h" using namespace vortex; -template -class Add { -public: - static R apply(T first, T second, R) { - return (R)first + (R)second; - } - static std::string name() { return "Add"; } -}; - -template -class Sub { -public: - static R apply(T first, T second, R) { - return (R)second - (R)first; - } - static std::string name() { return "Sub"; } -}; - -template -class Adc { -public: - static R apply(T first, T second, R third) { - return (R)first + (R)second + third; - } - static std::string name() { return "Adc"; } -}; - -template -class Madc { -public: - static R apply(T first, T second, R third) { - return ((R)first + (R)second + third) > (R)std::numeric_limits::max(); - } - static std::string name() { return "Madc"; } -}; - -template -class Sbc { -public: - static R apply(T first, T second, R third) { - return (R)second - (R)first - third; - } - static std::string name() { return "Sbc"; } -}; - -template -class Msbc { -public: - static R apply(T first, T second, R third) { - return (R)second < ((R)first + third); - } - static std::string name() { return "Msbc"; } -}; - -template -class Ssub { -public: - static R apply(T first, T second, uint32_t, uint32_t &vxsat_) { - // rounding mode is not relevant for this operation - T unclippedResult = second - first; - R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); - vxsat_ |= clippedResult != unclippedResult; - return clippedResult; - } - static std::string name() { return "Ssub"; } -}; - -template -class Ssubu { -public: - static R apply(T first, T second, uint32_t, uint32_t &vxsat_) { - // rounding mode is not relevant for this operation - if (first > second) { - vxsat_ = true; - return 0; - } else { - vxsat_ = false; - return second - first; - } - } - static std::string name() { return "Ssubu"; } -}; - -template -class Sadd { -public: - static R apply(T first, T second, uint32_t, uint32_t &vxsat_) { - // rounding mode is not relevant for this operation - T unclippedResult = second + first; - R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); - vxsat_ |= clippedResult != unclippedResult; - return clippedResult; - } - static std::string name() { return "Sadd"; } -}; - -template -class Rsub { -public: - static R apply(T first, T second, R) { - return first - second; - } - static std::string name() { return "Rsub"; } -}; - -template -class Div { -public: - static R apply(T first, T second, R) { - // logic taken from scalar div - if (first == 0) { - return -1; - } else if (second == std::numeric_limits::min() && first == T(-1)) { - return second; - } else { - return (R)second / (R)first; - } - } - static std::string name() { return "Div"; } -}; - -template -class Rem { -public: - static R apply(T first, T second, R) { - // logic taken from scalar rem - if (first == 0) { - return second; - } else if (second == std::numeric_limits::min() && first == T(-1)) { - return 0; - } else { - return (R)second % (R)first; - } - } - static std::string name() { return "Rem"; } -}; - -template -class Mul { -public: - static R apply(T first, T second, R) { - return (R)first * (R)second; - } - static std::string name() { return "Mul"; } -}; - -template -class Mulsu { -public: - static R apply(T first, T second, R) { - R first_ext = zext((R)first, (sizeof(T) * 8)); - return first_ext * (R)second; - } - static std::string name() { return "Mulsu"; } -}; - -template -class Mulh { -public: - static R apply(T first, T second, R) { - __int128_t first_ext = sext((__int128_t)first, (sizeof(T) * 8)); - __int128_t second_ext = sext((__int128_t)second, (sizeof(T) * 8)); - return (first_ext * second_ext) >> (sizeof(T) * 8); - } - static std::string name() { return "Mulh"; } -}; - -template -class Mulhsu { -public: - static R apply(T first, T second, R) { - __int128_t first_ext = zext((__int128_t)first, (sizeof(T) * 8)); - __int128_t second_ext = sext((__int128_t)second, (sizeof(T) * 8)); - return (first_ext * second_ext) >> (sizeof(T) * 8); - } - static std::string name() { return "Mulhsu"; } -}; - -template -class Mulhu { -public: - static R apply(T first, T second, R) { - return ((__uint128_t)first * (__uint128_t)second) >> (sizeof(T) * 8); - } - static std::string name() { return "Mulhu"; } -}; - -template -class Madd { -public: - static R apply(T first, T second, R third) { - return ((R)first * third) + (R)second; - } - static std::string name() { return "Madd"; } -}; - -template -class Nmsac { -public: - static R apply(T first, T second, R third) { - return -((R)first * (R)second) + third; - } - static std::string name() { return "Nmsac"; } -}; - -template -class Macc { -public: - static R apply(T first, T second, R third) { - return ((R)first * (R)second) + third; - } - static std::string name() { return "Macc"; } -}; - -template -class Maccsu { -public: - static R apply(T first, T second, R third) { - R first_ext = sext((R)first, (sizeof(T) * 8)); - R second_ext = zext((R)second, (sizeof(T) * 8)); - return (first_ext * second_ext) + third; - } - static std::string name() { return "Maccsu"; } -}; - -template -class Maccus { -public: - static R apply(T first, T second, R third) { - R first_ext = zext((R)first, (sizeof(T) * 8)); - R second_ext = sext((R)second, (sizeof(T) * 8)); - return (first_ext * second_ext) + third; - } - static std::string name() { return "Maccus"; } -}; - -template -class Nmsub { -public: - static R apply(T first, T second, R third) { - return -((R)first * third) + (R)second; - } - static std::string name() { return "Nmsub"; } -}; - -template -class Min { -public: - static R apply(T first, T second, R) { - return std::min(first, second); - } - static std::string name() { return "Min"; } -}; - -template -class Max { -public: - static R apply(T first, T second, R) { - return std::max(first, second); - } - static std::string name() { return "Max"; } -}; - -template -class And { -public: - static R apply(T first, T second, R) { - return first & second; - } - static std::string name() { return "And"; } -}; - -template -class Or { -public: - static R apply(T first, T second, R) { - return first | second; - } - static std::string name() { return "Or"; } -}; - -template -class Xor { -public: - static R apply(T first, T second, R) { - return first ^ second; - } - static std::string name() { return "Xor"; } -}; - -template -class Sll { -public: - static R apply(T first, T second, R) { - // Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount. - return second << (first & (sizeof(T) * 8 - 1)); - } - static std::string name() { return "Sll"; } -}; - -template -bool bitAt(T value, R pos, R negOffset) { - R offsetPos = pos - negOffset; - return pos >= negOffset && ((value >> offsetPos) & 0x1); -} - -template -bool anyBitUpTo(T value, R to, R negOffset) { - R offsetTo = to - negOffset; - return to >= negOffset && (value & (((R)1 << (offsetTo + 1)) - 1)); -} - -template -bool roundBit(T value, R shiftDown, uint32_t vxrm) { - switch (vxrm) { - case 0: // round-to-nearest-up - return bitAt(value, shiftDown, (R)1); - case 1: // round-to-nearest-even - return bitAt(value, shiftDown, (R)1) && (anyBitUpTo(value, shiftDown, (R)2) || bitAt(value, shiftDown, (R)0)); - case 2: // round-down (truncate) - return 0; - case 3: // round-to-odd - return !bitAt(value, shiftDown, (R)0) && anyBitUpTo(value, shiftDown, (R)1); - default: - std::cout << "Roundoff - invalid value for vxrm: " << vxrm << std::endl; - std::abort(); - } -} - -template -class SrlSra { -public: - static R apply(T first, T second, R) { - // Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount. - return second >> (first & (sizeof(T) * 8 - 1)); - } - static R apply(T first, T second, uint32_t vxrm, uint32_t) { - // Saturation is not relevant for this operation - // Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount. - T firstValid = first & (sizeof(T) * 8 - 1); - return apply(firstValid, second, 0) + roundBit(second, firstValid, vxrm); - } - static std::string name() { return "SrlSra"; } -}; - -template -class Aadd { -public: - static R apply(T first, T second, uint32_t vxrm, uint32_t) { - // Saturation is not relevant for this operation - T sum = second + first; - return (sum >> 1) + roundBit(sum, 1, vxrm); - } - static std::string name() { return "Aadd"; } -}; - -template -class Asub { -public: - static R apply(T first, T second, uint32_t vxrm, uint32_t) { - // Saturation is not relevant for this operation - T difference = second - first; - return (difference >> 1) + roundBit(difference, 1, vxrm); - } - static std::string name() { return "Asub"; } -}; - -template -class Eq { -public: - static R apply(T first, T second, R) { - return first == second; - } - static std::string name() { return "Eq"; } -}; - -template -class Ne { -public: - static R apply(T first, T second, R) { - return first != second; - } - static std::string name() { return "Ne"; } -}; - -template -class Lt { -public: - static R apply(T first, T second, R) { - return first > second; - } - static std::string name() { return "Lt"; } -}; - -template -class Le { -public: - static R apply(T first, T second, R) { - return first >= second; - } - static std::string name() { return "Le"; } -}; - -template -class Gt { -public: - static R apply(T first, T second, R) { - return first < second; - } - static std::string name() { return "Gt"; } -}; - -template -class AndNot { -public: - static R apply(T first, T second, R) { - return second & ~first; - } - static std::string name() { return "AndNot"; } -}; - -template -class OrNot { -public: - static R apply(T first, T second, R) { - return second | ~first; - } - static std::string name() { return "OrNot"; } -}; - -template -class Nand { -public: - static R apply(T first, T second, R) { - return ~(second & first); - } - static std::string name() { return "Nand"; } -}; - -template -class Mv { -public: - static R apply(T first, T, R) { - return first; - } - static std::string name() { return "Mv"; } -}; - -template -class Nor { -public: - static R apply(T first, T second, R) { - return ~(second | first); - } - static std::string name() { return "Nor"; } -}; - -template -class Xnor { -public: - static R apply(T first, T second, R) { - return ~(second ^ first); - } - static std::string name() { return "Xnor"; } -}; - -template -class Fadd { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fadd_s(first, second, frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fadd_d(first_d, second_d, frm, &fflags); - } else { - std::cout << "Fadd only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fadd"; } -}; - -template -class Fsub { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fsub_s(second, first, frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fsub_d(second_d, first_d, frm, &fflags); - } else { - std::cout << "Fsub only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fsub"; } -}; - -template -class Fmacc { -public: - static R apply(T first, T second, R third) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fmadd_s(first, second, third, frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fmadd_d(first_d, second_d, third, frm, &fflags); - } else { - std::cout << "Fmacc only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmacc"; } -}; - -template -class Fnmacc { -public: - static R apply(T first, T second, R third) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fnmadd_s(first, second, third, frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fnmadd_d(first_d, second_d, third, frm, &fflags); - } else { - std::cout << "Fnmacc only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fnmacc"; } -}; - -template -class Fmsac { -public: - static R apply(T first, T second, R third) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fmadd_s(first, second, rv_fsgnjn_s(third, third), frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fmadd_d(first_d, second_d, rv_fsgnjn_d(third, third), frm, &fflags); - } else { - std::cout << "Fmsac only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmsac"; } -}; - -template -class Fnmsac { -public: - static R apply(T first, T second, R third) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fnmadd_s(first, second, rv_fsgnjn_s(third, third), frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fnmadd_d(first_d, second_d, rv_fsgnjn_d(third, third), frm, &fflags); - } else { - std::cout << "Fnmsac only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fnmsac"; } -}; - -template -class Fmadd { -public: - static R apply(T first, T second, R third) { - if (sizeof(T) == 4 || sizeof(T) == 8) { - return Fmacc::apply(first, third, second); - } else { - std::cout << "Fmadd only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmadd"; } -}; - -template -class Fnmadd { -public: - static R apply(T first, T second, R third) { - if (sizeof(T) == 4 || sizeof(T) == 8) { - return Fnmacc::apply(first, third, second); - } else { - std::cout << "Fnmadd only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fnmadd"; } -}; - -template -class Fmsub { -public: - static R apply(T first, T second, R third) { - if (sizeof(T) == 4 || sizeof(T) == 8) { - return Fmsac::apply(first, third, second); - } else { - std::cout << "Fmsub only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmsub"; } -}; - -template -class Fnmsub { -public: - static R apply(T first, T second, R third) { - if (sizeof(T) == 4 || sizeof(T) == 8) { - return Fnmsac::apply(first, third, second); - } else { - std::cout << "Fnmsub only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fnmsub"; } -}; - -template -class Fmin { -public: - static R apply(T first, T second, R) { - // ignoring rounding modes for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_fmin_s(first, second, &fflags); - } else if (sizeof(T) == 8) { - return rv_fmin_d(first, second, &fflags); - } else { - std::cout << "Fmin only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmin"; } -}; - -template -class Fmax { -public: - static R apply(T first, T second, R) { - // ignoring rounding modes for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_fmax_s(first, second, &fflags); - } else if (sizeof(T) == 8) { - return rv_fmax_d(first, second, &fflags); - } else { - std::cout << "Fmax only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmax"; } -}; - -template -class Fsgnj { -public: - static R apply(T first, T second, R) { - if (sizeof(T) == 4) { - return rv_fsgnj_s(second, first); - } else if (sizeof(T) == 8) { - return rv_fsgnj_d(second, first); - } else { - std::cout << "Fsgnj only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fsgnj"; } -}; - -template -class Fsgnjn { -public: - static R apply(T first, T second, R) { - if (sizeof(T) == 4) { - return rv_fsgnjn_s(second, first); - } else if (sizeof(T) == 8) { - return rv_fsgnjn_d(second, first); - } else { - std::cout << "Fsgnjn only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fsgnjn"; } -}; - -template -class Fsgnjx { -public: - static R apply(T first, T second, R) { - if (sizeof(T) == 4) { - return rv_fsgnjx_s(second, first); - } else if (sizeof(T) == 8) { - return rv_fsgnjx_d(second, first); - } else { - std::cout << "Fsgnjx only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fsgnjx"; } -}; - -template -class Fcvt { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(T) == 4) { - switch (first) { - case 0b00000: // vfcvt.xu.f.v - return rv_ftou_s(second, frm, &fflags); - case 0b00001: // vfcvt.x.f.v - return rv_ftoi_s(second, frm, &fflags); - case 0b00010: // vfcvt.f.xu.v - return rv_utof_s(second, frm, &fflags); - case 0b00011: // vfcvt.f.x.v - return rv_itof_s(second, frm, &fflags); - case 0b00110: // vfcvt.rtz.xu.f.v - return rv_ftou_s(second, 1, &fflags); - case 0b00111: // vfcvt.rtz.x.f.v - return rv_ftoi_s(second, 1, &fflags); - case 0b01000: // vfwcvt.xu.f.v - return rv_ftolu_s(second, frm, &fflags); - case 0b01001: // vfwcvt.x.f.v - return rv_ftol_s(second, frm, &fflags); - case 0b01010: // vfwcvt.f.xu.v - return rv_utof_d(second, frm, &fflags); - case 0b01011: // vfwcvt.f.x.v - return rv_itof_d(second, frm, &fflags); - case 0b01100: // vfwcvt.f.f.v - return rv_ftod(second); - case 0b01110: // vfwcvt.rtz.xu.f.v - return rv_ftolu_s(second, 1, &fflags); - case 0b01111: // vfwcvt.rtz.x.f.v - return rv_ftol_s(second, 1, &fflags); - default: - std::cout << "Fcvt has unsupported value for first: " << first << std::endl; - std::abort(); - } - } else if (sizeof(T) == 8) { - switch (first) { - case 0b00000: // vfcvt.xu.f.v - return rv_ftolu_d(second, frm, &fflags); - case 0b00001: // vfcvt.x.f.v - return rv_ftol_d(second, frm, &fflags); - case 0b00010: // vfcvt.f.xu.v - return rv_lutof_d(second, frm, &fflags); - case 0b00011: // vfcvt.f.x.v - return rv_ltof_d(second, frm, &fflags); - case 0b00110: // vfcvt.rtz.xu.f.v - return rv_ftolu_d(second, 1, &fflags); - case 0b00111: // vfcvt.rtz.x.f.v - return rv_ftol_d(second, 1, &fflags); - case 0b01000: // vfwcvt.xu.f.v - case 0b01001: // vfwcvt.x.f.v - case 0b01010: // vfwcvt.f.xu.v - case 0b01011: // vfwcvt.f.x.v - case 0b01100: // vfwcvt.f.f.v - case 0b01110: // vfwcvt.rtz.xu.f.v - case 0b01111: // vfwcvt.rtz.x.f.v - std::cout << "Fwcvt only supports f32" << std::endl; - std::abort(); - default: - std::cout << "Fcvt has unsupported value for first: " << first << std::endl; - std::abort(); - } - } else { - std::cout << "Fcvt only supports f32 and f64" << std::endl; - std::abort(); - } - } - static R apply(T first, T second, uint32_t vxrm, uint32_t &) { // saturation argument is unused - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 8) { - switch (first) { - case 0b10000: // vfncvt.xu.f.w - return rv_ftou_d(second, vxrm, &fflags); - case 0b10001: // vfncvt.x.f.w - return rv_ftoi_d(second, vxrm, &fflags); - case 0b10010: // vfncvt.f.xu.w - return rv_lutof_s(second, vxrm, &fflags); - case 0b10011: // vfncvt.f.x.w - return rv_ltof_s(second, vxrm, &fflags); - case 0b10100: // vfncvt.f.f.w - return rv_dtof_r(second, vxrm); - case 0b10101: // vfncvt.rod.f.f.w - return rv_dtof_r(second, 6); - case 0b10110: // vfncvt.rtz.xu.f.w - return rv_ftou_d(second, 1, &fflags); - case 0b10111: // vfncvt.rtz.x.f.w - return rv_ftoi_d(second, 1, &fflags); - default: - std::cout << "Fncvt has unsupported value for first: " << first << std::endl; - std::abort(); - } - } else { - std::cout << "Fncvt only supports f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fcvt"; } -}; - -template -class Funary1 { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(T) == 4) { - switch (first) { - case 0b00000: // vfsqrt.v - return rv_fsqrt_s(second, frm, &fflags); - case 0b00100: // vfrsqrt7.v - return rv_frsqrt7_s(second, frm, &fflags); - case 0b00101: // vfrec7.v - return rv_frecip7_s(second, frm, &fflags); - case 0b10000: // vfclass.v - return rv_fclss_s(second); - default: - std::cout << "Funary1 has unsupported value for first: " << first << std::endl; - std::abort(); - } - } else if (sizeof(T) == 8) { - switch (first) { - case 0b00000: // vfsqrt.v - return rv_fsqrt_d(second, frm, &fflags); - case 0b00100: // vfrsqrt7.v - return rv_frsqrt7_d(second, frm, &fflags); - case 0b00101: // vfrec7.v - return rv_frecip7_d(second, frm, &fflags); - case 0b10000: // vfclass.v - return rv_fclss_d(second); - default: - std::cout << "Funary1 has unsupported value for first: " << first << std::endl; - std::abort(); - } - } else { - std::cout << "Funary1 only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Funary1"; } -}; - -template -class Xunary0 { -public: - static R apply(T, T second, T) { - return second; - } - static std::string name() { return "Xunary0"; } -}; - -template -class Feq { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_feq_s(second, first, &fflags); - } else if (sizeof(T) == 8) { - return rv_feq_d(second, first, &fflags); - } else { - std::cout << "Feq only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Feq"; } -}; - -template -class Fle { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_fle_s(second, first, &fflags); - } else if (sizeof(T) == 8) { - return rv_fle_d(second, first, &fflags); - } else { - std::cout << "Fle only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fle"; } -}; - -template -class Flt { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_flt_s(second, first, &fflags); - } else if (sizeof(T) == 8) { - return rv_flt_d(second, first, &fflags); - } else { - std::cout << "Flt only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Flt"; } -}; - -template -class Fne { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return !rv_feq_s(second, first, &fflags); - } else if (sizeof(T) == 8) { - return !rv_feq_d(second, first, &fflags); - } else { - std::cout << "Fne only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fne"; } -}; - -template -class Fgt { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_flt_s(first, second, &fflags); - } else if (sizeof(T) == 8) { - return rv_flt_d(first, second, &fflags); - } else { - std::cout << "Fgt only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fgt"; } -}; - -template -class Fge { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_fle_s(first, second, &fflags); - } else if (sizeof(T) == 8) { - return rv_fle_d(first, second, &fflags); - } else { - std::cout << "Fge only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fge"; } -}; - -template -class Fdiv { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(T) == 4) { - return rv_fdiv_s(second, first, frm, &fflags); - } else if (sizeof(T) == 8) { - return rv_fdiv_d(second, first, frm, &fflags); - } else { - std::cout << "Fdiv only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fdiv"; } -}; - -template -class Frdiv { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(T) == 4) { - return rv_fdiv_s(first, second, frm, &fflags); - } else if (sizeof(T) == 8) { - return rv_fdiv_d(first, second, frm, &fflags); - } else { - std::cout << "Frdiv only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Frdiv"; } -}; - -template -class Fmul { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fmul_s(first, second, frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fmul_d(first_d, second_d, frm, &fflags); - } else { - std::cout << "Fmul only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmul"; } -}; - -template -class Frsub { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(T) == 4) { - return rv_fsub_s(first, second, frm, &fflags); - } else if (sizeof(T) == 8) { - return rv_fsub_d(first, second, frm, &fflags); - } else { - std::cout << "Frsub only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Frsub"; } -}; - -template -class Clip { -public: - static R apply(T first, T second, uint32_t vxrm, uint32_t &vxsat_) { - // The low lg2(2*SEW) bits of the vector or scalar shift-amount value (e.g., the low 6 bits for a SEW=64-bit to - // SEW=32-bit narrowing operation) are used to control the right shift amount, which provides the scaling. - R firstValid = first & (sizeof(T) * 8 - 1); - T unclippedResult = (second >> firstValid) + roundBit(second, firstValid, vxrm); - R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); - vxsat_ |= clippedResult != unclippedResult; - return clippedResult; - } - static std::string name() { return "Clip"; } -}; - -template -class Smul { -public: - static R apply(T first, T second, uint32_t vxrm, uint32_t &vxsat_) { - R shift = sizeof(R) * 8 - 1; - T unshiftedResult = first * second; - T unclippedResult = (unshiftedResult >> shift) + roundBit(unshiftedResult, shift, vxrm); - R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); - vxsat_ |= clippedResult != unclippedResult; - return clippedResult; - } - static std::string name() { return "Smul"; } -}; - -/////////////////////////////////////////////////////////////////////////////// - -bool isMasked(std::vector> &vreg_file, uint32_t maskVreg, uint32_t byteI, bool vmask) { - auto &mask = vreg_file.at(maskVreg); - uint8_t emask = *(uint8_t *)(mask.data() + byteI / 8); - uint8_t value = (emask >> (byteI % 8)) & 0x1; - DP(4, "Masking enabled: " << +!vmask << " mask element: " << +value); - return !vmask && value == 0; -} - -template -uint32_t getVreg(uint32_t baseVreg, uint32_t byteI) { - uint32_t vsew = sizeof(DT) * 8; - return (baseVreg + (byteI / (VLEN / vsew))) % 32; -} - -template -DT &getVregData(std::vector &baseVregVec, uint32_t byteI) { - uint32_t vsew = sizeof(DT) * 8; - return *(DT *)(baseVregVec.data() + (byteI % (VLEN / vsew)) * vsew / 8); -} - -template -DT &getVregData(std::vector> &vreg_file, uint32_t baseVreg, uint32_t byteI) { - auto &vr1 = vreg_file.at(getVreg
(baseVreg, byteI)); - return getVregData
(vr1, byteI); -} - -template -void vector_op_vix_load(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rdest, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - uint32_t vsew = sizeof(DT) * 8; - uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11); - if (nfields * emul > 8) { - std::cout << "NFIELDS * EMUL = " << nfields * lmul << " but it should be <= 8" << std::endl; - std::abort(); - } - for (uint32_t i = 0; i < vl * nfields; i++) { - if (isMasked(vreg_file, 0, i / nfields, vmask)) - continue; - - uint32_t nfields_strided = strided ? nfields : 1; - Word mem_addr = (base_addr & 0xFFFFFFFC) + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT); - Word mem_data = 0; - emul_->dcache_read(&mem_data, mem_addr, vsew / 8); - DP(4, "Loading data " << mem_data << " from: " << mem_addr << " to vec reg: " << getVreg
(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); - DT &result = getVregData
(vreg_file, rdest + (i % nfields) * emul, i / nfields); - DP(4, "Previous data: " << +result); - result = (DT)mem_data; - } -} - -void vector_op_vix_load(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rdest, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - switch (vsew) { - case 8: - vector_op_vix_load(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask); - break; - case 16: - vector_op_vix_load(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask); - break; - case 32: - vector_op_vix_load(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask); - break; - case 64: - vector_op_vix_load(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask); - break; - default: - std::cout << "Failed to execute VLE for vsew: " << vsew << std::endl; - std::abort(); - } -} - -template -void vector_op_vv_load(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - uint32_t vsew = sizeof(DT) * 8; - uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11); - if (nfields * emul > 8) { - std::cout << "NFIELDS * EMUL = " << nfields * lmul << " but it should be <= 8" << std::endl; - std::abort(); - } - for (uint32_t i = 0; i < vl * nfields; i++) { - if (isMasked(vreg_file, 0, i / nfields, vmask)) - continue; - - Word offset = 0; - switch (iSew) { - case 8: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - case 16: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - case 32: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - case 64: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - default: - std::cout << "Unsupported iSew: " << iSew << std::endl; - std::abort(); - } - - Word mem_addr = (base_addr & 0xFFFFFFFC) + offset + (i % nfields) * sizeof(DT); - Word mem_data = 0; - emul_->dcache_read(&mem_data, mem_addr, vsew / 8); - DP(4, "VLUX/VLOX - Loading data " << mem_data << " from: " << mem_addr << " with offset: " << std::dec << offset << " to vec reg: " << getVreg
(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); - DT &result = getVregData
(vreg_file, rdest + (i % nfields) * emul, i / nfields); - DP(4, "Previous data: " << +result); - result = (DT)mem_data; - } -} - -void vector_op_vv_load(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - switch (vsew) { - case 8: - vector_op_vv_load(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); - break; - case 16: - vector_op_vv_load(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); - break; - case 32: - vector_op_vv_load(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); - break; - case 64: - vector_op_vv_load(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); - break; - default: - std::cout << "Failed to execute VLUX/VLOX for vsew: " << vsew << std::endl; - std::abort(); - } -} - -template -void vector_op_vix_store(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - uint32_t vsew = sizeof(DT) * 8; - uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11); - for (uint32_t i = 0; i < vl * nfields; i++) { - if (isMasked(vreg_file, 0, i / nfields, vmask)) - continue; - - uint32_t nfields_strided = strided ? nfields : 1; - Word mem_addr = base_addr + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT); - Word mem_data = getVregData
(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields); - DP(4, "Storing: " << std::hex << mem_data << " at: " << mem_addr << " from vec reg: " << getVreg
(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); - emul_->dcache_write(&mem_data, mem_addr, vsew / 8); - } -} - -void vector_op_vix_store(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc3, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - switch (vsew) { - case 8: - vector_op_vix_store(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask); - break; - case 16: - vector_op_vix_store(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask); - break; - case 32: - vector_op_vix_store(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask); - break; - case 64: - vector_op_vix_store(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask); - break; - default: - std::cout << "Failed to execute VSE for vsew: " << vsew << std::endl; - std::abort(); - } -} - -template -void vector_op_vv_store(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - uint32_t vsew = sizeof(DT) * 8; - uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11); - for (uint32_t i = 0; i < vl * nfields; i++) { - if (isMasked(vreg_file, 0, i / nfields, vmask)) - continue; - - Word offset = 0; - switch (iSew) { - case 8: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - case 16: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - case 32: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - case 64: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - default: - std::cout << "Unsupported iSew: " << iSew << std::endl; - std::abort(); - } - - Word mem_addr = base_addr + offset + (i % nfields) * sizeof(DT); - Word mem_data = getVregData
(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields); - DP(4, "VSUX/VSOX - Storing: " << std::hex << mem_data << " at: " << mem_addr << " with offset: " << std::dec << offset << " from vec reg: " << getVreg
(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); - emul_->dcache_write(&mem_data, mem_addr, vsew / 8); - } -} - -void vector_op_vv_store(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - switch (vsew) { - case 8: - vector_op_vv_store(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); - break; - case 16: - vector_op_vv_store(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); - break; - case 32: - vector_op_vv_store(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); - break; - case 64: - vector_op_vv_store(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); - break; - default: - std::cout << "Failed to execute VSUX/VSOX for vsew: " << vsew << std::endl; - std::abort(); - } -} - -template