Skip to content

Commit

Permalink
Full floating address support for future HeMAiA chip (#37)
Browse files Browse the repository at this point in the history
* Initial Commit

* Refactor address mapping formatting in solder.py

* Fix multithreading compilation

* Bug Fix

* Reduce cva6 executable rule; modify bootaddress

* Bug Fix

* Cacheable / Execution / Region Modification

* Bug Fix

* Fix for non-idempotent region in CVA6

* Add 0xf15 as chiplet ID

* Modify bootrom for chiplet configuration

* Bug Fix

* Bug Fix

* Bootrom replacement

* LD Script Fix

* Bootrom Update

* Bootrom with floating MSB base address support

* Remove bootrom binary

* Add .gitignore

* Remove bootrom sv

* Add gitignore

* Remove bootrom dump
  • Loading branch information
IveanEx authored Sep 15, 2024
1 parent a33a68e commit 15d8d20
Show file tree
Hide file tree
Showing 49 changed files with 443 additions and 1,394 deletions.
62 changes: 29 additions & 33 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,38 +10,34 @@ CFG_OVERRIDE ?= target/rtl/cfg/occamy_cfg/hemaia.hjson
CFG = $(realpath $(CFG_OVERRIDE))

clean:
make -C ./target/fpga/ clean
make -C ./target/fpga/vivado_ips/ clean
make -C ./target/fpga_chip/hemaia_chip/ clean
make -C ./target/fpga_chip/hemaia_system/ clean
make -C ./target/sim/ clean
make -C ./target/rtl/ clean
make -C ./target/fpga/sw clean
make -C ./target/fpga/bootrom clean
make -C ./target/tapeout clean
$(MAKE) -C ./target/fpga/ clean
$(MAKE) -C ./target/fpga/vivado_ips/ clean
$(MAKE) -C ./target/fpga_chip/hemaia_chip/ clean
$(MAKE) -C ./target/fpga_chip/hemaia_system/ clean
$(MAKE) -C ./target/sim/ clean
$(MAKE) -C ./target/rtl/ clean
$(MAKE) -C ./target/fpga/sw clean
$(MAKE) -C ./target/tapeout clean
rm -rf Bender.lock .bender deps
rm -rf ./target/rtl/src/bender_targets.tmp

# Software Generation
bootrom: # In Occamy Docker
# The bootrom used for simulation (light-weight bootrom)
make -C ./target/sim bootrom CFG_OVERRIDE=$(CFG)
$(MAKE) -C ./target/sim bootrom CFG_OVERRIDE=$(CFG)

# The bootrom used for FPGA protoyping (emulated eeprom, full-functional bootrom)
make -C ./target/fpga/bootrom bootrom CFG_OVERRIDE=$(CFG)

# The bootrom used for tapeout (embedded real rom, full-functional bootrom with different frequency settings)
make -C ./target/rtl/bootrom bootrom CFG_OVERRIDE=$(CFG)
# The bootrom used for tapeout / FPGA prototyping (embedded real rom, full-functional bootrom with different frequency settings)
$(MAKE) -C ./target/rtl/bootrom bootrom CFG_OVERRIDE=$(CFG)

sw: # In Occamy Docker
make -C ./target/sim sw CFG_OVERRIDE=$(CFG)
+$(MAKE) -C ./target/sim sw CFG_OVERRIDE=$(CFG)

# The software from simulation and FPGA prototyping comes from one source.
# If we intend to download the software to the FPGA, the bin should be extracted from the elf by objcopy in the Occamy docker.

# Hardware Generation
rtl: # In SNAX Docker
make -C ./target/rtl/ rtl CFG_OVERRIDE=$(CFG)
$(MAKE) -C ./target/rtl/ rtl CFG_OVERRIDE=$(CFG)

####################
# Tapeout Workflow #
Expand All @@ -54,16 +50,16 @@ tapeout_preparation: rtl tapeout_syn_flist
# Generating filelist per cluster
# Needed for a per-cluster synthesis
tapeout_syn_flist:
make -C ./target/tapeout/ syn_gen_list CFG_OVERRIDE=$(CFG)
$(MAKE) -C ./target/tapeout/ syn_gen_list CFG_OVERRIDE=$(CFG)


#################
# FPGA Workflow #
#################

occamy_system_vivado_preparation: # In SNAX Docker
make -C ./target/fpga/ define_defines_includes_no_simset.tcl
make -C ./target/fpga/vivado_ips/ define-sources.tcl
$(MAKE) -C ./target/fpga/ define_defines_includes_no_simset.tcl
$(MAKE) -C ./target/fpga/vivado_ips/ define-sources.tcl

occamy_ip_vcu128: # In ESAT Server
# debug jtag (put 1 or 0)
Expand All @@ -80,25 +76,25 @@ occamy_system_vcu128_gui: # In ESAT Server
sh -c "cd ./target/fpga/occamy_vcu128_2023/;vivado occamy_vcu128_2023.xpr"

occamy_system_download_sw: # In ESAT Server; this procedure will only inject the bootrom at present; however, it can also inject the software.
make -C ./target/fpga/sw download_sw
$(MAKE) -C ./target/fpga/sw download_sw

open_terminal: # It opens ttyUSB1 without locking it, and set baudrate at 1Mbps
$(info "shell minicom -D /dev/ttyUSB1 -b 1000000 -o")

# FPGA Workflow (with no Xilinx IP - tapeout configuration)
# Please note that in this configuration, injecting binary files through Xilinx Vivado is no longer possible; please use JTAG or the embedded bootrom to load the binary
hemaia_system_vivado_preparation: # In SNAX Docker
make -C ./target/fpga_chip/hemaia_system/ define_defines_includes_no_simset.tcl
make -C ./target/fpga_chip/hemaia_chip/ define-sources.tcl
$(MAKE) -C ./target/fpga_chip/hemaia_system/ define_defines_includes_no_simset.tcl
$(MAKE) -C ./target/fpga_chip/hemaia_chip/ define-sources.tcl

hemaia_chip_vivado: # In ESAT Server
make -C ./target/fpga_chip/hemaia_chip hemaia_chip
$(MAKE) -C ./target/fpga_chip/hemaia_chip hemaia_chip

hemaia_chip_vivado_gui: # In ESAT Server
sh -c "cd ./target/fpga/fpga_chip/hemaia_chip/hemaia_chip/;vivado hemaia_chip.xpr"

hemaia_system_vivado: hemaia_chip_vivado # In ESAT Server
make -C ./target/fpga_chip/hemaia_system hemaia_system
$(MAKE) -C ./target/fpga_chip/hemaia_system hemaia_system

hemaia_system_vcu128_gui: # In ESAT Server
sh -c "cd ./target/fpga_chip/hemaia_system/hemaia_system_vcu128/;vivado hemaia_system_vcu128.xpr"
Expand All @@ -108,19 +104,19 @@ hemaia_system_vivado_gui: # In ESAT Server

# Verilator Workflow (not working; many errors come from AXI)
occamy_system_vlt: # In SNAX Docker
make -C ./target/sim work/lib/libfesvr.a
make -C ./target/sim tb
make -C ./target/sim bin/occamy_top.vlt -j $(shell nproc)

+$(MAKE) -C ./target/sim work/lib/libfesvr.a
+$(MAKE) -C ./target/sim tb
+$(MAKE) -C ./target/sim bin/occamy_top.vlt

# Questasim Workflow
occamy_system_vsim_preparation: # In SNAX Docker
make -C ./target/sim work/lib/libfesvr.a
make -C ./target/sim tb
make -C ./target/sim work-vsim/compile.vsim.tcl
$(MAKE) -C ./target/sim work/lib/libfesvr.a
$(MAKE) -C ./target/sim tb
$(MAKE) -C ./target/sim work-vsim/compile.vsim.tcl

occamy_system_vsim: # In ESAT Server
make -C ./target/sim bin/occamy_top.vsim

$(MAKE) -C ./target/sim bin/occamy_top.vsim

debug-info:
@echo "CFG_OVERRIDE: $(CFG_OVERRIDE)"
Expand Down
16 changes: 15 additions & 1 deletion hw/occamy/occamy_chip.sv.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import ${name}_pkg::*;
/// Real-time clock (for time keeping)
input logic rtc_i,
input logic test_mode_i,
input logic [1:0] chip_id_i,
input chip_id_t chip_id_i,
input logic [1:0] boot_mode_i,
// `uart` Interface
output logic uart_tx_o,
Expand Down Expand Up @@ -133,11 +133,25 @@ import ${name}_pkg::*;
assign bootrom_rsp.ready = '1;
assign bootrom_rsp.error = '0;

///////////////////
// Chip ID Latch //
///////////////////

// The latched chip_id
chip_id_t chip_id;

always_latch begin
if (~rst_ni) begin
chip_id <= chip_id_i;
end
end

///////////////////
// Occamy Top //
///////////////////

${name}_top i_${name} (
.chip_id_i (chip_id),
.bootrom_req_o (bootrom_axi_lite_req),
.bootrom_rsp_i (bootrom_axi_lite_rsp),
.ext_irq_i (ext_irq_i),
Expand Down
9 changes: 6 additions & 3 deletions hw/occamy/occamy_cva6.sv.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
module ${name}_cva6 import ${name}_pkg::*; (
input logic clk_i,
input logic rst_ni,
input chip_id_t chip_id_i,
input logic [1:0] irq_i,
input logic ipi_i,
input logic time_irq_i,
Expand All @@ -25,6 +26,7 @@ module ${name}_cva6 import ${name}_pkg::*; (
assign cva6_axi_cut_rsp = axi_resp_i;

// TODO(zarubaf): Derive from system parameters
// Be careful of address overflow: for now the system only has a 40-bit address region, so base + size should be smaller than the maximum addressable region.
localparam ariane_pkg::ariane_cfg_t CVA6OccamyConfig = '{
RASDepth: 2,
BTBEntries: 32,
Expand All @@ -33,10 +35,10 @@ module ${name}_cva6 import ${name}_pkg::*; (
NrNonIdempotentRules: 3,
NonIdempotentAddrBase: {64'd${occamy_cfg["spm_narrow"]["address"]+occamy_cfg["spm_narrow"]["length"]} , 64'd${occamy_cfg["peripherals"]["rom"]["address"]+occamy_cfg["peripherals"]["rom"]["length"]} , 64'h1000},
NonIdempotentLength: {64'd${0x80000000-occamy_cfg["spm_narrow"]["address"]-occamy_cfg["spm_narrow"]["length"]}, 64'd${occamy_cfg["spm_narrow"]["address"]-occamy_cfg["peripherals"]["rom"]["address"]-occamy_cfg["peripherals"]["rom"]["length"]}, 64'd${occamy_cfg["peripherals"]["rom"]["address"]-0x1000}},
NrExecuteRegionRules: 5,
NrExecuteRegionRules: 4,
// DRAM, Boot ROM, SPM, Debug Module
ExecuteRegionAddrBase: {64'h10_0000_0000, 64'h8000_0000, 64'd${occamy_cfg["peripherals"]["rom"]["address"]}, 64'd${occamy_cfg["spm_narrow"]["address"]}, 64'h0 },
ExecuteRegionLength: {64'h2_0000_0000 , 64'h8000_0000, 64'd${occamy_cfg["peripherals"]["rom"]["length"]} , 64'd${occamy_cfg["spm_narrow"]["length"]} , 64'h1000},
ExecuteRegionAddrBase: {64'h8000_0000, 64'd${occamy_cfg["peripherals"]["rom"]["address"]}, 64'd${occamy_cfg["spm_narrow"]["address"]}, 64'h0 },
ExecuteRegionLength: {(64'hff_ffff_ffff-64'h8000_0000), 64'd${occamy_cfg["peripherals"]["rom"]["length"]} , 64'd${occamy_cfg["spm_narrow"]["length"]} , 64'h1000},
// cached region
NrCachedRegionRules: 2,
CachedRegionAddrBase: {64'h8000_0000, 64'd${occamy_cfg["spm_narrow"]["address"]}},
Expand Down Expand Up @@ -84,6 +86,7 @@ module ${name}_cva6 import ${name}_pkg::*; (
.clk_i,
.rst_ni,
.boot_addr_i (cva6_boot_addr),
.chip_id_i (chip_id_i),
.hart_id_i (64'h0),
.irq_i (irq),
.ipi_i (ipi),
Expand Down
4 changes: 3 additions & 1 deletion hw/occamy/occamy_soc.sv.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ module ${name}_soc
input logic clk_i,
input logic rst_ni,
input logic test_mode_i,
input chip_id_t chip_id_i,
input logic [${occamy_cfg["addr_width"]-1}:0] boot_addr_i,
// Peripheral Ports (to AXI-lite Xbar)
output ${soc_narrow_xbar.out_periph.req_type()} periph_axi_lite_req_o,
Expand Down Expand Up @@ -108,6 +109,7 @@ module ${name}_soc
${name}_cva6 i_${name}_cva6 (
.clk_i (clk_i),
.rst_ni (rst_ni),
.chip_id_i (chip_id_i),
.irq_i (eip_i),
.ipi_i (msip_i[0]),
.time_irq_i (mtip_i[0]),
Expand Down Expand Up @@ -142,7 +144,7 @@ module ${name}_soc
.rst_ni (rst_ni),
.test_mode_i (test_mode_i),
.boot_addr_i (boot_addr_i[31:0]),
.chip_id_i (8'b0), // Temporary solution as the Chip ID is not provided yet
.chip_id_i (chip_id_i),
.meip_i ('0),
.mtip_i (mtip_i[${lower_core + nr_cores_s1_quadrant - 1}:${lower_core}]),
.msip_i (msip_i[${lower_core + nr_cores_s1_quadrant - 1}:${lower_core}]),
Expand Down
10 changes: 7 additions & 3 deletions hw/occamy/occamy_top.sv.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ module ${name}_top
/// Real-time clock (for time keeping)
input logic rtc_i,
input logic test_mode_i,
input logic [1:0] chip_id_i,
input chip_id_t chip_id_i,
input logic [1:0] boot_mode_i,
// `uart` Interface
output logic uart_tx_o,
Expand Down Expand Up @@ -90,7 +90,8 @@ module ${name}_top
always_comb begin
soc_ctrl_in = '0;
soc_ctrl_in.boot_mode.d = boot_mode_i;
soc_ctrl_in.chip_id.d = chip_id_i;
soc_ctrl_in.chip_id.d = '0;
// The original chip_id is disabled, and replaced by chip_id of HeMAiA
end

// Machine timer and machine software interrupt pending.
Expand Down Expand Up @@ -123,6 +124,7 @@ module ${name}_top
.clk_i,
.rst_ni,
.test_mode_i,
.chip_id_i ( chip_id_i ),
.boot_addr_i ( boot_addr ),
.periph_axi_lite_req_o ( periph_axi_lite_soc2per_req ),
.periph_axi_lite_rsp_i ( periph_axi_lite_soc2per_rsp ),
Expand Down Expand Up @@ -333,10 +335,12 @@ module ${name}_top
.to_reg(context, "axi_lite_to_reg_soc_ctrl") %>
${name}_soc_ctrl #(
.reg_req_t ( ${regbus_soc_ctrl.req_type()} ),
.reg_rsp_t ( ${regbus_soc_ctrl.rsp_type()} )
.reg_rsp_t ( ${regbus_soc_ctrl.rsp_type()} ),
.chip_id_t ( chip_id_t )
) i_soc_ctrl (
.clk_i ( ${regbus_soc_ctrl.clk} ),
.rst_ni ( ${regbus_soc_ctrl.rst} ),
.chip_id_i ( chip_id_i ),
.reg_req_i ( ${regbus_soc_ctrl.req_name()} ),
.reg_rsp_o ( ${regbus_soc_ctrl.rsp_name()} ),
.reg2hw_o ( soc_ctrl_out ),
Expand Down
8 changes: 5 additions & 3 deletions hw/occamy/soc_ctrl/occamy_soc_ctrl.sv.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@

module occamy_soc_ctrl import occamy_soc_reg_pkg::*; #(
parameter type reg_req_t = logic,
parameter type reg_rsp_t = logic
parameter type reg_rsp_t = logic,
parameter type chip_id_t = logic
) (
input clk_i,
input rst_ni,
input chip_id_t chip_id_i,

// Below Register interface can be changed
input reg_req_t reg_req_i,
Expand Down Expand Up @@ -74,8 +76,8 @@ module occamy_soc_ctrl import occamy_soc_reg_pkg::*; #(
assign boot_mode = hw2reg_i.boot_mode.d;

always_comb begin
boot_addr_init = (boot_mode == 2'b00)? ${default_boot_addr}:${backup_boot_addr};
boot_addr_d = (boot_mode == 2'b00)? ${default_boot_addr}:${backup_boot_addr};
boot_addr_init = (boot_mode == 2'b00) ? {chip_id_i,${addr_width-occamy_cfg["hemaia_multichip"]["chip_id_width"]}'h${default_boot_addr}}:{chip_id_i,${addr_width-occamy_cfg["hemaia_multichip"]["chip_id_width"]}'h${backup_boot_addr}};
boot_addr_d = (boot_mode == 2'b00) ? {chip_id_i,${addr_width-occamy_cfg["hemaia_multichip"]["chip_id_width"]}'h${default_boot_addr}}:{chip_id_i,${addr_width-occamy_cfg["hemaia_multichip"]["chip_id_width"]}'h${backup_boot_addr}};
boot_addr_o = boot_addr_q;
end

Expand Down
3 changes: 3 additions & 0 deletions hw/vendor/openhwgroup_cva6/core/ariane.sv
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ module ariane import ariane_pkg::*; #(
) (
input logic clk_i,
input logic rst_ni,
// Chip ID for the correct cache / execution region configuration
input chip_id_t chip_id_i,
// Core ID, Cluster ID and boot address are considered more or less static
input logic [riscv::VLEN-1:0] boot_addr_i, // reset boot address
input logic [riscv::XLEN-1:0] hart_id_i, // hart id in a multicore environment (reflected in a CSR)
Expand Down Expand Up @@ -82,6 +84,7 @@ module ariane import ariane_pkg::*; #(
) i_cva6 (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.chip_id_i ( chip_id_i ),
.boot_addr_i ( boot_addr_i ),
.hart_id_i ( hart_id_i ),
.sram_cfg_idata_i ( sram_cfg_idata_i ),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ module cache_ctrl import ariane_pkg::*; import std_cache_pkg::*; #(
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input chip_id_t chip_id_i, // Chip ID for the correct cache / execution region configuration
input logic bypass_i, // enable cache
output logic busy_o,
input logic stall_i, // stall new memory requests
Expand Down Expand Up @@ -247,7 +248,7 @@ module cache_ctrl import ariane_pkg::*; import std_cache_pkg::*; #(
// -------------------------
// Check for cache-ability
// -------------------------
if (!is_inside_cacheable_regions(ArianeCfg, {{{64-riscv::PLEN}{1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}})) begin
if (!is_inside_cacheable_regions(ArianeCfg, chip_id_i, {{{64-riscv::PLEN}{1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}})) begin
mem_req_d.bypass = 1'b1;
state_d = WAIT_REFILL_GNT;
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ module cva6_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
) (
input logic clk_i,
input logic rst_ni,
input chip_id_t chip_id_i,

// SRAM config
input sram_cfg_t sram_cfg_data_i,
Expand Down Expand Up @@ -112,7 +113,7 @@ module cva6_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q;

// noncacheable if request goes to I/O space, or if cache is disabled
assign paddr_is_nc = (~cache_en_q) | (~ariane_pkg::is_inside_cacheable_regions(ArianeCfg, {{{64-riscv::PLEN}{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}}));
assign paddr_is_nc = (~cache_en_q) | (~ariane_pkg::is_inside_cacheable_regions(ArianeCfg, chip_id_i, {{{64-riscv::PLEN}{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}}));

// pass exception through
assign dreq_o.ex = areq_i.fetch_exception;
Expand Down Expand Up @@ -160,7 +161,7 @@ end else begin : gen_piton_offset
// main control logic
///////////////////////////////////////////////////////
logic addr_ni;
assign addr_ni = is_inside_nonidempotent_regions(ArianeCfg, areq_i.fetch_paddr);
assign addr_ni = is_inside_nonidempotent_regions(ArianeCfg, chip_id_i, areq_i.fetch_paddr);
always_comb begin : p_fsm
// default assignment
state_d = state_q;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
) (
input logic clk_i,
input logic rst_ni,
input chip_id_t chip_id_i,
input riscv::priv_lvl_t priv_lvl_i,

// SRAM config
Expand Down Expand Up @@ -114,6 +115,7 @@ module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
) i_cva6_icache (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.chip_id_i ( chip_id_i ),
.sram_cfg_data_i ( sram_cfg_data_i ),
.sram_cfg_tag_i ( sram_cfg_tag_i ),
.flush_i ( flush_i ),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ module std_cache_subsystem import ariane_pkg::*; import std_cache_pkg::*; #(
) (
input logic clk_i,
input logic rst_ni,
input chip_id_t chip_id_i,
input riscv::priv_lvl_t priv_lvl_i,
output logic busy_o,
input logic stall_i, // stall new memory requests
Expand Down Expand Up @@ -94,6 +95,7 @@ module std_cache_subsystem import ariane_pkg::*; import std_cache_pkg::*; #(
) i_cva6_icache_axi_wrapper (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.chip_id_i ( chip_id_i ),
.priv_lvl_i ( priv_lvl_i ),
.sram_cfg_data_i ( sram_cfg_idata_i ),
.sram_cfg_tag_i ( sram_cfg_itag_i ),
Expand Down Expand Up @@ -127,6 +129,7 @@ module std_cache_subsystem import ariane_pkg::*; import std_cache_pkg::*; #(
) i_nbdcache (
.clk_i,
.rst_ni,
.chip_id_i,
.sram_cfg_data_i ( sram_cfg_ddata_i ),
.sram_cfg_tag_i ( sram_cfg_dtag_i ),
.sram_cfg_valid_dirty_i ( sram_cfg_dvalid_dirty_i ),
Expand Down
Loading

0 comments on commit 15d8d20

Please sign in to comment.