Skip to content

Configurable RWDS sampling and clock-start delay. #21

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
2 changes: 2 additions & 0 deletions Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ sources:
- src/hyperbus_phy2r.sv
- src/hyperbus_ddr_out.sv
- src/hyperbus_delay.sv
- src/hyperbus_rwds_sampler.sv
- src/hyperbus_trx.sv
- src/hyperbus_cfg_regs.sv
- src/hyperbus_phy.sv
Expand All @@ -47,5 +48,6 @@ sources:
- test/fixture_hyperbus.sv
- test/hyperbus_tb.sv
- test/dut_if.sv
- test/hyperbus_tb_pkg.sv
- test/axi_hyper_tb.sv
- src/hyperbus.sv
8 changes: 6 additions & 2 deletions src/hyperbus_cfg_regs.sv
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ module hyperbus_cfg_regs #(
`include "common_cells/registers.svh"

// Internal Parameters
localparam int unsigned NumBaseRegs = 11;
localparam int unsigned NumBaseRegs = 13;
localparam int unsigned NumRegs = 2*NumChips + NumBaseRegs;
localparam int unsigned RegsBits = cf_math_pkg::idx_width(NumRegs);
localparam int unsigned RegStrbWidth = RegDataWidth/8; // TODO ASSERT: Must be power of two >= 16!!
localparam int unsigned RegStrbWidth = RegDataWidth/8;

// Data and index types
typedef logic [RegsBits-1:0] reg_idx_t;
Expand All @@ -58,6 +58,8 @@ module hyperbus_cfg_regs #(
if (sel_reg_mapped) begin
rfield = {
crange_q,
reg_data_t'(cfg_q.rwds_sample),
reg_data_t'(cfg_q.csn_to_ck_cycles),
reg_data_t'(cfg_q.t_csh_cycles),
reg_data_t'(cfg_q.which_phy),
reg_data_t'(cfg_q.phys_in_use),
Expand Down Expand Up @@ -98,6 +100,8 @@ module hyperbus_cfg_regs #(
'h8: cfg_d.phys_in_use = (NumPhys==1) ? 0 : ( (~wmask & cfg_q.phys_in_use ) | (wmask & reg_req_i.wdata) );
'h9: cfg_d.which_phy = (NumPhys==1) ? 0 : ( (~wmask & cfg_q.which_phy ) | (wmask & reg_req_i.wdata) );
'ha: cfg_d.t_csh_cycles = (~wmask & cfg_q.t_csh_cycles ) | (wmask & reg_req_i.wdata);
'hb: cfg_d.csn_to_ck_cycles = (~wmask & cfg_q.csn_to_ck_cycles ) | (wmask & reg_req_i.wdata);
'hc: cfg_d.rwds_sample = (~wmask & cfg_q.rwds_sample ) | (wmask & reg_req_i.wdata);
default: begin
{sel_chip, chip_reg} = sel_reg - NumBaseRegs;
crange_d[sel_chip][chip_reg] = (~wmask & crange_q[sel_chip][chip_reg]) | (wmask & reg_req_i.wdata);
Expand Down
105 changes: 75 additions & 30 deletions src/hyperbus_phy.sv
Original file line number Diff line number Diff line change
Expand Up @@ -121,22 +121,24 @@ module hyperbus_phy import hyperbus_pkg::*; #(
.clk_i_90,
.rst_ni,
.test_mode_i,
.cs_i ( cs_q ),
.cs_ena_i ( trx_cs_ena ),
.rwds_sample_o ( trx_rwds_sample ),
.rwds_sample_ena_i ( trx_rwds_sample_ena ),
.tx_clk_delay_i ( cfg_i.t_tx_clk_delay ),
.tx_clk_ena_i ( trx_clk_ena ),
.tx_data_i ( trx_tx_data ),
.tx_data_oe_i ( trx_tx_data_oe ),
.tx_rwds_i ( trx_tx_rwds ),
.tx_rwds_oe_i ( trx_tx_rwds_oe ),
.rx_clk_delay_i ( cfg_i.t_rx_clk_delay ),
.rx_clk_set_i ( trx_rx_clk_set ),
.rx_clk_reset_i ( trx_rx_clk_reset ),
.rx_data_o ( trx_rx_data ),
.rx_valid_o ( trx_rx_valid ),
.rx_ready_i ( trx_rx_ready ),
.cfg_edge_idx_i ( cfg_i.rwds_sample.cylce_idx ),
.cfg_edge_pol_i ( cfg_i.rwds_sample.polarity ),
.cs_i ( cs_q ),
.cs_ena_i ( trx_cs_ena ),
.rwds_sample_o ( trx_rwds_sample ),
.rwds_sample_ena_i ( trx_rwds_sample_ena ),
.tx_clk_delay_i ( cfg_i.t_tx_clk_delay ),
.tx_clk_ena_i ( trx_clk_ena ),
.tx_data_i ( trx_tx_data ),
.tx_data_oe_i ( trx_tx_data_oe ),
.tx_rwds_i ( trx_tx_rwds ),
.tx_rwds_oe_i ( trx_tx_rwds_oe ),
.rx_clk_delay_i ( cfg_i.t_rx_clk_delay ),
.rx_clk_set_i ( trx_rx_clk_set ),
.rx_clk_reset_i ( trx_rx_clk_reset ),
.rx_data_o ( trx_rx_data ),
.rx_valid_o ( trx_rx_valid ),
.rx_ready_i ( trx_rx_ready ),
.hyper_cs_no,
.hyper_ck_o,
.hyper_ck_no,
Expand Down Expand Up @@ -265,15 +267,32 @@ module hyperbus_phy import hyperbus_pkg::*; #(
if (trans_valid_i & ~b_pending_q & r_outstand_q == '0) begin
tf_d = trans_i;
cs_d = trans_cs_i;
// Send 3 CA words (t_CSS respected through clock delay)
timer_d = 2;
state_d = SendCA;
// Enable output driver (needs to be enabled one cycle
// earlier since tri-state enables of IO pads are quite
// slow compared to the data pins)

if(cfg_i.csn_to_ck_cycles != 0) begin
// assert CS but delay hyper_ck to allow more time
// for memory to drive RWDS (to satisfy t_DSV)
state_d = DelayCK;
timer_d = cfg_i.csn_to_ck_cycles -1;
end else begin
// max throughput when memory RWDS signal arrives early
state_d = SendCA;
// Send 3 CA words (t_CSS respected through clock delay)
timer_d = 2;
end

// Enable output driver (needs to be enabled at least
// one cycle earlier since tri-state enables of IO pads
// are quite slow compared to the data pins)
trx_tx_data_oe = 1'b1;
end
end
DelayCK: begin
trx_clk_ena = 1'b0;
if (ctl_timer_zero) begin
timer_d = 2; // Send 3 CA words
state_d = SendCA;
end
end
SendCA: begin
// Dataflow handled outside FSM
trx_clk_ena = 1'b1;
Expand All @@ -284,26 +303,52 @@ module hyperbus_phy import hyperbus_pkg::*; #(
timer_d = cfg_i.t_burst_max;
state_d = Write;
end else begin
timer_d = TimerWidth'(cfg_i.t_latency_access) << ctl_add_latency;
timer_d = TimerWidth'(cfg_i.t_latency_access);
state_d = WaitLatAccess;
end
end
end
WaitLatAccess: begin
trx_clk_ena = 1'b1;
trx_tx_data_oe = 1'b1;
// Substract cycle for last CA and another for state delay
// ctl_add_latency may arrive at any time (adjustable RWDS sampling)
// If no additional latency required:
if (~ctl_add_latency) begin
// Subtract cycle for last CA and another for state delay
if(ctl_timer_two) begin
timer_d = cfg_i.t_burst_max;
// Switch to write or read phase and already start
// turnaround of tri-state driver (depending on latency
// config and if read or write transaction).
if (tf_q.write) begin
state_d = Write;
trx_tx_data_oe = 1'b1;
// For zero latency writes, we must not drive the RWDS
// signal (see specs page 9). Depending on the latency
// mode we thus drive only the DQ signals or DQ + RWDS.
trx_tx_rwds_oe = ~ctl_write_zero_lat;
end else begin
state_d = Read;
trx_tx_data_oe = 1'b0;
trx_tx_rwds_oe = 1'b0;
end
end
end else if (ctl_timer_one) begin
// instead of going to 0, add another latency count
state_d = WaitAddLatAccess;
timer_d = TimerWidth'(cfg_i.t_latency_access);
end
end
WaitAddLatAccess: begin
// Same as WaitLatAccess but without possibility
// of adding another latency count
trx_clk_ena = 1'b1;
trx_tx_data_oe = 1'b1;
if (ctl_timer_two) begin
timer_d = cfg_i.t_burst_max;
// Switch to write or read phase and already start
// turnaround of tri-state driver (depending on latency
// config and if read or write transaction).
if (tf_q.write) begin
state_d = Write;
trx_tx_data_oe = 1'b1;
// For zero latency writes, we must not drive the RWDS
// signal (see specs page 9). Depending on the latency
// mode we thus drive only the DQ signals or DQ + RWDS.
trx_tx_rwds_oe = ~ctl_write_zero_lat;
end else begin
state_d = Read;
Expand Down
38 changes: 26 additions & 12 deletions src/hyperbus_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,27 @@ package hyperbus_pkg;
localparam unsigned HyperBurstWidth = 8 + $clog2(1024/16) + 1;
typedef logic [HyperBurstWidth-1:0] hyper_blen_t;


// RWDS sampling configuration: selects the hyper_ck edge (cycle index and
// polarity) on which RWDS is sampled.
// NOTE(review): `cylce_idx` looks like a typo of `cycle_idx`; renaming it
// requires updating every user of this field (PHY port map, default config).
typedef struct packed {
logic [3:0] cylce_idx; // number of cycles passed when sampling should occur
logic polarity; // 1: rising, 0: falling
} hyper_cfg_rwds_t;

// configuration type
typedef struct packed {
logic [3:0] t_latency_access;
logic en_latency_additional;
logic [15:0] t_burst_max;
logic [3:0] t_read_write_recovery;
logic [3:0] t_rx_clk_delay;
logic [3:0] t_tx_clk_delay;
logic [4:0] address_mask_msb;
logic address_space;
logic phys_in_use;
logic which_phy;
logic [3:0] t_csh_cycles; //add an configurable Tcsh for high freq operation(200MHz Hyperram)
logic [3:0] t_latency_access;
logic en_latency_additional;
logic [15:0] t_burst_max;
logic [3:0] t_read_write_recovery;
logic [3:0] t_rx_clk_delay;
logic [3:0] t_tx_clk_delay;
logic [4:0] address_mask_msb;
logic address_space;
logic phys_in_use;
logic which_phy;
logic [3:0] t_csh_cycles; // add an configurable Tcsh for high freq operation(200MHz Hyperram)
logic [3:0] csn_to_ck_cycles; // delay hyper_ck after CS is asserted (more time for t_DSV)
hyper_cfg_rwds_t rwds_sample;
} hyper_cfg_t;

typedef struct packed {
Expand All @@ -40,8 +48,10 @@ package hyperbus_pkg;
typedef enum logic[3:0] {
Startup,
Idle,
DelayCK,
SendCA,
WaitLatAccess,
WaitAddLatAccess,
Read,
Write,
WaitXfer,
Expand Down Expand Up @@ -74,7 +84,11 @@ package hyperbus_pkg;
address_space: 'b0,
phys_in_use: NumPhys-1,
which_phy: NumPhys-1,
t_csh_cycles: 'h1
t_csh_cycles: 'h1,
csn_to_ck_cycles: 'h1, // additional cycles from CS_N going low to start of hyper_ck
rwds_sample: hyper_cfg_rwds_t'{ // hyper_ck edge for RWDS sampling relative to CS_N going low
cylce_idx: 'h2, // cycle number after CS_N going low (first falling and rising edge is idx=0)
polarity: 'b0 } // 0: falling, 1:rising -> first edge after CS_N is a falling edge
};

return cfg;
Expand Down
115 changes: 115 additions & 0 deletions src/hyperbus_rwds_sampler.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
//
// Philippe Sauter <[email protected]>

// This module guarantees proper worst-case sampling of RWDS.
// RWDS may only be valid (and stable) for a single period around
// the 3rd hyper_ck_o rising edge (see t_DSV, t_CSS, t_CKDS @ 166MHz).
// Since there may be arbitrary pad and PCB delays, the sampling edge
// is fully configurable (edge number and polarity).
// A gated clock that is only active around one edge is created
// and then the sample is taken only at the selected edge.
// The final sample is saved into a register in the phy clock domain.
//
// It is not possible to sample on the very first rising clock edge
// with this mechanism.
// Therefore cfg_edge_idx_i = 0 selects the first falling edge
// or the subsequent rising edge, depending on cfg_edge_pol_i.
// With this naming scheme, the default edge should be idx=1, pol=1.
//
// Constraints:
// cfg* signals are pseudostatic (set_false_path -setup or set_multicycle_path)

`include "common_cells/registers.svh"

// Samples hyper_rwds_i exactly once per transfer on a configurable edge of the
// 90-degree shifted HyperBus clock and hands the sample to the phy clock domain.
//
// Operation:
//   1. cfg_edge_pol_i selects the sampling clock: tx_clk_90_i (rising, 1) or
//      its inverse (falling, 0).
//   2. An edge counter runs on the inverse of the sampling clock while
//      hyper_cs_ni is low and is cleared while hyper_cs_ni is high.
//   3. When the counter equals the configured target, a clock gate lets one
//      sampling-clock edge through, which captures hyper_rwds_i.
//   4. The captured value is re-registered with clk_i.
module hyperbus_rwds_sampler import hyperbus_pkg::*; #()
(
    // Global signals
    input  logic        clk_i,          // phy clock
    input  logic        rst_ni,         // NOTE(review): not referenced explicitly; presumably the
                                        // default reset picked up by the `FF macro - confirm
    input  logic        test_mode_i,    // forwarded to the clock gate's test enable

    input  logic [3:0]  cfg_edge_idx_i, // #edge where rwds is sampled
    input  logic        cfg_edge_pol_i, // 1: rising, 0: falling

    // sampled value going to PHY-FSM
    output logic        rwds_sample_o,

    // ungated hyperbus clock
    input  logic        tx_clk_90_i,

    // physical HyperBus signals
    input  logic        hyper_cs_ni,
    input  logic        hyper_rwds_i
);

    // used to time the sampling of RWDS to determine additional latency
    logic       tx_clk_270;                       // inverted 90deg clock
    logic [4:0] cnt_edge_d, cnt_edge_q;           // one bit larger than config
    logic       cnt_clk;                          // clock used for edge counting
    logic [4:0] target_value;                     // counter value at which the gate opens
    logic       sampling_clk, sampling_clk_gated; // clock used for sampling
    logic       enable_sampling;                  // sampling clock gate enable
    logic       rwds_sample;                      // RWDS captured on the gated sampling clock

    // Needed so the first falling edge is cfg = '0 and it increments from there.
    // Without this there would be an illegal config reg combination, since the
    // first edge we can sample is the first falling one (1/2 cycle after CS going
    // low); the rising edge coinciding with CS going low is illegal.
    // Rising polarity therefore targets one count later than falling polarity.
    assign target_value = cfg_edge_pol_i ? cfg_edge_idx_i + 1 : cfg_edge_idx_i;

    // generate and select clocks
    // Sampling is either clocked by un-inverted or inverted 90deg hyperbus clock
    // Counter is clocked by the inverse as it controls the clock gate
    // which should be on for one cycle with sampling edge in the middle
    tc_clk_inverter i_tx_clk_inv (
        .clk_i ( tx_clk_90_i ),
        .clk_o ( tx_clk_270  )
    );

    tc_clk_mux2 i_sampling_clk_mux (
        .clk0_i    ( tx_clk_270     ),
        .clk1_i    ( tx_clk_90_i    ),
        .clk_sel_i ( cfg_edge_pol_i ),
        .clk_o     ( sampling_clk   )
    );

    tc_clk_inverter i_edge_cnt_clk_inv (
        .clk_i ( sampling_clk ),
        .clk_o ( cnt_clk      )
    );

    always_comb begin : gen_edge_cnt
        // only count during transfers
        if(~hyper_cs_ni) begin
            cnt_edge_d = cnt_edge_q + 1;
            if(cnt_edge_q == '1) begin
                cnt_edge_d = cnt_edge_q; // saturating counter
            end
        end else begin
            // reset counter for next transfer
            // ('0 instead of 1'b0: matches the 5-bit width of cnt_edge_d)
            cnt_edge_d = '0;
        end
    end

    `FF(cnt_edge_q, cnt_edge_d, '0, cnt_clk);

    // open the gate only while the counter sits on the configured edge and CS is active
    assign enable_sampling = (cnt_edge_q == target_value) & ~hyper_cs_ni;

    // gate the sampling of rwds to the selected clock edge
    tc_clk_gating i_rwds_sample_rise_gate (
        .clk_i     ( sampling_clk       ),
        .en_i      ( enable_sampling    ),
        .test_en_i ( test_mode_i        ),
        .clk_o     ( sampling_clk_gated )
    );

    // sample rwds exactly once using gated clock
    `FF(rwds_sample, hyper_rwds_i, '0, sampling_clk_gated);

    // pass rwds to phy-clock domain
    `FF(rwds_sample_o, rwds_sample, '0, clk_i);

endmodule
Loading