-------------------------------------------------------------------------------
-- Title      : Switch allocator
-- Project    : Modular, heterogenous 3D NoC
-------------------------------------------------------------------------------
-- File       : switch_allocator.vhd
-- Author     : Lennart Bamberg
-- Company    :
-- Created    : 2018-11-15
-- Last update: 2018-11-28
-- Platform   :
-- Standard   : VHDL'93/02
-------------------------------------------------------------------------------
-- Description: Switch allocator which sets the input-to-output connections
--              between assigned input VCs (with valid data) and the
--              corresponding output VCs (when credit is available).
-------------------------------------------------------------------------------
-- Designer comments:
--   With an encoded vc_transfer and a valid signal, the input VC select
--   signal could be determined in advance. Since this is part of the
--   critical path, it has the potential to improve timing if we slightly
--   modify the design (-power).
--   A second possibility to improve speed would be to pre-calculate the next
--   switch allocation. This is realized by simply swapping the regular
--   RR arbiters for delayed RR arbiters (see the DESIGNER_HINT comment in
--   OUTPUT_ARB_GEN). The drawback would be a one-clock-cycle larger initial
--   latency for the first flit of a new packet.
-------------------------------------------------------------------------------
-- Copyright (c) 2018
-------------------------------------------------------------------------------
-- Revisions  :
-- Date        Version  Author   Description
-- 2018-11-15  1.0      bamberg  Created
-------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_misc.all;
use work.NOC_3D_PACKAGE.all;
use work.TURNS_3D_NOC.all;

-------------------------------------------------------------------------------
-- Entity: switch_allocator
--
-- Per physical output port, selects which allocated output VC (and thereby
-- which input port / input VC) may traverse the crossbar. Produces the
-- crossbar select per output port, the transfer strobes towards the input
-- buffers and the write strobes towards the output side.
--
-- All "*_vec" ports are flat vectors in which the VCs of port 0 come first;
-- the slice belonging to port i is (upper_range(vec, i) downto
-- lower_range(vec, i)).
-------------------------------------------------------------------------------
entity switch_allocator is
  generic (
    port_num           : integer           := 7;
    -- integer vector of range "0 to port_num-1"
    port_exist         : integer_vec       := (0, 1, 2, 3, 4, 5, 6);
    vc_num_vec         : integer_vec       := (4, 4, 4, 4, 4, 4, 4);
    vc_num_out_vec     : integer_vec       := (4, 4, 4, 4, 4, 4, 4);
    -- integer vector of range "0 to port_num-1, 0 to max_vc_num-1"
    vc_depth_array     : vc_prop_int_array := ((4, 4, 4, 4),
                                               (4, 4, 4, 4),
                                               (4, 4, 4, 4),
                                               (4, 4, 4, 4),
                                               (4, 4, 4, 4),
                                               (4, 4, 4, 4),
                                               (4, 4, 4, 4));
    vc_depth_out_array : vc_prop_int_array := ((4, 4, 4, 4),
                                               (4, 4, 4, 4),
                                               (4, 4, 4, 4),
                                               (4, 4, 4, 4),
                                               (4, 4, 4, 4),
                                               (4, 4, 4, 4),
                                               (4, 4, 4, 4));
    rout_algo          : string            := "DXYU"
    );
  port(
    clk, rst          : in  std_logic;
    -- Inputs from VC allocator
    input_vc_in_use   : in  std_logic_vector(int_vec_sum(vc_num_vec)-1 downto 0);
    output_vc_in_use  : in  std_logic_vector(int_vec_sum(vc_num_out_vec)-1 downto 0);
    crossbar_ctrl_vec : in  std_logic_vector(int_vec_sum(vc_num_out_vec)*
                                             bit_width(port_num-1)-1 downto 0);
    vc_sel_enc_vec    : in  vc_status_array_enc(int_vec_sum(vc_num_out_vec)-1 downto 0);
    -- Inputs from input buffer
    valid_data_vc_vec : in  std_logic_vector(int_vec_sum(vc_num_vec)-1 downto 0);
    -- Inputs from link
    incr_rx_vec       : in  std_logic_vector(int_vec_sum(vc_num_out_vec)-1 downto 0);
    -- Output to crossbar
    crossbar_ctrl     : out std_logic_vector(port_num*bit_width(port_num-1)-1 downto 0);
    -- Outputs to input buffer (vc_transfer) and output buffer (incr_tx)
    vc_transfer_vec   : out std_logic_vector(int_vec_sum(vc_num_vec)-1 downto 0);
    vc_write_tx_vec   : out std_logic_vector(int_vec_sum(vc_num_out_vec)-1 downto 0)
    );
end entity switch_allocator;

architecture rtl of switch_allocator is
  -- Turn table of the selected routing algorithm; indexed below as
  -- poss_routes(<input port id>)(<output port id>).
  constant poss_routes : turn_table_3D := routes_3D(rout_algo);
  -- Width of one crossbar select field.
  constant sel_width   : positive      := bit_width(port_num-1);

  -- One request bit per input VC, and the per-input-port grant vector.
  signal switch_rq           : std_logic_vector(int_vec_sum(vc_num_vec)-1 downto 0);
  signal switch_rq_grant     : vc_status_array(port_num-1 downto 0);
  signal vc_transfer_vec_int : std_logic_vector(int_vec_sum(vc_num_vec)-1 downto 0);

  -- switch_acks(o)(i) = '1': output port o acknowledges input port i.
  type switch_ack_array is array(port_num-1 downto 0) of
    std_logic_vector(port_num-1 downto 0);
  signal switch_acks : switch_ack_array;
  -- OR over all output ports: input port i has been acknowledged.
  signal switch_ack  : std_logic_vector(port_num-1 downto 0);

  -- crossbar_ctrl_vec unpacked into one select field per output VC.
  type crossbar_ctrl_array_type is array(int_vec_sum(vc_num_out_vec)-1 downto 0) of
    std_logic_vector(sel_width-1 downto 0);
  signal crossbar_ctrl_vc_out : crossbar_ctrl_array_type;

  -- poss_channel_rq(o)(x): grant vector of the x-th possible input of output o.
  type rq_array is array (port_num-1 downto 0) of
    vc_status_array(port_num-2 downto 0);
  signal poss_channel_rq : rq_array;

  signal channel_rq, channel_grant : std_logic_vector(int_vec_sum(vc_num_out_vec)-1 downto 0);
  signal credit_avail              : std_logic_vector(int_vec_sum(vc_num_out_vec)-1 downto 0);

begin  -- architecture rtl

  -----------------------------------------------------------------------------
  -- Transform the crossbar ctrls into an array for better readability
  -----------------------------------------------------------------------------
  XBAR_CTRL : for i in 0 to int_vec_sum(vc_num_out_vec)-1 generate
    crossbar_ctrl_vc_out(i) <= crossbar_ctrl_vec((i+1)*sel_width-1 downto i*sel_width);
  end generate;

  -----------------------------------------------------------------------------
  -- When an input VC is assigned and contains data: a switch request is made
  -----------------------------------------------------------------------------
  switch_rq <= input_vc_in_use and valid_data_vc_vec;

  -----------------------------------------------------------------------------
  -- For each input port, grant one of the switch requests of its VCs in a
  -- RR manner. The arbiter is acknowledged through switch_ack(i), which is
  -- derived from the output-side winners further below.
  -- Only bits (vc_num_vec(i)-1 downto 0) of switch_rq_grant(i) are driven.
  -----------------------------------------------------------------------------
  INPUT_ARB_GEN : for i in 0 to port_num-1 generate
    constant ur_vc : natural := upper_range(vc_num_vec, i);
    constant lr_vc : natural := lower_range(vc_num_vec, i);
  begin
    PASS_NO_VC : if vc_num_vec(i) = 1 generate  -- single vc --> no arb. required
      switch_rq_grant(i)(0) <= switch_rq(lr_vc);
    end generate;

    GEN_RR : if vc_num_vec(i) > 1 generate
      rr_arbiter : entity work.rr_arbiter_no_delay
        generic map (
          CNT => vc_num_vec(i))
        port map (
          clk   => clk,
          rst   => rst,
          req   => switch_rq(ur_vc downto lr_vc),
          ack   => switch_ack(i),
          grant => switch_rq_grant(i)(vc_num_vec(i)-1 downto 0));
    end generate;
  end generate;

  -----------------------------------------------------------------------------
  -- Wiring of the granted switch requests to the corresponding outputs in the
  -- order the crossbar select is defined (clock-wise / modulo): select value
  -- x at output y addresses input (y + x + 1) mod port_num. We exploit that
  -- some requests are not possible, since the routing forbids them for
  -- deadlock and livelock avoidance; those entries stay don't-care ('-') to
  -- reduce the circuit complexity after synthesis.
  -----------------------------------------------------------------------------
  WIRING : process(switch_rq_grant)
    variable var_in : natural;
  begin
    poss_channel_rq <= (others => (others => (others => '-')));
    for y in 0 to port_num-1 loop      -- For the phy channel at output y,
      for x in 0 to port_num-2 loop    -- the x^th possible input is
        -- Same wrap-around mapping as used for input_winner in
        -- DECODE_INPUT_ACK; y + x + 1 never reaches 2*port_num, so a single
        -- modulo is sufficient.
        var_in := (y + x + 1) mod port_num;
        if poss_routes(port_exist(var_in))(port_exist(y)) then
          poss_channel_rq(y)(x) <= switch_rq_grant(var_in);
        end if;
      end loop;
    end loop;
  end process;

  -----------------------------------------------------------------------------
  -- Choose (multiplex) the "channel_rq" out of the "poss_channel_rq" depending
  -- on the VC allocation ("crossbar_ctrl_vec" & "vc_sel_enc_vec"). An output
  -- VC only requests the channel when it is in use and credit is available.
  -- NOTE(review): the select value indexes a (port_num-2 downto 0) array; it
  -- is assumed that the VC allocator never supplies a larger value - confirm.
  -----------------------------------------------------------------------------
  MUX_CHANNEL_RQ_GEN : process(credit_avail, crossbar_ctrl_vc_out,
                               output_vc_in_use, poss_channel_rq, vc_sel_enc_vec)
    variable assigned_input       : std_logic_vector(sel_width-1 downto 0);
    variable assigned_vc          : vc_status_vec_enc;
    variable lr_vc_out, ur_vc_out : natural range 0 to int_vec_sum(vc_num_out_vec)-1;
  begin
    for port_i in 0 to port_num-1 loop
      lr_vc_out := lower_range(vc_num_out_vec, port_i);
      ur_vc_out := upper_range(vc_num_out_vec, port_i);
      for vc_i in lr_vc_out to ur_vc_out loop
        assigned_input   := crossbar_ctrl_vc_out(vc_i);
        assigned_vc      := vc_sel_enc_vec(vc_i);
        channel_rq(vc_i) <= poss_channel_rq(port_i)(slv2int(assigned_input))(slv2int(assigned_vc))
                            and output_vc_in_use(vc_i)
                            and credit_avail(vc_i);
      end loop;
    end loop;
  end process;

  -----------------------------------------------------------------------------
  -- Generate the physical channel (output) arbiters and, per output VC, the
  -- credit counter that produces credit_avail.
  -----------------------------------------------------------------------------
  OUTPUT_ARB_GEN : for i in 0 to port_num-1 generate
    constant ur_vc_out : natural := upper_range(vc_num_out_vec, i);
    constant lr_vc_out : natural := lower_range(vc_num_out_vec, i);
  begin
    PASS_NO_VC : if vc_num_out_vec(i) = 1 generate  -- single vc --> no arb. required
      channel_grant(lr_vc_out) <= channel_rq(lr_vc_out);
    end generate;

    -- DESIGNER_HINT maybe use a delayed rr_arbiter (allows speed improvement)
    GEN_RR : if vc_num_out_vec(i) > 1 generate
      rr_arbiter : entity work.rr_arbiter_no_delay
        generic map (
          CNT => vc_num_out_vec(i))
        port map (
          clk   => clk,
          rst   => rst,
          req   => channel_rq(ur_vc_out downto lr_vc_out),
          ack   => '1',                 -- output arbiters are always acknowledged
          grant => channel_grant(ur_vc_out downto lr_vc_out));
    end generate;

    CREDIT_COUNT_GENERATE : for vc_i in lr_vc_out to ur_vc_out generate
      credit_count_i : entity work.credit_count_single
        generic map (
          vc_depth_out => vc_depth_out_array(i)(vc_i-lr_vc_out))
        port map (
          clk          => clk,
          rst          => rst,
          incr_rx      => incr_rx_vec(vc_i),
          vc_write_tx  => channel_grant(vc_i),  -- EQUAL: vc_write_tx_vec
          credit_avail => credit_avail(vc_i));
    end generate;
  end generate;

  vc_write_tx_vec <= channel_grant;

  -----------------------------------------------------------------------------
  -- Crossbar ctrl and input acknowledge, derived from the winning output VC
  -----------------------------------------------------------------------------
  DECODE_INPUT_ACK : for i in 0 to port_num-1 generate
    constant ur : natural := upper_range(vc_num_out_vec, i);
    constant lr : natural := lower_range(vc_num_out_vec, i);
  begin
    process(channel_grant(ur downto lr), crossbar_ctrl_vc_out)
      variable winner       : natural range 0 to int_vec_sum(vc_num_out_vec)-1;
      variable input_winner : natural range 0 to port_num-1;
    begin
      switch_acks(i) <= (others => '0');

      -- Flat index of the output VC that currently holds the grant.
      if vc_num_out_vec(i) = 1 then
        winner := lr;
      else
        winner := lr + one_hot2int(channel_grant(ur downto lr));
      end if;

      -- The select of the winning VC is forwarded unconditionally, i.e. also
      -- when no grant is active. (A former '-' default assignment was always
      -- overwritten by this statement and has been removed as dead code.)
      crossbar_ctrl((i+1)*sel_width-1 downto i*sel_width) <= crossbar_ctrl_vc_out(winner);

      -- Only acknowledge an input port when a grant is really given. The
      -- select value is translated back to the absolute input port number
      -- with the same modulo mapping as in the WIRING process.
      if or_reduce(channel_grant(ur downto lr)) = '1' then
        input_winner                 := (slv2int(crossbar_ctrl_vc_out(winner)) + i + 1) mod port_num;
        switch_acks(i)(input_winner) <= '1';
      end if;
    end process;
  end generate;

  -- An input port is acknowledged when any output port acknowledges it.
  INP_ACK : process(switch_acks)
    variable switch_ack_var : std_logic_vector(port_num-1 downto 0);
  begin
    switch_ack_var := (others => '0');
    for i in 0 to port_num-1 loop
      switch_ack_var := switch_ack_var or switch_acks(i);
    end loop;
    switch_ack <= switch_ack_var;
  end process;

  -- The granted VC(s) of every acknowledged input port are signalled as
  -- transferred; all other transfer bits stay '0'.
  process(switch_ack, switch_rq_grant)
    variable ur, lr : natural range 0 to int_vec_sum(vc_num_vec)-1;
  begin
    vc_transfer_vec_int <= (others => '0');
    for i in 0 to port_num-1 loop
      if switch_ack(i) = '1' then
        ur                            := upper_range(vc_num_vec, i);
        lr                            := lower_range(vc_num_vec, i);
        vc_transfer_vec_int(ur downto lr) <= switch_rq_grant(i)(vc_num_vec(i)-1 downto 0);
      end if;
    end loop;
  end process;

  vc_transfer_vec <= vc_transfer_vec_int;

end architecture;