paicore_behavioral/noc/quadtree.vhdl
2025-07-18 05:09:06 -05:00

201 lines
9.4 KiB
VHDL

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.router_types.all;
use work.quadtree_components.all;
-- One level of a recursive quadtree network.
--
-- For level > 1 the architecture instantiates four Router components and four
-- child quadtree entities (one per quadrant); for level = 1 the upstream ports
-- are wired straight through to the processing-element (pe_*) ports.
entity quadtree is
    generic (
        -- Path counts; this level's upstream-side port vectors are
        -- 4*num_paths_up/2 elements wide.  Both values are halved before
        -- being handed to the Router instances and to the next level down.
        num_paths_up   : positive := 32;
        num_paths_down : positive := 16;
        -- Forwarded (decremented by one per level) to Router and to the
        -- child quadtree.  Presumably the bit widths needed to index the
        -- up/down paths -- confirm against the Router component.
        npu_bit_size   : positive := 6;
        npd_bit_size   : positive := 5;
        -- Buffer/FIFO parameters, forwarded unchanged to every Router and
        -- child quadtree.
        buffer_width   : positive := 64;
        buffer_depth   : positive := 4;
        fifo_ptr_size  : positive := 3;
        -- Level of this instance; it serves 4**level processing elements.
        level          : natural  := 5;
        -- Passed unchanged down the recursion and into
        -- calculate_num_routers_qt together with `level`.
        top_level      : positive := 5;
        -- Chip coordinates, forwarded unchanged to every Router.
        chip_x         : std_logic_vector(4 downto 0) := "00000";
        chip_y         : std_logic_vector(4 downto 0) := "00000"
    );
    port (
        -- One clock per router contained in this subtree.
        clks         : in  std_logic_vector(
            calculate_num_routers_qt(level, top_level)-1 downto 0);
        -- Reset input, compared against '0' inside the architecture.
        arstN        : in  std_logic;
        -- Coordinates inherited from the parent level.
        core_x       : in  std_logic_vector(4 downto 0);
        core_y       : in  std_logic_vector(4 downto 0);

        -- Upstream side, inputs.
        data_in_us   : in  t_DATA(4*num_paths_up/2-1 downto 0);
        rcv_reqs_us  : in  std_logic_vector(4*num_paths_up/2-1 downto 0);
        send_ack_us  : in  std_logic_vector(4*num_paths_up/2-1 downto 0);

        -- Processing-element side, inputs (one element per PE).
        pe_data_in   : in  t_DATA(4**level-1 downto 0);
        pe_rcv_reqs  : in  std_logic_vector(4**level-1 downto 0);
        pe_send_ack  : in  std_logic_vector(4**level-1 downto 0);

        -- Upstream side, outputs.
        data_out_us  : out t_DATA(4*num_paths_up/2-1 downto 0);
        rcv_acks_us  : out std_logic_vector(4*num_paths_up/2-1 downto 0);
        send_reqs_us : out std_logic_vector(4*num_paths_up/2-1 downto 0);

        -- Processing-element side, outputs (one element per PE).
        pe_rcv_acks  : out std_logic_vector(4**level-1 downto 0);
        pe_send_reqs : out std_logic_vector(4**level-1 downto 0);
        pe_data_out  : out t_DATA(4**level-1 downto 0)
    );
end entity quadtree;
-- Structural body of one quadtree level: derives per-quadrant coordinates,
-- then either passes the upstream ports straight to the PEs (level = 1) or
-- builds four routers plus four child quadtrees (level > 1).
architecture impl of quadtree is
    -- Number of router clocks carried by `clks` for this subtree.
    constant num_routers : natural := calculate_num_routers_qt(level, top_level);

    -- Width of one coordinate (matches core_x / core_y).
    constant COORD_W : positive := 5;

    -- Value written into coordinate bit (level-1) for quadrant 0, 1, 2, 3.
    constant QUAD_X_BIT : std_logic_vector(0 to 3) := "1100";
    constant QUAD_Y_BIT : std_logic_vector(0 to 3) := "1010";

    -- Four coordinates packed side by side; quadrant q occupies
    -- bits COORD_W*(q+1)-1 downto COORD_W*q.
    signal r_core_x : std_logic_vector(4*COORD_W-1 downto 0);
    signal r_core_y : std_logic_vector(4*COORD_W-1 downto 0);
begin

    -- Builds the coordinates handed to each quadrant: a copy of the incoming
    -- core_x / core_y with bit (level-1) replaced by the quadrant's code.
    -- The packed signals are only written while arstN = '0'; otherwise they
    -- keep their previous value.
    set_router_core: process(arstN, core_x, core_y)
        variable x_all, y_all : std_logic_vector(4*COORD_W-1 downto 0);
    begin
        if arstN = '0' then
            for q in 0 to 3 loop
                -- Start from the parent's coordinates ...
                x_all(COORD_W*(q+1)-1 downto COORD_W*q) := core_x;
                y_all(COORD_W*(q+1)-1 downto COORD_W*q) := core_y;
                -- ... then stamp in this level's quadrant bit.
                x_all(COORD_W*q+level-1) := QUAD_X_BIT(q);
                y_all(COORD_W*q+level-1) := QUAD_Y_BIT(q);
            end loop;
            r_core_x <= x_all;
            r_core_y <= y_all;
        end if;
    end process set_router_core;

    -- Leaf level: no routers, the upstream ports connect directly to the PEs.
    g_end_quadtree: if level = 1 generate
        -- towards the processing elements
        pe_data_out  <= data_in_us;
        pe_send_reqs <= rcv_reqs_us;
        pe_rcv_acks  <= send_ack_us;
        -- towards the parent
        data_out_us  <= pe_data_in;
        send_reqs_us <= pe_rcv_reqs;
        rcv_acks_us  <= pe_send_ack;
    end generate g_end_quadtree;

    -- Inner level: four routers, each paired with one child quadtree.
    g_quadtree: if level > 1 generate
        constant npu   : positive := num_paths_up/2;    -- upstream links per router
        constant npd   : positive := num_paths_down/2;
        constant DS_W  : positive := 4*npd;             -- downstream links per router
        constant RTR_W : positive := DS_W + npu;        -- all links of one router
        constant PE_W  : positive := 4**(level-1);      -- PEs below one child

        -- Router-side buses: four routers back to back, RTR_W entries each.
        signal r_data_in     : t_DATA(4*RTR_W-1 downto 0);
        signal r_data_out    : t_DATA(4*RTR_W-1 downto 0);
        signal r_rcv_reqs    : std_logic_vector(4*RTR_W-1 downto 0);
        signal r_snd_ack     : std_logic_vector(4*RTR_W-1 downto 0);
        signal r_snd_reqs    : std_logic_vector(4*RTR_W-1 downto 0);
        signal r_rcv_ack     : std_logic_vector(4*RTR_W-1 downto 0);

        -- Downstream halves (router <-> child quadtree), DS_W entries each.
        signal r_data_ds_in  : t_DATA(4*DS_W-1 downto 0);
        signal r_data_ds_out : t_DATA(4*DS_W-1 downto 0);
        signal r_rcv_reqs_ds : std_logic_vector(4*DS_W-1 downto 0);
        signal r_snd_ack_ds  : std_logic_vector(4*DS_W-1 downto 0);
        signal r_snd_reqs_ds : std_logic_vector(4*DS_W-1 downto 0);
        signal r_rcv_ack_ds  : std_logic_vector(4*DS_W-1 downto 0);

        -- Upstream halves (router -> parent), npu entries each.
        signal r_data_us_out : t_DATA(4*npu-1 downto 0);
        signal r_snd_reqs_us : std_logic_vector(4*npu-1 downto 0);
        signal r_rcv_ack_us  : std_logic_vector(4*npu-1 downto 0);
    begin

        g_elements: for i in 0 to 3 generate
            -- Layout of router i inside the router-side buses:
            --   [hi-1 downto mid] upstream links, [mid-1 downto lo] downstream links
            constant lo    : natural := RTR_W*i;
            constant mid   : natural := lo + DS_W;
            constant hi    : natural := lo + RTR_W;
            -- Router i's window in the upstream-only buses.
            constant us_lo : natural := npu*i;
            constant us_hi : natural := npu*(i+1);
            -- Router i's window in the downstream-only buses.
            constant ds_lo : natural := DS_W*i;
            constant ds_hi : natural := DS_W*(i+1);
        begin
            -- Gather the router's inputs from the parent and from the child.
            r_data_in(hi-1 downto mid)  <= data_in_us(us_hi-1 downto us_lo);
            r_data_in(mid-1 downto lo)  <= r_data_ds_in(ds_hi-1 downto ds_lo);
            r_rcv_reqs(hi-1 downto mid) <= rcv_reqs_us(us_hi-1 downto us_lo);
            r_rcv_reqs(mid-1 downto lo) <= r_rcv_reqs_ds(ds_hi-1 downto ds_lo);
            r_snd_ack(hi-1 downto mid)  <= send_ack_us(us_hi-1 downto us_lo);
            r_snd_ack(mid-1 downto lo)  <= r_snd_ack_ds(ds_hi-1 downto ds_lo);

            -- Split the router's outputs into parent-bound and child-bound parts.
            r_data_us_out(us_hi-1 downto us_lo) <= r_data_out(hi-1 downto mid);
            r_data_ds_out(ds_hi-1 downto ds_lo) <= r_data_out(mid-1 downto lo);
            r_snd_reqs_us(us_hi-1 downto us_lo) <= r_snd_reqs(hi-1 downto mid);
            r_snd_reqs_ds(ds_hi-1 downto ds_lo) <= r_snd_reqs(mid-1 downto lo);
            r_rcv_ack_us(us_hi-1 downto us_lo)  <= r_rcv_ack(hi-1 downto mid);
            r_rcv_ack_ds(ds_hi-1 downto ds_lo)  <= r_rcv_ack(mid-1 downto lo);

            -- Router of quadrant i; the four routers take the top four clocks.
            router_inst: Router
                generic map (
                    num_paths_up   => npu,
                    num_paths_down => npd,
                    npu_bit_size   => npu_bit_size-1,
                    npd_bit_size   => npd_bit_size-1,
                    level          => level-1,
                    buffer_width   => buffer_width,
                    buffer_depth   => buffer_depth,
                    fifo_ptr_size  => fifo_ptr_size,
                    chip_x         => chip_x,
                    chip_y         => chip_y
                )
                port map (
                    clk       => clks(num_routers-1-i),
                    arstN     => arstN,
                    core_x    => r_core_x(COORD_W*(i+1)-1 downto COORD_W*i),
                    core_y    => r_core_y(COORD_W*(i+1)-1 downto COORD_W*i),
                    data_in   => r_data_in(hi-1 downto lo),
                    rcv_reqs  => r_rcv_reqs(hi-1 downto lo),
                    send_ack  => r_snd_ack(hi-1 downto lo),
                    rcv_acks  => r_rcv_ack(hi-1 downto lo),
                    send_reqs => r_snd_reqs(hi-1 downto lo),
                    data_out  => r_data_out(hi-1 downto lo)
                );

            -- Child quadtree below router i.  The router's downstream outputs
            -- feed the child's upstream inputs and vice versa; the remaining
            -- (num_routers-4) clocks are split evenly between the children.
            router_subtree: entity work.quadtree
                generic map (
                    num_paths_up   => npu,
                    num_paths_down => npd,
                    npu_bit_size   => npu_bit_size-1,
                    npd_bit_size   => npd_bit_size-1,
                    level          => level-1,
                    top_level      => top_level,
                    buffer_width   => buffer_width,
                    buffer_depth   => buffer_depth,
                    fifo_ptr_size  => fifo_ptr_size,
                    chip_x         => chip_x,
                    chip_y         => chip_y
                )
                port map (
                    clks         => clks((num_routers-4)*(i+1)/4-1 downto (num_routers-4)*i/4),
                    arstN        => arstN,
                    core_x       => r_core_x(COORD_W*(i+1)-1 downto COORD_W*i),
                    core_y       => r_core_y(COORD_W*(i+1)-1 downto COORD_W*i),
                    data_in_us   => r_data_ds_out(ds_hi-1 downto ds_lo),
                    rcv_reqs_us  => r_snd_reqs_ds(ds_hi-1 downto ds_lo),
                    send_ack_us  => r_rcv_ack_ds(ds_hi-1 downto ds_lo),
                    pe_data_in   => pe_data_in(PE_W*(i+1)-1 downto PE_W*i),
                    pe_rcv_reqs  => pe_rcv_reqs(PE_W*(i+1)-1 downto PE_W*i),
                    pe_send_ack  => pe_send_ack(PE_W*(i+1)-1 downto PE_W*i),
                    data_out_us  => r_data_ds_in(ds_hi-1 downto ds_lo),
                    rcv_acks_us  => r_snd_ack_ds(ds_hi-1 downto ds_lo),
                    send_reqs_us => r_rcv_reqs_ds(ds_hi-1 downto ds_lo),
                    pe_rcv_acks  => pe_rcv_acks(PE_W*(i+1)-1 downto PE_W*i),
                    pe_send_reqs => pe_send_reqs(PE_W*(i+1)-1 downto PE_W*i),
                    pe_data_out  => pe_data_out(PE_W*(i+1)-1 downto PE_W*i)
                );
        end generate g_elements;

        -- Parent-bound router outputs leave through the upstream ports.
        data_out_us  <= r_data_us_out;
        rcv_acks_us  <= r_rcv_ack_us;
        send_reqs_us <= r_snd_reqs_us;
    end generate g_quadtree;

end architecture impl;