-- VHDL source, 201 lines, 9.4 KiB
library ieee;
|
|
use ieee.std_logic_1164.all;
|
|
use ieee.numeric_std.all;
|
|
|
|
use work.router_types.all;
|
|
use work.quadtree_components.all;
|
|
|
|
-- Recursive quadtree of routers.
--
-- The entity exposes 4*num_paths_up/2 upstream channels (suffix _us) and
-- 4**level processing-element channels (prefix pe_).  In architecture impl a
-- level-1 instance wires the upstream channels straight to the PE channels;
-- a higher level instantiates four Router components, each paired with one
-- child quadtree of level-1.
entity quadtree is
  generic (
    -- Path counts.  Both are halved before being handed to every Router and
    -- every child quadtree.
    num_paths_up   : positive := 32;
    num_paths_down : positive := 16;
    -- Decremented by one per level and forwarded to Router / child quadtree.
    -- Presumably the bit widths used to index the paths -- TODO confirm.
    npu_bit_size   : positive := 6;
    npd_bit_size   : positive := 5;
    -- Forwarded unchanged to every Router and child quadtree.
    buffer_width   : positive := 64;
    buffer_depth   : positive := 4;
    fifo_ptr_size  : positive := 3;
    -- Level of this node.  Sizes the PE ports (4**level) and, together with
    -- top_level, the clks port.  Architecture impl handles level = 1 (leaf)
    -- and level > 1 (recursive case).
    level          : natural  := 5;
    -- Only used as an argument of calculate_num_routers_qt and forwarded
    -- unchanged to child quadtrees.
    top_level      : positive := 5;
    -- Forwarded unchanged to every Router and child quadtree.
    chip_x         : std_logic_vector(4 downto 0) := "00000";
    chip_y         : std_logic_vector(4 downto 0) := "00000"
  );
  port (
    -- One clock per router; the width comes from calculate_num_routers_qt.
    clks         : in  std_logic_vector(
                         calculate_num_routers_qt(level, top_level)-1 downto 0);
    -- Compared against '0' in architecture impl, i.e. treated as active low.
    arstN        : in  std_logic;

    -- 5-bit coordinates handed down by the parent; architecture impl derives
    -- one coordinate pair per child from them.
    core_x       : in  std_logic_vector(4 downto 0);
    core_y       : in  std_logic_vector(4 downto 0);

    -- Upstream-side inputs.
    data_in_us   : in  t_DATA(4*num_paths_up/2-1 downto 0);
    rcv_reqs_us  : in  std_logic_vector(4*num_paths_up/2-1 downto 0);
    send_ack_us  : in  std_logic_vector(4*num_paths_up/2-1 downto 0);

    -- Processing-element-side inputs.
    pe_data_in   : in  t_DATA(4**level-1 downto 0);
    pe_rcv_reqs  : in  std_logic_vector(4**level-1 downto 0);
    pe_send_ack  : in  std_logic_vector(4**level-1 downto 0);

    -- Upstream-side outputs.
    data_out_us  : out t_DATA(4*num_paths_up/2-1 downto 0);
    rcv_acks_us  : out std_logic_vector(4*num_paths_up/2-1 downto 0);
    send_reqs_us : out std_logic_vector(4*num_paths_up/2-1 downto 0);

    -- Processing-element-side outputs.
    pe_rcv_acks  : out std_logic_vector(4**level-1 downto 0);
    pe_send_reqs : out std_logic_vector(4**level-1 downto 0);
    pe_data_out  : out t_DATA(4**level-1 downto 0)
  );
end quadtree;
|
|
|
|
-- Recursive structural implementation of quadtree.
--
--   level = 1 : leaf.  The upstream channels are wired straight through to
--               the processing-element channels.
--   level > 1 : four Router instances.  Router i owns npu upstream-facing
--               channels and 4*npd downstream-facing channels; the latter are
--               connected to child quadtree i (level-1), which in turn owns
--               one quarter of the PE channels.
architecture impl of quadtree is
  -- Number of entries in the clks port of this instance.
  constant num_routers : natural := calculate_num_routers_qt(level, top_level);

  -- Four 5-bit coordinates, child i in bits 5*i+4 downto 5*i.
  signal r_core_x : std_logic_vector(19 downto 0);
  signal r_core_y : std_logic_vector(19 downto 0);
begin

  -- level = 0 matches neither generate branch below and would make the bit
  -- index 5*i+level-1 negative; level > 5 would push that index outside the
  -- 5-bit coordinate field.  Fail early with a readable message instead.
  assert level >= 1 and level <= 5
    report "quadtree: generic level must be in the range 1 to 5, got " &
           integer'image(level)
    severity failure;

  -- Builds the coordinates of the four children: each child gets a copy of
  -- core_x/core_y with bit (level-1) replaced by a per-child constant
  --   child 0: x=1 y=1   child 1: x=1 y=0   child 2: x=0 y=1   child 3: x=0 y=0
  --
  -- r_core_x/r_core_y are only assigned while arstN = '0'; there is no else
  -- branch, so the values seen during reset are held afterwards.
  -- NOTE(review): this describes level-sensitive storage (a latch) -- confirm
  -- that capturing the coordinates during reset is the intended behaviour.
  set_router_core: process(arstN, core_x, core_y)
    variable v_core_x, v_core_y : std_logic_vector(19 downto 0);
    variable quad_bit           : natural range 0 to 19;
  begin
    if arstN = '0' then
      for i in 0 to 3 loop
        -- Every one of the 20 bits is written here, so no default is needed.
        v_core_x(5*i+4 downto 5*i) := core_x;
        v_core_y(5*i+4 downto 5*i) := core_y;

        quad_bit := 5*i + level - 1;
        case i is
          when 0 =>
            v_core_x(quad_bit) := '1';
            v_core_y(quad_bit) := '1';
          when 1 =>
            v_core_x(quad_bit) := '1';
            v_core_y(quad_bit) := '0';
          when 2 =>
            v_core_x(quad_bit) := '0';
            v_core_y(quad_bit) := '1';
          when others =>
            v_core_x(quad_bit) := '0';
            v_core_y(quad_bit) := '0';
        end case;
      end loop;

      r_core_x <= v_core_x;
      r_core_y <= v_core_y;
    end if;
  end process set_router_core;

  -- Leaf: no routers, upstream and PE channels are connected directly.
  g_end_quadtree: if level = 1 generate
    -- The direct assignments below need equally long vectors on both sides.
    assert 4*num_paths_up/2 = 4**level
      report "quadtree: at level 1 the upstream width 4*num_paths_up/2 must " &
             "equal the PE width 4**level"
      severity failure;

    pe_data_out  <= data_in_us;
    pe_send_reqs <= rcv_reqs_us;
    pe_rcv_acks  <= send_ack_us;

    data_out_us  <= pe_data_in;
    send_reqs_us <= pe_rcv_reqs;
    rcv_acks_us  <= pe_send_ack;
  end generate g_end_quadtree;

  -- Recursive case: four routers, each feeding one child quadtree.
  g_quadtree: if level > 1 generate
    constant npu          : positive := num_paths_up/2;
    constant npd          : positive := num_paths_down/2;
    constant pow_level    : positive := 4**(level-1);  -- PE channels per child
    constant ds_ports     : positive := 4*npd;         -- downstream channels per router
    constant router_ports : positive := ds_ports+npu;  -- all channels per router

    -- Router-side buses.  Router i uses elements
    -- router_ports*(i+1)-1 downto router_ports*i; inside that slice the upper
    -- npu elements face upstream and the lower ds_ports face downstream.
    signal r_data_in     : t_DATA(4*router_ports-1 downto 0);
    signal r_data_out    : t_DATA(4*router_ports-1 downto 0);
    signal r_rcv_reqs    : std_logic_vector(4*router_ports-1 downto 0);
    signal r_snd_ack     : std_logic_vector(4*router_ports-1 downto 0);
    signal r_snd_reqs    : std_logic_vector(4*router_ports-1 downto 0);
    signal r_rcv_ack     : std_logic_vector(4*router_ports-1 downto 0);

    -- Buses between the routers and the child quadtrees (ds_ports per child).
    signal r_data_ds_in  : t_DATA(4*ds_ports-1 downto 0);
    signal r_data_ds_out : t_DATA(4*ds_ports-1 downto 0);
    signal r_rcv_reqs_ds : std_logic_vector(4*ds_ports-1 downto 0);
    signal r_snd_ack_ds  : std_logic_vector(4*ds_ports-1 downto 0);
    signal r_snd_reqs_ds : std_logic_vector(4*ds_ports-1 downto 0);
    signal r_rcv_ack_ds  : std_logic_vector(4*ds_ports-1 downto 0);

    -- Router outputs that leave this level through the upstream ports.
    signal r_data_us_out : t_DATA(4*npu-1 downto 0);
    signal r_snd_reqs_us : std_logic_vector(4*npu-1 downto 0);
    signal r_rcv_ack_us  : std_logic_vector(4*npu-1 downto 0);
  begin
    -- A child quadtree is instantiated with num_paths_up => npu, so its
    -- upstream ports are 4*npu/2 wide; they are connected to ds_ports-wide
    -- slices below and the two widths therefore have to agree.
    assert 4*npu/2 = ds_ports
      report "quadtree: child upstream width 4*npu/2 must equal the router " &
             "downstream width 4*npd"
      severity failure;

    -- Pack the upstream and downstream buses into the per-router buses and
    -- unpack the router outputs again.
    map_routers_inputs_outputs: process(data_in_us, r_data_ds_in, rcv_reqs_us,
                                        r_rcv_reqs_ds, send_ack_us, r_snd_ack_ds,
                                        r_rcv_ack, r_snd_reqs, r_data_out)
      variable rui, rmi, rli : natural range 0 to 4*router_ports;
      variable usri, usli    : natural range 0 to 4*router_ports;
      variable dsri, dsli    : natural range 0 to 4*router_ports;
    begin
      for i in 0 to 3 loop
        rui  := router_ports*(i+1);       -- router upper index (exclusive)
        rmi  := ds_ports*(i+1) + npu*i;   -- router middle index: up/down split
        rli  := router_ports*i;           -- router lower index
        usri := npu*(i+1);                -- upstream upper index (exclusive)
        usli := npu*i;                    -- upstream lower index
        dsri := ds_ports*(i+1);           -- downstream upper index (exclusive)
        dsli := ds_ports*i;               -- downstream lower index

        -- inputs
        r_data_in(rui-1 downto rmi)  <= data_in_us(usri-1 downto usli);
        r_data_in(rmi-1 downto rli)  <= r_data_ds_in(dsri-1 downto dsli);

        r_rcv_reqs(rui-1 downto rmi) <= rcv_reqs_us(usri-1 downto usli);
        r_rcv_reqs(rmi-1 downto rli) <= r_rcv_reqs_ds(dsri-1 downto dsli);

        r_snd_ack(rui-1 downto rmi)  <= send_ack_us(usri-1 downto usli);
        r_snd_ack(rmi-1 downto rli)  <= r_snd_ack_ds(dsri-1 downto dsli);

        -- outputs
        r_rcv_ack_us(usri-1 downto usli)  <= r_rcv_ack(rui-1 downto rmi);
        r_rcv_ack_ds(dsri-1 downto dsli)  <= r_rcv_ack(rmi-1 downto rli);

        r_snd_reqs_us(usri-1 downto usli) <= r_snd_reqs(rui-1 downto rmi);
        r_snd_reqs_ds(dsri-1 downto dsli) <= r_snd_reqs(rmi-1 downto rli);

        r_data_us_out(usri-1 downto usli) <= r_data_out(rui-1 downto rmi);
        r_data_ds_out(dsri-1 downto dsli) <= r_data_out(rmi-1 downto rli);
      end loop;
    end process map_routers_inputs_outputs;

    g_elements: for i in 0 to 3 generate
      -- Router i is clocked from one of the top four clks entries.
      router_inst: Router
        generic map(
          num_paths_up   => npu,
          num_paths_down => npd,
          npu_bit_size   => npu_bit_size-1,
          npd_bit_size   => npd_bit_size-1,
          level          => level-1,
          buffer_width   => buffer_width,
          buffer_depth   => buffer_depth,
          fifo_ptr_size  => fifo_ptr_size,
          chip_x         => chip_x,
          chip_y         => chip_y)
        port map(
          clk       => clks(num_routers-1-i),
          arstN     => arstN,
          core_x    => r_core_x(5*(i+1)-1 downto 5*i),
          core_y    => r_core_y(5*(i+1)-1 downto 5*i),
          data_in   => r_data_in(router_ports*(i+1)-1 downto router_ports*i),
          rcv_reqs  => r_rcv_reqs(router_ports*(i+1)-1 downto router_ports*i),
          send_ack  => r_snd_ack(router_ports*(i+1)-1 downto router_ports*i),
          rcv_acks  => r_rcv_ack(router_ports*(i+1)-1 downto router_ports*i),
          send_reqs => r_snd_reqs(router_ports*(i+1)-1 downto router_ports*i),
          data_out  => r_data_out(router_ports*(i+1)-1 downto router_ports*i));

      -- Child i takes the i-th quarter of the remaining num_routers-4 clocks,
      -- the i-th quarter of the PE channels and router i's downstream side.
      -- The router's outputs are the child's upstream inputs and vice versa.
      router_subtree: entity work.quadtree
        generic map(
          num_paths_up   => npu,
          num_paths_down => npd,
          npu_bit_size   => npu_bit_size-1,
          npd_bit_size   => npd_bit_size-1,
          level          => level-1,
          top_level      => top_level,
          buffer_width   => buffer_width,
          buffer_depth   => buffer_depth,
          fifo_ptr_size  => fifo_ptr_size,
          chip_x         => chip_x,
          chip_y         => chip_y)
        port map(
          clks         => clks((num_routers-4)*(i+1)/4-1 downto (num_routers-4)*i/4),
          arstN        => arstN,
          core_x       => r_core_x(5*(i+1)-1 downto 5*i),
          core_y       => r_core_y(5*(i+1)-1 downto 5*i),
          data_in_us   => r_data_ds_out(ds_ports*(i+1)-1 downto ds_ports*i),
          rcv_reqs_us  => r_snd_reqs_ds(ds_ports*(i+1)-1 downto ds_ports*i),
          send_ack_us  => r_rcv_ack_ds(ds_ports*(i+1)-1 downto ds_ports*i),
          pe_data_in   => pe_data_in(pow_level*(i+1)-1 downto pow_level*i),
          pe_rcv_reqs  => pe_rcv_reqs(pow_level*(i+1)-1 downto pow_level*i),
          pe_send_ack  => pe_send_ack(pow_level*(i+1)-1 downto pow_level*i),
          data_out_us  => r_data_ds_in(ds_ports*(i+1)-1 downto ds_ports*i),
          rcv_acks_us  => r_snd_ack_ds(ds_ports*(i+1)-1 downto ds_ports*i),
          send_reqs_us => r_rcv_reqs_ds(ds_ports*(i+1)-1 downto ds_ports*i),
          pe_rcv_acks  => pe_rcv_acks(pow_level*(i+1)-1 downto pow_level*i),
          pe_send_reqs => pe_send_reqs(pow_level*(i+1)-1 downto pow_level*i),
          pe_data_out  => pe_data_out(pow_level*(i+1)-1 downto pow_level*i)
        );
    end generate g_elements;

    data_out_us  <= r_data_us_out;
    rcv_acks_us  <= r_rcv_ack_us;
    send_reqs_us <= r_snd_reqs_us;
  end generate g_quadtree;

end impl;
|