-- paicore_behavioral/router/parent_router.vhdl
-- 2025-07-18 05:09:06 -05:00
--
-- 263 lines
-- No EOL
-- 11 KiB
-- VHDL

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use work.router_types.all;
use work.router_components.all;
-- parent_router: router with 4*num_paths_down "_ds" links and 4*num_paths_up
-- "_us" links. Every link has a receiver + input FIFO and an output FIFO +
-- sender; a parent_arbiter moves packets from input side to output side.
--
-- The handshake vectors (rcv_reqs, rcv_acks, send_reqs, send_ack) are flat:
-- indices 0 .. 4*num_paths_down-1 belong to the "_ds" links and the remaining
-- indices (offset by 4*num_paths_down) belong to the "_us" links.
entity parent_router is
    generic (
        -- Link counts; the architecture instantiates four links per path.
        num_paths_up   : positive := 32;
        num_paths_down : positive := 16;
        -- Forwarded to the arbiter as lsb_size_up / lsb_size_down.
        npu_bit_size   : positive := 5;
        npd_bit_size   : positive := 4;
        -- Forwarded unchanged to the arbiter.
        level          : natural  := 5;
        -- FIFO parameters (WIDTH / DEPTH / F_PTR_SIZE of every fifo instance).
        buffer_width   : positive := 64;
        buffer_depth   : positive := 4;
        fifo_ptr_size  : positive := 3;
        -- Combined into the chip_pos constant handed to the arbiter.
        chip_x         : std_logic_vector(4 downto 0) := "00000";
        chip_y         : std_logic_vector(4 downto 0) := "00000"
    );
    port (
        clk         : in  std_logic;
        arstN       : in  std_logic;  -- asynchronous reset, active low
        -- Core coordinates; a slice of each is forwarded to the arbiter.
        core_x      : in  std_logic_vector(4 downto 0);
        core_y      : in  std_logic_vector(4 downto 0);
        -- Incoming data, one element per link.
        data_in_ds  : in  t_DATA(num_paths_down*4-1 downto 0);
        data_in_us  : in  t_DATA_EXT(num_paths_up*4-1 downto 0);
        -- Receive-side handshake (request in, acknowledge out).
        rcv_reqs    : in  std_logic_vector(num_paths_up*4+num_paths_down*4-1 downto 0);
        -- Send-side handshake (acknowledge in, request out).
        send_ack    : in  std_logic_vector(num_paths_up*4+num_paths_down*4-1 downto 0);
        rcv_acks    : out std_logic_vector(num_paths_up*4+num_paths_down*4-1 downto 0);
        send_reqs   : out std_logic_vector(num_paths_up*4+num_paths_down*4-1 downto 0);
        -- Outgoing data, one element per link.
        data_out_ds : out t_DATA(num_paths_down*4-1 downto 0);
        data_out_us : out t_DATA_EXT(num_paths_up*4-1 downto 0)
    );
end entity parent_router;
architecture impl of parent_router is

    -- Total number of links handled by the router ("_ds" first, then "_us").
    constant TOT_NUM_PATHS : positive := num_paths_up*4 + num_paths_down*4;

    -- Position information handed to the arbiter.
    constant chip_pos : t_chip_addr := (x => chip_x, y => chip_y);
    signal   core_pos : t_core_addr;

    -- Input side: receiver -> input FIFO.
    signal rcv_data       : t_DATA(TOT_NUM_PATHS-1 downto 0);            -- receiver data_out, FIFO data_in
    signal req_flag       : std_logic_vector(TOT_NUM_PATHS-1 downto 0);  -- receiver req_flag, FIFO wr_req
    signal rcv_accept_ack : std_logic_vector(TOT_NUM_PATHS-1 downto 0);  -- '1' while the input FIFO is not full
    signal rcv_buff_out   : t_FIFO_OUTS(TOT_NUM_PATHS-1 downto 0);       -- input FIFO data/full/empty
    signal rd_reqs        : std_logic_vector(TOT_NUM_PATHS-1 downto 0);  -- input FIFO rd_req

    -- Packet registers presented to the arbiter.
    signal rd_data        : t_DATA(TOT_NUM_PATHS-1 downto 0);
    signal rd_data_nxt    : t_DATA(TOT_NUM_PATHS-1 downto 0);
    signal valid_data     : std_logic_vector(TOT_NUM_PATHS-1 downto 0);
    signal valid_data_nxt : std_logic_vector(TOT_NUM_PATHS-1 downto 0);
    signal arb_complete   : std_logic_vector(TOT_NUM_PATHS-1 downto 0);  -- connected to the arbiter
    signal avai_paths     : std_logic_vector(TOT_NUM_PATHS-1 downto 0);  -- '1' while the output FIFO is not full

    -- Output side: arbiter -> output FIFO -> sender.
    signal snd_buff_wr_in   : t_FIFO_WR_INS(TOT_NUM_PATHS-1 downto 0);     -- output FIFO wr_req/data (arbiter buff_wr_in)
    signal snd_buff_out     : t_FIFO_OUTS(TOT_NUM_PATHS-1 downto 0);       -- output FIFO data/full/empty
    signal snd_buff_rd_in   : std_logic_vector(TOT_NUM_PATHS-1 downto 0);  -- connected to each sender's rd_req
    signal out_buff_rd_reqs : std_logic_vector(TOT_NUM_PATHS-1 downto 0);  -- output FIFO rd_req
    signal continue_send    : std_logic_vector(TOT_NUM_PATHS-1 downto 0);  -- sender continue_send

    -- Per-link state machines (current / next state).
    signal packet_states      : t_PACKET_STATES(TOT_NUM_PATHS-1 downto 0);
    signal packet_states_nxt  : t_PACKET_STATES(TOT_NUM_PATHS-1 downto 0);
    signal outb_rd_states     : t_OUT_BUFF_RD_STATES(TOT_NUM_PATHS-1 downto 0);
    signal outb_rd_states_nxt : t_OUT_BUFF_RD_STATES(TOT_NUM_PATHS-1 downto 0);

begin
-- One input FIFO per link: written when the receiver raises req_flag,
-- read when the packet state machine raises rd_reqs.
g_IN_BUFF_GEN: for path in 0 to TOT_NUM_PATHS-1 generate
    input_fifo: fifo
        generic map (
            WIDTH      => buffer_width,
            DEPTH      => buffer_depth,
            F_PTR_SIZE => fifo_ptr_size
        )
        port map (
            clk      => clk,
            arstN    => arstN,
            wr_req   => req_flag(path),
            data_in  => rcv_data(path),
            rd_req   => rd_reqs(path),
            data_out => rcv_buff_out(path).data,
            full     => rcv_buff_out(path).full,
            empty    => rcv_buff_out(path).empty
        );
end generate g_IN_BUFF_GEN;
-- One output FIFO per link: written through snd_buff_wr_in (connected to the
-- arbiter), read when the output-buffer state machine raises out_buff_rd_reqs.
g_OUT_BUFF_GEN: for path in 0 to TOT_NUM_PATHS-1 generate
    output_fifo: fifo
        generic map (
            WIDTH      => buffer_width,
            DEPTH      => buffer_depth,
            F_PTR_SIZE => fifo_ptr_size
        )
        port map (
            clk      => clk,
            arstN    => arstN,
            wr_req   => snd_buff_wr_in(path).wr_req,
            data_in  => snd_buff_wr_in(path).data,
            rd_req   => out_buff_rd_reqs(path),
            data_out => snd_buff_out(path).data,
            full     => snd_buff_out(path).full,
            empty    => snd_buff_out(path).empty
        );
end generate g_OUT_BUFF_GEN;
-- Senders for the "_ds" links; these occupy flat indices
-- 0 .. 4*num_paths_down-1, so the link index is used directly.
g_SENDER_DS_GEN: for ds in 0 to 4*num_paths_down-1 generate
    output_sender_ds: sender
        port map (
            clk           => clk,
            arstN         => arstN,
            continue_send => continue_send(ds),
            data_in       => snd_buff_out(ds).data,
            rd_req        => snd_buff_rd_in(ds),
            req           => send_reqs(ds),
            ack           => send_ack(ds),
            data          => data_out_ds(ds)
        );
end generate g_SENDER_DS_GEN;
-- Senders for the "_us" links; their flat index is the link index shifted
-- past the 4*num_paths_down "_ds" entries.
g_SENDER_US_GEN: for us in 0 to 4*num_paths_up-1 generate
    -- Position of this link inside the flat per-path vectors.
    constant flat : natural := us + 4*num_paths_down;
begin
    output_sender_us: sender
        port map (
            clk           => clk,
            arstN         => arstN,
            continue_send => continue_send(flat),
            data_in       => snd_buff_out(flat).data,
            rd_req        => snd_buff_rd_in(flat),
            req           => send_reqs(flat),
            ack           => send_ack(flat),
            data          => data_out_us(us)
        );
end generate g_SENDER_US_GEN;
-- Receivers for the "_ds" links (flat indices 0 .. 4*num_paths_down-1).
-- accept_ack is tied to "input FIFO not full" for the same link.
g_RECEIVER_DS_GEN: for ds in 0 to 4*num_paths_down-1 generate
    output_receiver: receiver
        port map (
            clk        => clk,
            arstN      => arstN,
            req        => rcv_reqs(ds),
            ack        => rcv_acks(ds),
            data       => data_in_ds(ds),
            accept_ack => rcv_accept_ack(ds),
            req_flag   => req_flag(ds),
            data_out   => rcv_data(ds)
        );
end generate g_RECEIVER_DS_GEN;
-- Receivers for the "_us" links; their flat index is the link index shifted
-- past the 4*num_paths_down "_ds" entries.
g_RECEIVER_US_GEN: for us in 0 to 4*num_paths_up-1 generate
    -- Position of this link inside the flat per-path vectors.
    constant flat : natural := us + 4*num_paths_down;
begin
    output_receiver: receiver
        port map (
            clk        => clk,
            arstN      => arstN,
            req        => rcv_reqs(flat),
            ack        => rcv_acks(flat),
            data       => data_in_us(us),
            accept_ack => rcv_accept_ack(flat),
            req_flag   => req_flag(flat),
            data_out   => rcv_data(flat)
        );
end generate g_RECEIVER_US_GEN;
-- Single arbiter shared by all links. It is given the registered packets and
-- their valid flags plus the "output FIFO not full" vector, and is connected
-- to arb_complete and to the output FIFO write interface.
arbiter0: parent_arbiter
    generic map (
        level          => level,
        num_paths_up   => num_paths_up,
        num_paths_down => num_paths_down,
        lsb_size_up    => npu_bit_size,
        lsb_size_down  => npd_bit_size
    )
    port map (
        -- clock / reset
        clk          => clk,
        arstN        => arstN,
        -- addressing
        chip_pos     => chip_pos,
        core_pos     => core_pos,
        -- packets waiting for a path
        packets      => rd_data,
        valid_data   => valid_data,
        -- output-side status and results
        avai_paths   => avai_paths,
        arb_complete => arb_complete,
        buff_wr_in   => snd_buff_wr_in
    );
-- A receiver may accept a new word only while its input FIFO is not full.
g_rcv_accept_ack: for path in 0 to TOT_NUM_PATHS-1 generate
    rcv_accept_ack(path) <= not rcv_buff_out(path).full;
end generate g_rcv_accept_ack;
-- A path is reported as available to the arbiter while its output FIFO is
-- not full.
g_snd_buff_out: for path in 0 to TOT_NUM_PATHS-1 generate
    avai_paths(path) <= not snd_buff_out(path).full;
end generate g_snd_buff_out;
-- Next-state logic of the per-link output-buffer read state machine.
--
--   EmptyFifo     -> StartRead      when the output FIFO is reported non-empty
--   StartRead     -> EmptyFifo      when the FIFO reports empty,
--                    WaitForSender  otherwise
--   WaitForSender -> stays until snd_buff_rd_in is '1', then
--                    EmptyFifo      when the FIFO reports empty,
--                    StartRead      otherwise
--
-- Every link first defaults to EmptyFifo; the case below only overrides the
-- transitions that lead somewhere else.
read_out_buff_sm_next_logic: process(outb_rd_states, snd_buff_out, snd_buff_rd_in)
begin
    for path in 0 to TOT_NUM_PATHS-1 loop
        outb_rd_states_nxt(path) <= EmptyFifo;

        case outb_rd_states(path) is
            when EmptyFifo =>
                if snd_buff_out(path).empty = '0' then
                    outb_rd_states_nxt(path) <= StartRead;
                end if;

            when StartRead =>
                if snd_buff_out(path).empty /= '1' then
                    outb_rd_states_nxt(path) <= WaitForSender;
                end if;

            when WaitForSender =>
                if snd_buff_rd_in(path) /= '1' then
                    -- Sender has not asked for the next word yet: keep waiting.
                    outb_rd_states_nxt(path) <= WaitForSender;
                elsif snd_buff_out(path).empty /= '1' then
                    outb_rd_states_nxt(path) <= StartRead;
                end if;

            when others =>
                null;  -- default (EmptyFifo) already assigned
        end case;
    end loop;
end process read_out_buff_sm_next_logic;
-- Output logic of the output-buffer read state machine: continue_send and the
-- output FIFO read request are both '1' exactly while a link is in StartRead,
-- and '0' in every other state.
read_out_buff_sm_out_gen: process(outb_rd_states)
begin
    -- Inactive by default for all links.
    continue_send    <= (others => '0');
    out_buff_rd_reqs <= (others => '0');

    for path in 0 to TOT_NUM_PATHS-1 loop
        if outb_rd_states(path) = StartRead then
            continue_send(path)    <= '1';
            out_buff_rd_reqs(path) <= '1';
        end if;
    end loop;
end process read_out_buff_sm_out_gen;
-- Next-state logic of the per-link packet state machine.
--
--   Idle            -> Arbitration  when a read of the input FIFO is requested
--   any other state -> Idle         when the arbiter signals arb_complete,
--                      InArbQueue   otherwise
--
-- Arbitration and the remaining states share the same transition rule, so
-- they are handled by a single branch.
packet_sm_next_logic: process(packet_states, rd_reqs, arb_complete)
begin
    for path in 0 to TOT_NUM_PATHS-1 loop
        if packet_states(path) = Idle then
            if rd_reqs(path) = '1' then
                packet_states_nxt(path) <= Arbitration;
            else
                packet_states_nxt(path) <= Idle;
            end if;
        else
            if arb_complete(path) = '1' then
                packet_states_nxt(path) <= Idle;
            else
                packet_states_nxt(path) <= InArbQueue;
            end if;
        end if;
    end loop;
end process packet_sm_next_logic;
-- Output logic of the per-link packet state machine. It drives the input FIFO
-- read request and the next values of the packet/valid registers that feed
-- the arbiter.
--
--   Idle        : request a FIFO read while the input FIFO is reported
--                 non-empty; packet register cleared, not valid.
--   Arbitration : latch the FIFO output word and mark it valid.
--   otherwise   : hold the latched word (still valid) until arb_complete,
--                 then clear it.
packet_sm_out_gen: process(packet_states, rcv_buff_out, arb_complete, rd_data)
begin
    for path in 0 to TOT_NUM_PATHS-1 loop
        -- Defaults: no read request, cleared packet, not valid.
        rd_reqs(path)        <= '0';
        rd_data_nxt(path)    <= (others => '0');
        valid_data_nxt(path) <= '0';

        case packet_states(path) is
            when Idle =>
                if rcv_buff_out(path).empty = '0' then
                    rd_reqs(path) <= '1';
                end if;

            when Arbitration =>
                -- First time the word is taken from the input buffer.
                rd_data_nxt(path)    <= rcv_buff_out(path).data;
                valid_data_nxt(path) <= '1';

            when others =>
                if arb_complete(path) /= '1' then
                    -- Still queued: keep presenting the same packet.
                    rd_data_nxt(path)    <= rd_data(path);
                    valid_data_nxt(path) <= '1';
                end if;
        end case;
    end loop;
end process packet_sm_out_gen;
-- State and data registers of both per-link state machines.
--
-- All four registers are cleared by the asynchronous, active-low reset:
--   * packet_states  -> Idle
--   * rd_data        -> all zeros
--   * valid_data     -> all zeros (no packet is presented to the arbiter)
--   * outb_rd_states -> EmptyFifo (the state in which no FIFO read and no
--                       continue_send is issued)
-- Every register clocked in this process must also appear in the reset
-- branch. Otherwise valid_data stays 'U' in simulation until the first clock
-- edge, and synthesis sees a mix of reset and non-reset flops in one process.
update_regs: process(arstN, clk)
begin
    if arstN = '0' then
        packet_states  <= (others => Idle);
        rd_data        <= (others => (others => '0'));
        valid_data     <= (others => '0');
        outb_rd_states <= (others => EmptyFifo);
    elsif rising_edge(clk) then
        packet_states  <= packet_states_nxt;
        rd_data        <= rd_data_nxt;
        valid_data     <= valid_data_nxt;
        outb_rd_states <= outb_rd_states_nxt;
    end if;
end process update_regs;
core_pos.x <= core_x(DEST_ADDR_SIZE-1 downto 1);
core_pos.y <= core_y(DEST_ADDR_SIZE-1 downto 1);
end impl;