-- quadtree: recursively built tree of routers.
--
-- For level > 1 the architecture instantiates four Router components and four
-- child quadtree entities (generic level-1, path counts halved). For level = 1
-- it contains no routers and wires the upstream ports straight to the PE ports.
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

use work.router_types.all;
use work.quadtree_components.all;

entity quadtree is
    generic (
        num_paths_up   : positive := 32;
        num_paths_down : positive := 16;
        npu_bit_size   : positive := 6;
        npd_bit_size   : positive := 5;
        buffer_width   : positive := 64;
        buffer_depth   : positive := 4;
        fifo_ptr_size  : positive := 3;
        level          : natural  := 5;
        top_level      : positive := 5;
        chip_x         : std_logic_vector(4 downto 0) := "00000";
        chip_y         : std_logic_vector(4 downto 0) := "00000"
    );
    port (
        -- One clock per router in this (sub)tree; width comes from
        -- calculate_num_routers_qt (declared outside this file).
        clks         : in  std_logic_vector(
                               calculate_num_routers_qt(level, top_level)-1 downto 0);
        arstN        : in  std_logic;
        core_x       : in  std_logic_vector(4 downto 0);
        core_y       : in  std_logic_vector(4 downto 0);

        -- Upstream-side inputs.
        data_in_us   : in  t_DATA(4*num_paths_up/2-1 downto 0);
        rcv_reqs_us  : in  std_logic_vector(4*num_paths_up/2-1 downto 0);
        send_ack_us  : in  std_logic_vector(4*num_paths_up/2-1 downto 0);

        -- PE-side inputs.
        pe_data_in   : in  t_DATA(4**level-1 downto 0);
        pe_rcv_reqs  : in  std_logic_vector(4**level-1 downto 0);
        pe_send_ack  : in  std_logic_vector(4**level-1 downto 0);

        -- Upstream-side outputs.
        data_out_us  : out t_DATA(4*num_paths_up/2-1 downto 0);
        rcv_acks_us  : out std_logic_vector(4*num_paths_up/2-1 downto 0);
        send_reqs_us : out std_logic_vector(4*num_paths_up/2-1 downto 0);

        -- PE-side outputs.
        pe_rcv_acks  : out std_logic_vector(4**level-1 downto 0);
        pe_send_reqs : out std_logic_vector(4**level-1 downto 0);
        pe_data_out  : out t_DATA(4**level-1 downto 0)
    );
end entity quadtree;

architecture impl of quadtree is

    -- Number of routers in this (sub)tree; also the width of clks.
    constant num_routers : natural := calculate_num_routers_qt(level, top_level);

    -- Four 5-bit coordinate fields, one per quadrant: field q occupies
    -- bits 5*q+4 downto 5*q.
    signal r_core_x : std_logic_vector(19 downto 0);
    signal r_core_y : std_logic_vector(19 downto 0);

begin

    -- Derives the per-quadrant coordinates from core_x/core_y: every field is a
    -- copy of the incoming coordinate with bit (level-1) replaced by a
    -- quadrant-specific marker.
    -- The outputs are only assigned while arstN = '0' and there is no else
    -- branch, so they keep their last value otherwise.
    -- NOTE(review): this normally infers latches in synthesis - confirm intended.
    set_router_core: process(arstN, core_x, core_y)
        -- Marker bit per quadrant index 0..3: (x,y) = (1,1), (1,0), (0,1), (0,0).
        constant quad_x_bit : std_logic_vector(0 to 3) := "1100";
        constant quad_y_bit : std_logic_vector(0 to 3) := "1010";

        variable x_fields : std_logic_vector(19 downto 0);
        variable y_fields : std_logic_vector(19 downto 0);
    begin
        if arstN = '0' then
            for q in 0 to 3 loop
                -- Start from the parent coordinate ...
                x_fields(5*q+4 downto 5*q) := core_x;
                y_fields(5*q+4 downto 5*q) := core_y;
                -- ... then overwrite the bit belonging to this level.
                x_fields(5*q+level-1) := quad_x_bit(q);
                y_fields(5*q+level-1) := quad_y_bit(q);
            end loop;

            r_core_x <= x_fields;
            r_core_y <= y_fields;
        end if;
    end process set_router_core;

    -- Leaf case: no routers, upstream ports are connected directly to the PEs.
    -- NOTE(review): the widths only match when 4*num_paths_up/2 = 4**level,
    -- i.e. num_paths_up = 2 at level 1 - confirm against the instantiating level.
    g_end_quadtree: if level = 1 generate
        -- Upstream -> PE
        pe_data_out  <= data_in_us;
        pe_send_reqs <= rcv_reqs_us;
        pe_rcv_acks  <= send_ack_us;

        -- PE -> upstream
        data_out_us  <= pe_data_in;
        send_reqs_us <= pe_rcv_reqs;
        rcv_acks_us  <= pe_send_ack;
    end generate g_end_quadtree;

    -- Recursive case: four routers, each paired with one child quadtree.
    g_quadtree: if level > 1 generate
        constant npu          : positive := num_paths_up/2;   -- upstream paths per router
        constant npd          : positive := num_paths_down/2; -- Router's num_paths_down generic
        constant ds_paths     : positive := 4*npd;            -- downstream paths per router
        constant router_paths : positive := ds_paths + npu;   -- all paths of one router
        constant pe_span      : positive := 4**(level-1);     -- PE ports per child subtree

        -- Per-router buses: router q owns
        -- router_paths*(q+1)-1 downto router_paths*q; within that window the
        -- upper npu entries face upstream and the lower ds_paths entries face
        -- the child subtree.
        signal r_data_in     : t_DATA(4*router_paths-1 downto 0);
        signal r_data_out    : t_DATA(4*router_paths-1 downto 0);
        signal r_rcv_reqs    : std_logic_vector(4*router_paths-1 downto 0);
        signal r_snd_ack     : std_logic_vector(4*router_paths-1 downto 0);
        signal r_snd_reqs    : std_logic_vector(4*router_paths-1 downto 0);
        signal r_rcv_ack     : std_logic_vector(4*router_paths-1 downto 0);

        -- Downstream-only views (router <-> child subtree).
        signal r_data_ds_in  : t_DATA(4*ds_paths-1 downto 0);
        signal r_data_ds_out : t_DATA(4*ds_paths-1 downto 0);
        signal r_rcv_reqs_ds : std_logic_vector(4*ds_paths-1 downto 0);
        signal r_snd_ack_ds  : std_logic_vector(4*ds_paths-1 downto 0);
        signal r_snd_reqs_ds : std_logic_vector(4*ds_paths-1 downto 0);
        signal r_rcv_ack_ds  : std_logic_vector(4*ds_paths-1 downto 0);

        -- Upstream-only views (router -> this entity's *_us outputs).
        signal r_data_us_out : t_DATA(4*npu-1 downto 0);
        signal r_snd_reqs_us : std_logic_vector(4*npu-1 downto 0);
        signal r_rcv_ack_us  : std_logic_vector(4*npu-1 downto 0);
    begin

        -- Packs the upstream and downstream views into the per-router buses
        -- (router inputs) and unpacks the per-router buses into the upstream
        -- and downstream views (router outputs).
        map_routers_inputs_outputs: process(data_in_us, r_data_ds_in,
                                            rcv_reqs_us, r_rcv_reqs_ds,
                                            send_ack_us, r_snd_ack_ds,
                                            r_rcv_ack, r_snd_reqs, r_data_out)
            -- win_lo/win_mid/win_hi delimit router r's window in the combined
            -- buses; us_* and ds_* delimit its slice of the upstream and
            -- downstream views. The *_hi values are exclusive upper bounds.
            variable win_lo, win_mid, win_hi : natural range 0 to 4*router_paths;
            variable us_lo, us_hi            : natural range 0 to 4*router_paths;
            variable ds_lo, ds_hi            : natural range 0 to 4*router_paths;
        begin
            for r in 0 to 3 loop
                win_lo  := router_paths*r;        -- router lower index
                win_mid := win_lo + ds_paths;     -- router middle index
                win_hi  := win_lo + router_paths; -- router upper index
                us_lo   := npu*r;
                us_hi   := us_lo + npu;           -- upstream router index
                ds_lo   := ds_paths*r;
                ds_hi   := ds_lo + ds_paths;      -- downstream router index

                -- Router inputs: upper part from upstream, lower part from
                -- the child subtree.
                r_data_in (win_hi-1  downto win_mid) <= data_in_us   (us_hi-1 downto us_lo);
                r_data_in (win_mid-1 downto win_lo)  <= r_data_ds_in (ds_hi-1 downto ds_lo);
                r_rcv_reqs(win_hi-1  downto win_mid) <= rcv_reqs_us  (us_hi-1 downto us_lo);
                r_rcv_reqs(win_mid-1 downto win_lo)  <= r_rcv_reqs_ds(ds_hi-1 downto ds_lo);
                r_snd_ack (win_hi-1  downto win_mid) <= send_ack_us  (us_hi-1 downto us_lo);
                r_snd_ack (win_mid-1 downto win_lo)  <= r_snd_ack_ds (ds_hi-1 downto ds_lo);

                -- Router outputs: upper part to upstream, lower part to the
                -- child subtree.
                r_data_us_out(us_hi-1 downto us_lo) <= r_data_out(win_hi-1  downto win_mid);
                r_data_ds_out(ds_hi-1 downto ds_lo) <= r_data_out(win_mid-1 downto win_lo);
                r_snd_reqs_us(us_hi-1 downto us_lo) <= r_snd_reqs(win_hi-1  downto win_mid);
                r_snd_reqs_ds(ds_hi-1 downto ds_lo) <= r_snd_reqs(win_mid-1 downto win_lo);
                r_rcv_ack_us (us_hi-1 downto us_lo) <= r_rcv_ack (win_hi-1  downto win_mid);
                r_rcv_ack_ds (ds_hi-1 downto ds_lo) <= r_rcv_ack (win_mid-1 downto win_lo);
            end loop;
        end process map_routers_inputs_outputs;

        -- One router plus one child quadtree per quadrant.
        g_elements: for q in 0 to 3 generate
            -- Inclusive slice bounds belonging to quadrant q.
            constant core_lo : natural := 5*q;
            constant core_hi : natural := 5*q + 4;
            constant bus_lo  : natural := router_paths*q;
            constant bus_hi  : natural := router_paths*(q+1) - 1;
            constant sub_lo  : natural := ds_paths*q;
            constant sub_hi  : natural := ds_paths*(q+1) - 1;
            constant pe_lo   : natural := pe_span*q;
            constant pe_hi   : natural := pe_span*(q+1) - 1;
        begin

            -- The four routers of this level take the top four clks bits.
            router_inst: Router
                generic map (
                    num_paths_up   => npu,
                    num_paths_down => npd,
                    npu_bit_size   => npu_bit_size-1,
                    npd_bit_size   => npd_bit_size-1,
                    level          => level-1,
                    buffer_width   => buffer_width,
                    buffer_depth   => buffer_depth,
                    fifo_ptr_size  => fifo_ptr_size,
                    chip_x         => chip_x,
                    chip_y         => chip_y
                )
                port map (
                    clk       => clks(num_routers-1-q),
                    arstN     => arstN,
                    core_x    => r_core_x(core_hi downto core_lo),
                    core_y    => r_core_y(core_hi downto core_lo),
                    data_in   => r_data_in (bus_hi downto bus_lo),
                    rcv_reqs  => r_rcv_reqs(bus_hi downto bus_lo),
                    send_ack  => r_snd_ack (bus_hi downto bus_lo),
                    rcv_acks  => r_rcv_ack (bus_hi downto bus_lo),
                    send_reqs => r_snd_reqs(bus_hi downto bus_lo),
                    data_out  => r_data_out(bus_hi downto bus_lo)
                );

            -- Child subtree: its upstream side is the router's downstream side,
            -- so the router's outputs feed the child's *_us inputs and vice
            -- versa. It receives a quarter of the clks bits left below the top
            -- four.
            -- NOTE(review): the child's *_us ports are 4*npu/2 wide while the
            -- slices connected here are 4*npd wide, so elaboration requires
            -- npu = 2*npd - confirm the generics always satisfy this.
            router_subtree: entity work.quadtree
                generic map (
                    num_paths_up   => npu,
                    num_paths_down => npd,
                    npu_bit_size   => npu_bit_size-1,
                    npd_bit_size   => npd_bit_size-1,
                    level          => level-1,
                    top_level      => top_level,
                    buffer_width   => buffer_width,
                    buffer_depth   => buffer_depth,
                    fifo_ptr_size  => fifo_ptr_size,
                    chip_x         => chip_x,
                    chip_y         => chip_y
                )
                port map (
                    clks         => clks((num_routers-4)*(q+1)/4-1 downto (num_routers-4)*q/4),
                    arstN        => arstN,
                    core_x       => r_core_x(core_hi downto core_lo),
                    core_y       => r_core_y(core_hi downto core_lo),

                    -- router -> child
                    data_in_us   => r_data_ds_out(sub_hi downto sub_lo),
                    rcv_reqs_us  => r_snd_reqs_ds(sub_hi downto sub_lo),
                    send_ack_us  => r_rcv_ack_ds (sub_hi downto sub_lo),

                    -- child -> router
                    data_out_us  => r_data_ds_in (sub_hi downto sub_lo),
                    rcv_acks_us  => r_snd_ack_ds (sub_hi downto sub_lo),
                    send_reqs_us => r_rcv_reqs_ds(sub_hi downto sub_lo),

                    -- PE side: each child gets a contiguous quarter of the PE ports
                    pe_data_in   => pe_data_in  (pe_hi downto pe_lo),
                    pe_rcv_reqs  => pe_rcv_reqs (pe_hi downto pe_lo),
                    pe_send_ack  => pe_send_ack (pe_hi downto pe_lo),
                    pe_rcv_acks  => pe_rcv_acks (pe_hi downto pe_lo),
                    pe_send_reqs => pe_send_reqs(pe_hi downto pe_lo),
                    pe_data_out  => pe_data_out (pe_hi downto pe_lo)
                );

        end generate g_elements;

        -- Drive this entity's upstream outputs from the routers' upstream views.
        data_out_us  <= r_data_us_out;
        rcv_acks_us  <= r_rcv_ack_us;
        send_reqs_us <= r_snd_reqs_us;

    end generate g_quadtree;

end architecture impl;