library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use work.real_vector_pkg.all;

-------------------------------------------------------------------------------
-- dptc
--
-- Instantiates an Nv x Nh grid of ddot_unit components. Unit (i, j) is fed
-- row i of x_matrix and row j of y_matrix (both rows have N elements) and its
-- dot_out drives result_matrix(i, j).
--
-- The matrix/vector/scalar types (my_real, my_real_vector, my_real_matrix,
-- real_matrix) come from work.real_vector_pkg, which is not shown here.
-------------------------------------------------------------------------------
entity dptc is
    generic (
        Nv : integer := 2;  -- Number of rows of X (first dimension of the result)
        Nh : integer := 2;  -- Number of rows of Y (second dimension of the result)
        N  : integer := 2   -- Vector size handled by each ddot_unit
    );
    port (
        clk      : in std_logic;  -- Forwarded unchanged to every ddot_unit
        reset_n  : in std_logic;  -- Forwarded unchanged to every ddot_unit
        enable   : in std_logic;  -- Forwarded unchanged to every ddot_unit

        -- Input matrix X, size [Nv x N]: Nv vectors of N elements each.
        x_matrix : in my_real_matrix(0 to Nv-1, 0 to N-1);

        -- Input matrix Y, size [Nh x N]: Nh vectors of N elements each.
        y_matrix : in my_real_matrix(0 to Nh-1, 0 to N-1);

        -- High whenever at least one ddot_unit drives its out_valid to '1'.
        -- NOTE(review): the original comment described this as "high when
        -- result_matrix contains valid results"; the OR-reduction only means
        -- that if all units assert valid together -- confirm against ddot_unit.
        out_valid : out std_logic;

        -- Output matrix, size [Nv x Nh]; element (i, j) is the dot_out of the
        -- unit fed with X row i and Y row j.
        result_matrix : out real_matrix(0 to Nv-1, 0 to Nh-1)
    );
end entity dptc;

architecture Behavioral of dptc is

    -- Per-element unit; implementation lives outside this file.
    component ddot_unit
        generic (
            N : integer := 2
        );
        port (
            clk       : in  std_logic;
            reset_n   : in  std_logic;
            enable    : in  std_logic;
            x_vec     : in  my_real_vector(0 to N-1);
            y_vec     : in  my_real_vector(0 to N-1);
            out_valid : out std_logic;
            dot_out   : out my_real
        );
    end component;

    -- Array of N-element vectors, used to hand whole rows to the units.
    type my_real_vector_array is array (natural range <>) of my_real_vector(0 to N-1);

    -- Returns '1' if any element of flags is exactly '1', otherwise '0'
    -- (also '0' for an empty vector).
    function any_high(flags : std_logic_vector) return std_logic is
    begin
        for idx in flags'range loop
            if flags(idx) = '1' then
                return '1';
            end if;
        end loop;
        return '0';
    end function;

    -- Row-wise views of the two input matrices.
    signal x_rows : my_real_vector_array(0 to Nv-1);
    signal y_rows : my_real_vector_array(0 to Nh-1);

    -- Raw outputs of the unit grid.
    signal dot_grid   : real_matrix(0 to Nv-1, 0 to Nh-1);
    signal unit_valid : std_logic_vector(0 to (Nv*Nh)-1);  -- flattened: index = i*Nh + j

    -- OR-reduction of unit_valid, kept as its own signal before the port.
    signal any_valid : std_logic;

begin

    -- Slice x_matrix into one vector per row (purely combinational copy).
    copy_x_rows: for i in 0 to Nv-1 generate
        copy_x_elems: for k in 0 to N-1 generate
            x_rows(i)(k) <= x_matrix(i, k);
        end generate;
    end generate;

    -- Slice y_matrix into one vector per row (purely combinational copy).
    copy_y_rows: for j in 0 to Nh-1 generate
        copy_y_elems: for k in 0 to N-1 generate
            y_rows(j)(k) <= y_matrix(j, k);
        end generate;
    end generate;

    -- One ddot_unit per (i, j) pair of X row and Y row.
    gen_rows: for i in 0 to Nv-1 generate
        gen_cols: for j in 0 to Nh-1 generate
            ddot_inst : ddot_unit
                generic map (
                    N => N
                )
                port map (
                    clk       => clk,
                    reset_n   => reset_n,
                    enable    => enable,
                    x_vec     => x_rows(i),
                    y_vec     => y_rows(j),
                    out_valid => unit_valid(i*Nh + j),
                    dot_out   => dot_grid(i, j)
                );
        end generate;
    end generate;

    -- Drive the entity outputs.
    result_matrix <= dot_grid;
    any_valid     <= any_high(unit_valid);
    out_valid     <= any_valid;

end architecture Behavioral;