library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;
use work.real_vector_pkg.all; -- for my_real_matrix, real_matrix

-- Testbench for work.tile_unit.
--
-- Sequence driven by this bench:
--   1. Hold reset_n low for two clock periods, then release it.
--   2. One clock period later, load m1_chunk and m2_chunk with fixed patterns.
--   3. Assert enable for two clock periods.
--   4. Wait for out_valid = '1' and report success, or fail with a timeout
--      if out_valid is never seen before the clock generator stops.
--
-- NOTE(review): result_out is not compared against expected values here; the
-- bench only checks that out_valid is asserted. The arithmetic performed by
-- tile_unit is not visible from this file, so no reference model is included.
entity tile_unit_tb is
end;

architecture sim of tile_unit_tb is

  -- Constants matching the DUT generics
  constant Nv : integer := 2;
  constant Nh : integer := 2;
  constant Nm : integer := 4;
  constant Nd : integer := 2;
  constant N  : integer := 2;

  -- Clock period
  constant clk_period : time := 10 ns;

  -- Simulation time at which the clock generator stops toggling; also used
  -- as the deadline for out_valid in the stimulus process.
  constant sim_limit : time := 500 ns;

  -- DUT signals
  signal clk        : std_logic := '0';
  signal reset_n    : std_logic := '0';
  signal enable     : std_logic := '0';
  signal m1_chunk   : my_real_matrix(0 to Nv-1, 0 to Nm-1);
  signal m2_chunk   : my_real_matrix(0 to Nh-1, 0 to Nm-1);
  signal out_valid  : std_logic;
  signal result_out : real_matrix(0 to Nv-1, 0 to Nh-1);

begin

  -- Clock generation: free-running clock until sim_limit, then the process
  -- suspends forever so the simulator can run out of events and stop.
  clk_process : process
  begin
    while now < sim_limit loop
      clk <= '0';
      wait for clk_period / 2;
      clk <= '1';
      wait for clk_period / 2;
    end loop;
    wait;
  end process;

  -- DUT instantiation
  uut : entity work.tile_unit
    generic map (
      N  => N,
      Nv => Nv,
      Nh => Nh,
      Nm => Nm,
      Nd => Nd
    )
    port map (
      clk        => clk,
      reset_n    => reset_n,
      enable     => enable,
      m1_chunk   => m1_chunk,
      m2_chunk   => m2_chunk,
      out_valid  => out_valid,
      result_out => result_out
    );

  -- Stimulus
  stim_proc : process
  begin
    -- Reset sequence
    reset_n <= '0';
    enable  <= '0';
    wait for 2 * clk_period;
    reset_n <= '1';

    -- Load test data
    wait for clk_period;

    -- m1_chunk (Nv x Nm): 2.0 where row index = column index, 1.0 elsewhere.
    -- With Nv = 2, Nm = 4 this is [2 1 1 1], [1 2 1 1].
    for i in 0 to Nv-1 loop
      for j in 0 to Nm-1 loop
        if i = j then
          m1_chunk(i, j) <= 2.0;
        else
          m1_chunk(i, j) <= 1.0;
        end if;
      end loop;
    end loop;

    -- m2_chunk (Nh x Nm): row-major fill starting at 1.
    -- With Nh = 2, Nm = 4 this is [1 2 3 4], [5 6 7 8].
    for i in 0 to Nh-1 loop
      for j in 0 to Nm-1 loop
        m2_chunk(i, j) <= real(i * Nm + j + 1); -- row major fill
      end loop;
    end loop;

    -- Start computation
    enable <= '1';
    wait for 2 * clk_period;
    enable <= '0';

    -- Wait for valid.
    -- "wait until" only resumes on an event, so skip the wait when out_valid
    -- is already high. The wait is bounded by sim_limit because the clock
    -- stops there and the DUT could not assert out_valid afterwards; the
    -- "now < sim_limit" guard keeps the timeout expression non-negative.
    if out_valid /= '1' and now < sim_limit then
      wait until out_valid = '1' for sim_limit - now;
    end if;

    assert out_valid = '1'
      report "Timeout: out_valid was not asserted before the clock stopped."
      severity failure;

    report "Matrix multiplication result is valid.";
    wait;
  end process;

end architecture;