`timescale 1ns / 1ps

// -----------------------------------------------------------------------------
// tb_matrix_multiplication
//
// Testbench that connects a `matrix_multiplication_unit_new` instance to
// memory port 3 of a `top_module_mem` instance, runs one multiplication with
// mode = 2'b00, prints the number of clock cycles counted while
// `enable && !done`, then raises `write_back_to_file_enable` and waits for
// `done_writing_to_file` before stopping the simulation.
//
// Memory ports 1 and 2 are instantiated but held idle.
// -----------------------------------------------------------------------------
module tb_matrix_multiplication;

    // ------------------------------------------------------------------ //
    // Parameters
    // ------------------------------------------------------------------ //
    parameter DATA_WIDTH     = 16;
    parameter MEM_ROWS       = 20;  // 20 -> 5 address bits
    parameter MEM_COLS       = 80;  // 80 -> 7 address bits
    parameter PE_ROWS        = 20;
    parameter PE_COLS        = 20;
    parameter COMMON_ROW_COL = 4;
    parameter OUTPUT_COL     = 20;
    parameter OUTPUT_ROW     = 20;

    // MSB index of the full-row / full-column buses.
    // NOTE(review): this evaluates to DATA_WIDTH*(max(ROWS,COLS)-1), i.e. the
    // bus is DATA_WIDTH*(max-1)+1 bits wide rather than DATA_WIDTH*max bits.
    // The expression is kept exactly as it was because it has to match the
    // port declarations of the instantiated modules, which are not visible
    // from this file - confirm against top_module_mem / the MMU.
    localparam FULL_ROW_MSB =
        DATA_WIDTH * ((MEM_ROWS > MEM_COLS) ? MEM_ROWS - 1 : MEM_COLS - 1);

    // ------------------------------------------------------------------ //
    // Clock, reset and control
    // ------------------------------------------------------------------ //
    reg        clk;
    reg        rst;
    reg        enable;
    reg [1:0]  mode;

    // ------------------------------------------------------------------ //
    // Word-wide inputs of the MMU for matrices A and B
    // ------------------------------------------------------------------ //
    reg [DATA_WIDTH-1:0] data_input_A;
    reg [DATA_WIDTH-1:0] data_input_B;
    reg                  valid_mem_input_A;
    reg                  valid_mem_input_B;

    // ------------------------------------------------------------------ //
    // Address offset configuration
    // ------------------------------------------------------------------ //
    reg [$clog2(MEM_ROWS)-1:0] rows_start_add_reading_A;
    reg [$clog2(MEM_COLS)-1:0] cols_start_add_reading_A;
    reg [$clog2(MEM_ROWS)-1:0] rows_start_add_reading_B;
    reg [$clog2(MEM_COLS)-1:0] cols_start_add_reading_B;
    reg [$clog2(MEM_ROWS)-1:0] rows_start_add_writing;
    reg [$clog2(MEM_COLS)-1:0] cols_start_add_writing;

    // ------------------------------------------------------------------ //
    // Size configuration
    // ------------------------------------------------------------------ //
    reg [$clog2(MEM_ROWS)-1:0] rows_size_reading_A;
    reg [$clog2(MEM_COLS)-1:0] cols_size_reading_A;
    reg [$clog2(MEM_ROWS)-1:0] rows_size_reading_B;
    reg [$clog2(MEM_COLS)-1:0] cols_size_reading_B;

    // ------------------------------------------------------------------ //
    // MMU outputs
    // ------------------------------------------------------------------ //
    wire                        done;
    wire [$clog2(MEM_ROWS)-1:0] row_addr_A;
    wire [$clog2(MEM_COLS)-1:0] col_addr_A;
    wire [$clog2(MEM_ROWS)-1:0] row_addr_B;
    wire [$clog2(MEM_COLS)-1:0] col_addr_B;
    wire [$clog2(MEM_ROWS)-1:0] row_addr_out;
    wire [$clog2(MEM_COLS)-1:0] col_addr_out;
    wire                        read_enable_A;
    wire                        read_enable_B;
    wire                        write_enable_out;
    wire                        read_full_row_A;
    wire                        read_full_row_B;
    wire                        write_full_row_3;
    wire [DATA_WIDTH-1:0]       data_out;

    // ------------------------------------------------------------------ //
    // Memory-side signals (ports 1 and 2 are idle, port 3 serves the MMU)
    // ------------------------------------------------------------------ //
    reg  [$clog2(MEM_ROWS)-1:0] row_addr_1, row_addr_2, row_addr_3, no_rows_used3;
    reg  [$clog2(MEM_COLS)-1:0] col_addr_1, col_addr_2, col_addr_3, no_cols_used3;
    wire [DATA_WIDTH-1:0]       data_out_1, data_out_2, data_in_3;
    reg  [DATA_WIDTH-1:0]       data_out_3;
    wire                        valid_1, valid_2, valid_3;
    // Declared as reg so the multiplexing always-block below can drive them.
    reg                         write_enable_3, read_enable_3;
    reg                         read_full_row_3;

    wire [FULL_ROW_MSB:0] full_row_output_1, full_row_output_2, full_row_output_3;
    wire [FULL_ROW_MSB:0] full_row_input_1, full_row_input_2, full_row_input_3;
    reg  [FULL_ROW_MSB:0] full_row_A, full_row_B;

    // Full-row write strobes of the idle memories. They used to be undriven
    // wires (Z on the memory ports); tie them, and their data buses, low.
    // NOTE(review): this assumes write_full_row_1/2 and full_row_input_1/2 are
    // inputs of top_module_mem, as their port-3 counterparts (driven by the
    // MMU) are - confirm against the memory module.
    wire write_full_row_1;
    wire write_full_row_2;
    assign write_full_row_1 = 1'b0;
    assign write_full_row_2 = 1'b0;
    assign full_row_input_1 = {(FULL_ROW_MSB+1){1'b0}};
    assign full_row_input_2 = {(FULL_ROW_MSB+1){1'b0}};

    // 0 = read row-wise, 1 = read column-wise (per the original author's note).
    reg read_full_row_or_col_from_mem;

    integer cycle_count;

    // Request / acknowledge for dumping the memory contents back to a file.
    reg  write_back_to_file_enable;
    wire done_writing_to_file;

    // ------------------------------------------------------------------ //
    // Memory
    // ------------------------------------------------------------------ //
    top_module_mem #(
        .ROWS1(MEM_ROWS), .COLS1(MEM_COLS),
        .ROWS2(MEM_ROWS), .COLS2(MEM_COLS),
        .ROWS3(MEM_ROWS), .COLS3(MEM_COLS),
        .DATA_WIDTH(DATA_WIDTH),
        .COLS_USED(COMMON_ROW_COL)
    ) memory_inst (
        .clk(clk),

        // Memory 1 (not used)
        .row_addr_1(row_addr_1),
        .col_addr_1(col_addr_1),
        .write_enable_1(1'b0),
        .read_enable_1(1'b0),
        .data_input_1({DATA_WIDTH{1'b0}}),   // was a hard-coded 16'd0
        .data_output_1(data_out_1),
        .valid_1(valid_1),
        .read_full_row_or_col1(1'b0),
        .read_full_row_1(1'b0),
        .no_cols_used1(),
        .no_rows_used1(),
        .full_row_output_1(full_row_output_1),
        .full_row_input_1(full_row_input_1),
        .write_full_row_1(write_full_row_1),

        // Memory 2 (not used)
        .row_addr_2(row_addr_2),
        .col_addr_2(col_addr_2),
        .write_enable_2(1'b0),
        .read_enable_2(1'b0),
        .data_input_2({DATA_WIDTH{1'b0}}),   // was a hard-coded 16'd0
        .data_output_2(data_out_2),
        .valid_2(valid_2),
        .read_full_row_or_col2(1'b0),
        .read_full_row_2(1'b0),
        .no_cols_used2(),
        .no_rows_used2(),
        .full_row_output_2(full_row_output_2),
        .full_row_input_2(full_row_input_2),
        .write_full_row_2(write_full_row_2),

        // Memory 3 (used for matrix multiplication)
        .row_addr_3(row_addr_3),
        .col_addr_3(col_addr_3),
        .write_enable_3(write_enable_3),
        .read_enable_3(read_enable_3),
        .data_input_3(data_out_3),
        .data_output_3(data_in_3),
        .valid_3(valid_3),
        .read_full_row_or_col3(read_full_row_or_col_from_mem),
        .read_full_row_3(read_full_row_3),
        .no_cols_used3(no_cols_used3),
        .no_rows_used3(no_rows_used3),
        .full_row_output_3(full_row_output_3),
        .full_row_input_3(full_row_input_3),
        .write_full_row_3(write_full_row_3),

        .write_back_to_file_enable(write_back_to_file_enable),
        .done_writing_to_file(done_writing_to_file)
    );

    // ------------------------------------------------------------------ //
    // Matrix multiplication unit
    // ------------------------------------------------------------------ //
    matrix_multiplication_unit_new #(
        .DATA_WIDTH(DATA_WIDTH),
        .MEM_ROWS(MEM_ROWS),
        .MEM_COLS(MEM_COLS),
        .PE_ROWS(PE_ROWS),
        .PE_COLS(PE_COLS),
        .COMMON_ROW_COL(COMMON_ROW_COL),
        .OUTPUT_COL(OUTPUT_COL),
        .OUTPUT_ROW(OUTPUT_ROW)
    ) mmu_inst (
        .clk(clk),
        .rst(rst),
        .enable(enable),
        .mode(mode),
        .data_input_A(data_input_A),
        .data_input_B(data_input_B),
        .valid_mem_input_A(valid_mem_input_A),
        .valid_mem_input_B(valid_mem_input_B),
        .rows_start_add_reading_A(rows_start_add_reading_A),
        .cols_start_add_reading_A(cols_start_add_reading_A),
        .rows_start_add_reading_B(rows_start_add_reading_B),
        .cols_start_add_reading_B(cols_start_add_reading_B),
        .rows_start_add_writing(rows_start_add_writing),
        .cols_start_add_writing(cols_start_add_writing),
        .rows_size_reading_A(rows_size_reading_A),
        .cols_size_reading_A(cols_size_reading_A),
        .rows_size_reading_B(rows_size_reading_B),
        .cols_size_reading_B(cols_size_reading_B),
        // outputs
        .done(done),
        .row_addr_A(row_addr_A),
        .col_addr_A(col_addr_A),
        .row_addr_B(row_addr_B),
        .col_addr_B(col_addr_B),
        .row_addr_out(row_addr_out),
        .col_addr_out(col_addr_out),
        .read_enable_A(read_enable_A),
        .read_enable_B(read_enable_B),
        .write_enable_out(write_enable_out),
        .data_out(data_out),
        .full_row_A(full_row_A),
        .full_row_B(full_row_B),
        .read_full_row_A(read_full_row_A),
        .read_full_row_B(read_full_row_B),
        .write_full_row_out(write_full_row_3),
        .Full_row_out(full_row_input_3)
    );

    // ------------------------------------------------------------------ //
    // Clock generation: toggles every 5 ns (10 ns period, 100 MHz)
    // ------------------------------------------------------------------ //
    initial begin
        clk = 0;
        forever #5 clk = ~clk;
    end

    // ------------------------------------------------------------------ //
    // Memory port 3 multiplexer.
    //
    // Priority: A full-row read, then B full-row read, then full-row write.
    // When none of the three is active, the address / size / data registers
    // deliberately keep their previous values (latch behaviour in
    // simulation); that hold behaviour is unchanged from the original.
    // ------------------------------------------------------------------ //
    always @(*) begin
        if (read_full_row_A) begin
            row_addr_3        = row_addr_A;
            col_addr_3        = col_addr_A;
            full_row_A        = full_row_output_3;
            no_rows_used3     = rows_size_reading_A;
            no_cols_used3     = cols_size_reading_A;
            valid_mem_input_A = valid_3;  // TODO (original author): rework this valid logic
        end
        else if (read_full_row_B) begin
            row_addr_3        = row_addr_B;
            col_addr_3        = col_addr_B;
            full_row_B        = full_row_output_3;
            // Row/column sizes are swapped because B is stored transposed.
            no_rows_used3     = cols_size_reading_B;
            no_cols_used3     = rows_size_reading_B;
            valid_mem_input_B = valid_3;  // TODO (original author): rework this valid logic
        end
        else if (write_full_row_3) begin
            row_addr_3    = row_addr_out;
            col_addr_3    = col_addr_out;
            no_rows_used3 = rows_size_reading_A;
            no_cols_used3 = cols_size_reading_B;
        end

        // Select column-wise access while writing when B has at least as many
        // columns as A has rows. Blocking assignment: this is a combinational
        // block and every other assignment in it is blocking.
        if ((write_full_row_3 == 1) && (cols_size_reading_B >= rows_size_reading_A)) begin
            read_full_row_or_col_from_mem = 1'b1;
        end

        // Whenever the memory reports "not valid", drop both valid flags.
        if (valid_3 == 0) begin
            valid_mem_input_A = valid_3;
            valid_mem_input_B = valid_3;
        end

        read_full_row_3 = read_full_row_A | read_full_row_B;
        write_enable_3  = write_enable_out;
        data_out_3      = data_out;
    end

    // ------------------------------------------------------------------ //
    // Cycle counter: counts rising clock edges while enable is high and the
    // MMU has not yet signalled done; cleared by rst.
    // ------------------------------------------------------------------ //
    always @(posedge clk) begin
        if (rst) begin
            cycle_count <= 0;
        end else if (enable && !done) begin
            cycle_count <= cycle_count + 1;
        end
    end

    // ------------------------------------------------------------------ //
    // Test sequence
    // ------------------------------------------------------------------ //
    initial begin
        // Give every signal that no other process drives a defined idle
        // value, so no X reaches the memory or the MMU.
        data_input_A              = {DATA_WIDTH{1'b0}};
        data_input_B              = {DATA_WIDTH{1'b0}};
        // The word-wide read path is not wired up in this testbench
        // (data_in_3 is never fed back to data_input_A/B), so the single-word
        // read enable of memory 3 is held inactive.
        // NOTE(review): if word reads are ever needed, drive this from
        // read_enable_A | read_enable_B instead.
        read_enable_3             = 1'b0;
        row_addr_1                = 0;
        row_addr_2                = 0;
        col_addr_1                = 0;
        col_addr_2                = 0;
        write_back_to_file_enable = 1'b0;

        // Initialize control signals
        full_row_A = 0;
        full_row_B = 0;
        rst        = 1;
        enable     = 0;
        // mode = 2'b01; // input-stationary mode
        // mode = 2'b10; // weight-stationary mode
        mode       = 2'b00; // output-stationary mode

        // Hold reset for a few clock cycles
        #20;
        rst = 0;

        // Wait after reset de-assertion
        #20;

        // 0 = read row-wise, 1 = read column-wise (default)
        read_full_row_or_col_from_mem <= 1'b1;

        // Literals are unsized so they follow the $clog2-derived register
        // widths when MEM_ROWS / MEM_COLS change.
        rows_start_add_reading_A <= 0;
        cols_start_add_reading_A <= 0;
        rows_start_add_reading_B <= 0;
        cols_start_add_reading_B <= 4;    // B occupies columns 4 to 7
        rows_start_add_writing   <= 0;
        cols_start_add_writing   <= 12;   // result is written from column 12 onwards
        // NOTE(review): the size fields appear to hold (count - 1), judging by
        // COMMON_ROW_COL-1 below - confirm against the MMU.
        rows_size_reading_A      <= 15;
        cols_size_reading_A      <= COMMON_ROW_COL-1;
        rows_size_reading_B      <= COMMON_ROW_COL-1;
        cols_size_reading_B      <= 19;   // B is stored transposed

        // Enable the matrix multiplication
        enable = 1;

        #20;

        // Wait for the operation to complete
        wait (done);

        // Print number of cycles taken
        // (the old output-stationary implementation took 1261 cycles)
        $display("Operation completed in %0d cycles(1/2)", cycle_count);

        enable = 0;

        // Request the memory dump and wait until it has finished
        write_back_to_file_enable = 1;
        #20;
        wait (done_writing_to_file);
        #20;

        /* Second test case, currently disabled.

        rst = 1;
        enable = 0;
        mode = 2'b00; // output-stationary mode
        #20;
        rst = 0;
        #20;

        rows_start_add_reading_A <= 5'b0;
        cols_start_add_reading_A <= 7'd20;
        rows_start_add_reading_B <= 5'b0;
        cols_start_add_reading_B <= 7'd7;
        rows_start_add_writing   <= 5'b0;
        cols_start_add_writing   <= 7'd35;
        rows_size_reading_A      <= 5'd19;
        cols_size_reading_A      <= COMMON_ROW_COL;
        rows_size_reading_B      <= COMMON_ROW_COL;
        cols_size_reading_B      <= 7'd9;

        enable = 1;
        #20;
        wait (done);
        $display("Operation completed in %0d cycles(2/2)", cycle_count);
        enable = 0;
        #20;
        */

        // Finish the simulation
        $stop;
    end

endmodule