// File: AYAKA_Transformer/rtl/Top_controller.v
// (repository viewer metadata: 1025 lines, no EOL, 43 KiB, Verilog)

//`timescale 1ns / 1ps
// Top-level controller: sequences the RPAS, HDPE (matrix multiplication) and
// masking units over three memories through a single FSM.
module top_controller_module #(
    parameter DATA_WIDTH     = 16, // element width, forwarded to every sub-unit
    parameter MEM_ROWS       = 20, // 20 -> 5 address bits (previously 16)
    parameter MEM_COLS       = 80, // 80 -> 7 address bits (previously 32)
    parameter OUTPUT_COL     = 20, // forwarded to the matrix multiplication unit
    parameter OUTPUT_ROW     = 20, // forwarded to the matrix multiplication unit
    parameter COMMON_ROW_COL = 4,  // shared inner dimension (previously 3); given to
                                   // rows_size_reading_B & cols_size_reading_A,
                                   // also passed as COLS_USED to the memory
    parameter PE_ROWS        = 20, // forwarded to the matrix multiplication unit
    parameter PE_COLS        = 4,  // for T x W^Q
    parameter H_LIMIT        = 2,  // upper bound compared against h_count+1 in UPDATE_H
    parameter TOP_K          = 4   // forwarded to the masking unit
)(
    input  wire clk,
    input  wire rst,                           // asynchronous, active-high reset of the
                                               // FSM state and control registers
    input  wire enable_top,                    // leaves IDLE when 1
    input  wire write_back_to_file_enable_top, // forwarded to the memory module
    output reg  done_top,                      // set once the FSM reaches DONE
    output wire done_writing_to_file_top       // driven by the memory module
);
// ---------------------------------------------------------------------------
// FSM state encoding
// ---------------------------------------------------------------------------
parameter IDLE               = 0,
          RPAS_TOKEN         = 1,
          RPAS_TOKEN_RESET   = 2,
          RPAS_WEIGTHS       = 3,
          RPAS_WEIGTHS_RESET = 4,
          HDPE_T_X_Q         = 5,
          HDPE_T_X_Q_RESET   = 6,
          HDPE_T_X_K         = 7,
          HDPE_T_X_K_RESET   = 8,
          RPAS_Q             = 9,
          RPAS_Q_RESET       = 10,
          RPAS_K             = 11,
          RPAS_K_RESET       = 12,
          HDPE_Q_X_K         = 13,
          HDPE_Q_X_K_RESET   = 14,
          MASK_A_Q_KV        = 15,
          MASK_A_Q_KV_RESET  = 16,
          UPDATE_H           = 17,
          DONE               = 18;

reg [5:0] current_state;
reg [5:0] next_state;

// ---------------------------------------------------------------------------
// Derived widths shared by the declarations below
// ---------------------------------------------------------------------------
localparam ROW_AW = $clog2(MEM_ROWS); // row address / row count width
localparam COL_AW = $clog2(MEM_COLS); // column address / column count width
// MSB index of the full-row buses used by the memory, HDPE and RPAS ports.
localparam FULL_ROW_MSB = DATA_WIDTH*((MEM_ROWS>MEM_COLS)?MEM_ROWS-1:MEM_COLS-1);
// MSB index of the full-row bus fed to the masking unit.
// NOTE(review): this is DATA_WIDTH*N-1 while FULL_ROW_MSB is DATA_WIDTH*(N-1)
// (1280 vs 1265 bits with the default parameters) - confirm which width the
// sub-module ports actually expect.
localparam MASK_ROW_MSB = DATA_WIDTH*((MEM_ROWS>MEM_COLS)?MEM_ROWS:MEM_COLS)-1;

// ---------------------------------------------------------------------------
// HDPE array (matrix multiplication unit) signals
// ---------------------------------------------------------------------------
// Control
reg                  rst_hdpe;
reg                  enable_hdpe;
reg [1:0]            mode_hdpe;
// Element-wise inputs for matrix A and B
reg [DATA_WIDTH-1:0] data_input_A_hdpe;
reg [DATA_WIDTH-1:0] data_input_B_hdpe;
reg                  valid_mem_input_A_hdpe;
reg                  valid_mem_input_B_hdpe;
// Start-address configuration
reg [ROW_AW-1:0]     rows_start_add_reading_A_hdpe;
reg [COL_AW-1:0]     cols_start_add_reading_A_hdpe;
reg [ROW_AW-1:0]     rows_start_add_reading_B_hdpe;
reg [COL_AW-1:0]     cols_start_add_reading_B_hdpe;
reg [ROW_AW-1:0]     rows_start_add_writing_hdpe;
reg [COL_AW-1:0]     cols_start_add_writing_hdpe;
// Size configuration
reg [ROW_AW-1:0]     rows_size_reading_A_hdpe;
reg [COL_AW-1:0]     cols_size_reading_A_hdpe;
reg [ROW_AW-1:0]     rows_size_reading_B_hdpe;
reg [COL_AW-1:0]     cols_size_reading_B_hdpe;
// Signals coming back from the unit
wire                  done_hdpe;
wire [ROW_AW-1:0]     row_addr_A_hdpe;
wire [COL_AW-1:0]     col_addr_A_hdpe;
wire [ROW_AW-1:0]     row_addr_B_hdpe;
wire [COL_AW-1:0]     col_addr_B_hdpe;
wire [ROW_AW-1:0]     row_addr_out_hdpe;
wire [COL_AW-1:0]     col_addr_out_hdpe;
wire                  read_enable_A_hdpe;
wire                  read_enable_B_hdpe;
wire                  write_enable_out_hdpe;
wire                  read_full_row_A_hdpe;
wire                  read_full_row_B_hdpe;
reg  [FULL_ROW_MSB:0] full_row_A_hdpe;
reg  [FULL_ROW_MSB:0] full_row_B_hdpe;
wire [FULL_ROW_MSB:0] full_row_output_hdpe;
wire [DATA_WIDTH-1:0] data_out_hdpe;

// ---------------------------------------------------------------------------
// Memory module signals (three memories, suffix 1/2/3)
// regs are used for everything this controller drives so that different FSM
// states can route different sources onto the same memory port.
// ---------------------------------------------------------------------------
// Addresses
reg [ROW_AW-1:0] row_addr_1_mem;
reg [ROW_AW-1:0] row_addr_2_mem;
reg [ROW_AW-1:0] row_addr_3_mem;
reg [COL_AW-1:0] col_addr_1_mem;
reg [COL_AW-1:0] col_addr_2_mem;
reg [COL_AW-1:0] col_addr_3_mem;
// Used-region sizes
reg [ROW_AW-1:0] no_rows_used1_mem;
reg [ROW_AW-1:0] no_rows_used2_mem;
reg [ROW_AW-1:0] no_rows_used3_mem;
reg [COL_AW-1:0] no_cols_used1_mem;
reg [COL_AW-1:0] no_cols_used2_mem;
reg [COL_AW-1:0] no_cols_used3_mem;
// Element-wise data and handshake
wire [DATA_WIDTH-1:0] data_out_1_mem;
wire [DATA_WIDTH-1:0] data_out_2_mem;
wire [DATA_WIDTH-1:0] data_out_3_mem;
reg  [DATA_WIDTH-1:0] data_in_1_mem;
reg  [DATA_WIDTH-1:0] data_in_2_mem;
reg  [DATA_WIDTH-1:0] data_in_3_mem;
wire                  valid_1_mem;
wire                  valid_2_mem;
wire                  valid_3_mem;
reg                   write_enable_1_mem;
reg                   read_enable_1_mem;
reg                   write_enable_2_mem;
reg                   read_enable_2_mem;
reg                   write_enable_3_mem;
reg                   read_enable_3_mem;
// Full-row access
reg                   write_full_row_1_mem;
reg                   write_full_row_2_mem;
reg                   write_full_row_3_mem;
reg                   read_full_row_1_mem;
reg                   read_full_row_2_mem;
reg                   read_full_row_3_mem;
wire [FULL_ROW_MSB:0] full_row_output_1_mem;
wire [FULL_ROW_MSB:0] full_row_output_2_mem;
wire [FULL_ROW_MSB:0] full_row_output_3_mem;
reg  [FULL_ROW_MSB:0] full_row_input_1_mem;
reg  [FULL_ROW_MSB:0] full_row_input_2_mem;
reg  [FULL_ROW_MSB:0] full_row_input_3_mem;
// Row-wise (0) / column-wise (1) access selection
reg                   read_full_row_or_col1_mem;
reg                   read_full_row_or_col2_mem;
reg                   read_full_row_or_col3_mem;

// ---------------------------------------------------------------------------
// RPAS unit signals
// ---------------------------------------------------------------------------
// Control
reg              rst_rpas;
reg              enable_rpas;
// Address and size configuration
reg [ROW_AW-1:0] rows_start_add_reading_rpas;
reg [ROW_AW-1:0] rows_start_add_writing_rpas;
reg [ROW_AW-1:0] rows_size_reading_rpas;
reg [ROW_AW-1:0] rows_size_writing_rpas;
reg [COL_AW-1:0] cols_start_add_reading_rpas;
reg [COL_AW-1:0] cols_size_reading_rpas;
reg [COL_AW-1:0] cols_size_writing_rpas;
reg [COL_AW-1:0] cols_start_add_writing_rpas;
// RPAS <-> memory connections
wire [ROW_AW-1:0]     row_addr_out_read_rpas;
wire [ROW_AW-1:0]     row_addr_out_write_rpas;
wire [COL_AW-1:0]     col_addr_out_read_rpas;
wire [COL_AW-1:0]     col_addr_out_write_rpas;
wire                  write_enable_rpas;
wire                  read_enable_full_row_rpas;
reg                   read_valid_rpas;
wire                  valid_result_rpas;
wire [DATA_WIDTH-1:0] data_input_to_mem_1_rpas;
wire [DATA_WIDTH-1:0] data_input_to_mem_2_rpas;
wire [DATA_WIDTH-1:0] data_input_to_mem_3_rpas;
reg  [FULL_ROW_MSB:0] data_input_rows_rpas; // full row handed to the RPAS unit

// ---------------------------------------------------------------------------
// Masking unit signals
// ---------------------------------------------------------------------------
// Control
reg              rst_mask;
reg              enable_mask;
// Addressing
reg [ROW_AW-1:0] rows_start_add_reading_mask;
reg [ROW_AW-1:0] rows_start_add_writing_mask;
reg [ROW_AW-1:0] rows_size_reading_mask;
reg [ROW_AW-1:0] rows_size_writing_mask;
reg [COL_AW-1:0] cols_start_add_reading_mask;
reg [COL_AW-1:0] cols_size_reading_mask;
reg [COL_AW-1:0] cols_start_add_writing_mask;
reg [COL_AW-1:0] cols_size_writing_mask;
// Mask <-> memory connections
wire [ROW_AW-1:0]     row_addr_out_read_mask;
wire [ROW_AW-1:0]     row_addr_out_write_mask;
wire [COL_AW-1:0]     col_addr_out_read_mask;
wire [COL_AW-1:0]     col_addr_out_write_mask;
wire                  write_enable_mask;
wire                  read_enable_full_row_mask;
reg                   read_valid_mask;
wire                  valid_result_mask;
// Data
wire [DATA_WIDTH-1:0] data_output_mask;
reg  [MASK_ROW_MSB:0] full_row_output_mask;

// Iteration counter: incremented in UPDATE_H and compared against H_LIMIT.
integer h_count;
// Memory module: three memories sharing the same dimensions.
//   memory 1 - read by the RPAS unit in RPAS_TOKEN
//   memory 2 - read by the RPAS unit in RPAS_WEIGTHS
//   memory 3 - working memory: written by RPAS, read/written by the HDPE array
//              and by the masking unit
top_module_mem #(
    .DATA_WIDTH (DATA_WIDTH),
    .COLS_USED  (COMMON_ROW_COL),
    .ROWS1      (MEM_ROWS),
    .COLS1      (MEM_COLS),
    .ROWS2      (MEM_ROWS),
    .COLS2      (MEM_COLS),
    .ROWS3      (MEM_ROWS),
    .COLS3      (MEM_COLS)
) memory_inst (
    .clk                       (clk),

    // File write-back handshake, wired straight to the top-level ports
    .write_back_to_file_enable (write_back_to_file_enable_top),
    .done_writing_to_file      (done_writing_to_file_top),

    // ---- Memory 1 ----
    .row_addr_1                (row_addr_1_mem),
    .col_addr_1                (col_addr_1_mem),
    .no_rows_used1             (no_rows_used1_mem),
    .no_cols_used1             (no_cols_used1_mem),
    .read_enable_1             (read_enable_1_mem),
    .write_enable_1            (write_enable_1_mem),
    .data_input_1              (data_in_1_mem),
    .data_output_1             (data_out_1_mem),
    .valid_1                   (valid_1_mem),
    .read_full_row_or_col1     (read_full_row_or_col1_mem),
    .read_full_row_1           (read_full_row_1_mem),
    .write_full_row_1          (write_full_row_1_mem),
    .full_row_input_1          (full_row_input_1_mem),
    .full_row_output_1         (full_row_output_1_mem),

    // ---- Memory 2 ----
    .row_addr_2                (row_addr_2_mem),
    .col_addr_2                (col_addr_2_mem),
    .no_rows_used2             (no_rows_used2_mem),
    .no_cols_used2             (no_cols_used2_mem),
    .read_enable_2             (read_enable_2_mem),
    .write_enable_2            (write_enable_2_mem),
    .data_input_2              (data_in_2_mem),
    .data_output_2             (data_out_2_mem),
    .valid_2                   (valid_2_mem),
    .read_full_row_or_col2     (read_full_row_or_col2_mem),
    .read_full_row_2           (read_full_row_2_mem),
    .write_full_row_2          (write_full_row_2_mem),
    .full_row_input_2          (full_row_input_2_mem),
    .full_row_output_2         (full_row_output_2_mem),

    // ---- Memory 3 (used for matrix multiplication) ----
    .row_addr_3                (row_addr_3_mem),
    .col_addr_3                (col_addr_3_mem),
    .no_rows_used3             (no_rows_used3_mem),
    .no_cols_used3             (no_cols_used3_mem),
    .read_enable_3             (read_enable_3_mem),
    .write_enable_3            (write_enable_3_mem),
    .data_input_3              (data_in_3_mem),
    .data_output_3             (data_out_3_mem),
    .valid_3                   (valid_3_mem),
    .read_full_row_or_col3     (read_full_row_or_col3_mem),
    .read_full_row_3           (read_full_row_3_mem),
    .write_full_row_3          (write_full_row_3_mem),
    .full_row_input_3          (full_row_input_3_mem),
    .full_row_output_3         (full_row_output_3_mem)
);
// Full-row write strobe of the matrix multiplication unit. It is read by the
// signal-routing logic in the HDPE_* states. Declared explicitly here: it was
// previously only created as an implicit net by the port connection below,
// which fails under `default_nettype none and hides spelling mistakes.
wire write_full_row_3_hdpe;

// Instantiate the matrix multiplication unit (HDPE array).
// It is reset by the FSM-controlled rst_hdpe, not by the top-level rst.
matrix_multiplication_unit_new #(
    .DATA_WIDTH     (DATA_WIDTH),
    .MEM_ROWS       (MEM_ROWS),
    .MEM_COLS       (MEM_COLS),
    .PE_ROWS        (PE_ROWS),
    .PE_COLS        (PE_COLS),
    .COMMON_ROW_COL (COMMON_ROW_COL),
    .OUTPUT_COL     (OUTPUT_COL),
    .OUTPUT_ROW     (OUTPUT_ROW)
) mmu_inst (
    // Control
    .clk                      (clk),
    .rst                      (rst_hdpe),
    .enable                   (enable_hdpe),
    .mode                     (mode_hdpe),
    // Element-wise operand inputs
    .data_input_A             (data_input_A_hdpe),
    .data_input_B             (data_input_B_hdpe),
    .valid_mem_input_A        (valid_mem_input_A_hdpe),
    .valid_mem_input_B        (valid_mem_input_B_hdpe),
    // Start addresses
    .rows_start_add_reading_A (rows_start_add_reading_A_hdpe),
    .cols_start_add_reading_A (cols_start_add_reading_A_hdpe),
    .rows_start_add_reading_B (rows_start_add_reading_B_hdpe),
    .cols_start_add_reading_B (cols_start_add_reading_B_hdpe),
    .rows_start_add_writing   (rows_start_add_writing_hdpe),
    .cols_start_add_writing   (cols_start_add_writing_hdpe),
    // Operand sizes
    .rows_size_reading_A      (rows_size_reading_A_hdpe),
    .cols_size_reading_A      (cols_size_reading_A_hdpe),
    .rows_size_reading_B      (rows_size_reading_B_hdpe),
    .cols_size_reading_B      (cols_size_reading_B_hdpe),
    // Outputs
    .done                     (done_hdpe),
    .row_addr_A               (row_addr_A_hdpe),
    .col_addr_A               (col_addr_A_hdpe),
    .row_addr_B               (row_addr_B_hdpe),
    .col_addr_B               (col_addr_B_hdpe),
    .row_addr_out             (row_addr_out_hdpe),
    .col_addr_out             (col_addr_out_hdpe),
    .read_enable_A            (read_enable_A_hdpe),
    .read_enable_B            (read_enable_B_hdpe),
    .write_enable_out         (write_enable_out_hdpe),
    .data_out                 (data_out_hdpe),
    // Full-row interface
    .full_row_A               (full_row_A_hdpe),
    .full_row_B               (full_row_B_hdpe),
    .read_full_row_A          (read_full_row_A_hdpe),
    .read_full_row_B          (read_full_row_B_hdpe),
    .write_full_row_out       (write_full_row_3_hdpe),
    .Full_row_out             (full_row_output_hdpe)
);
// Full-row read request of the RPAS unit. The routing logic and the memory
// read strobes use this name. Declared explicitly here: it was previously
// only created as an implicit net by the port connection below, which fails
// under `default_nettype none and hides spelling mistakes.
wire read_enable_rpas;

// === Instantiate top_module_rpas ===
// Reset by the FSM-controlled rst_rpas. Reads full rows from the memory
// selected by the current state and writes its results to memory 3.
top_module_rpas #(
    .ROWS_READING  (MEM_ROWS),
    .COLS_READING  (MEM_COLS),
    .ROWS_WRITING  (MEM_ROWS),
    .COLS_WRITING  (MEM_COLS),
    .DATA_WIDTH    (DATA_WIDTH),
    .MIN_THRESHOLD (COMMON_ROW_COL-1)
) u_rpas (
    // Control
    .clk                    (clk),
    .rst                    (rst_rpas),
    .enable_rpas            (enable_rpas),
    .read_valid             (read_valid_rpas),
    // Region configuration
    .rows_start_add_reading (rows_start_add_reading_rpas),
    .cols_start_add_reading (cols_start_add_reading_rpas),
    .rows_start_add_writing (rows_start_add_writing_rpas),
    .cols_start_add_writing (cols_start_add_writing_rpas),
    .rows_size_reading      (rows_size_reading_rpas),
    .cols_size_reading      (cols_size_reading_rpas),
    .rows_size_writing      (rows_size_writing_rpas),
    .cols_size_writing      (cols_size_writing_rpas),
    // Data from memory
    .data_input_rows        (data_input_rows_rpas),
    // Memory-side addresses, data and strobes
    .row_addr_out_read      (row_addr_out_read_rpas),
    .col_addr_out_read      (col_addr_out_read_rpas),
    .row_addr_out_write     (row_addr_out_write_rpas),
    .col_addr_out_write     (col_addr_out_write_rpas),
    .data_output            (data_input_to_mem_3_rpas),
    .write_enable           (write_enable_rpas),
    .read_enable_full_row   (read_enable_rpas),
    // Completion flag used by the FSM
    .valid_result           (valid_result_rpas)
);
// === Masking unit ===
// Reset by the FSM-controlled rst_mask; active in the MASK_A_Q_KV state,
// where it is connected to memory 3.
top_module_mask #(
    .DATA_WIDTH   (DATA_WIDTH),
    .TOP_K        (TOP_K),
    .ROWS_READING (MEM_ROWS),
    .COLS_READING (MEM_COLS),
    .ROWS_WRITING (MEM_ROWS),
    .COLS_WRITING (MEM_COLS)
) u_mask (
    // Control
    .clk                    (clk),
    .rst                    (rst_mask),
    .enable_mask            (enable_mask),
    .read_valid             (read_valid_mask),
    // Read region
    .rows_start_add_reading (rows_start_add_reading_mask),
    .cols_start_add_reading (cols_start_add_reading_mask),
    .rows_size_reading      (rows_size_reading_mask),
    .cols_size_reading      (cols_size_reading_mask),
    // Write region
    .rows_start_add_writing (rows_start_add_writing_mask),
    .cols_start_add_writing (cols_start_add_writing_mask),
    .rows_size_writing      (rows_size_writing_mask),
    .cols_size_writing      (cols_size_writing_mask),
    // Data
    .data_input_rows        (full_row_output_mask),
    .data_output            (data_output_mask),
    // Memory-side addresses and strobes
    .row_addr_out_read      (row_addr_out_read_mask),
    .col_addr_out_read      (col_addr_out_read_mask),
    .row_addr_out_write     (row_addr_out_write_mask),
    .col_addr_out_write     (col_addr_out_write_mask),
    .read_enable_full_row   (read_enable_full_row_mask),
    .write_enable           (write_enable_mask),
    // Completion flag used by the FSM
    .valid_result           (valid_result_mask)
);
// FSM state register.
// Asynchronous active-high reset forces IDLE; otherwise the state advances to
// next_state on every rising clock edge.
always @(posedge clk or posedge rst) begin
    if (rst) begin
        current_state <= IDLE;
    end
    else begin
        current_state <= next_state;
    end
end
// FSM next-state logic (combinational).
//
// Sequence: IDLE -> RPAS on tokens -> RPAS on weights -> then, per h iteration:
// HDPE T x Q -> HDPE T x K -> RPAS on Q -> RPAS on K -> HDPE Q x K -> mask ->
// UPDATE_H. Every working state is followed by a one-cycle *_RESET state.
// Working states wait for the completion flag of the unit they drive
// (valid_result_rpas, done_hdpe or valid_result_mask).
always @(*) begin
    case (current_state)
        IDLE:               next_state = (enable_top == 1) ? RPAS_TOKEN : IDLE;

        RPAS_TOKEN:         next_state = valid_result_rpas ? RPAS_TOKEN_RESET   : RPAS_TOKEN;
        RPAS_TOKEN_RESET:   next_state = RPAS_WEIGTHS;
        RPAS_WEIGTHS:       next_state = valid_result_rpas ? RPAS_WEIGTHS_RESET : RPAS_WEIGTHS;
        RPAS_WEIGTHS_RESET: next_state = HDPE_T_X_Q;

        HDPE_T_X_Q:         next_state = done_hdpe ? HDPE_T_X_Q_RESET : HDPE_T_X_Q;
        HDPE_T_X_Q_RESET:   next_state = HDPE_T_X_K;
        HDPE_T_X_K:         next_state = done_hdpe ? HDPE_T_X_K_RESET : HDPE_T_X_K;
        HDPE_T_X_K_RESET:   next_state = RPAS_Q;

        RPAS_Q:             next_state = valid_result_rpas ? RPAS_Q_RESET : RPAS_Q;
        RPAS_Q_RESET:       next_state = RPAS_K;
        RPAS_K:             next_state = valid_result_rpas ? RPAS_K_RESET : RPAS_K;
        RPAS_K_RESET:       next_state = HDPE_Q_X_K;

        HDPE_Q_X_K:         next_state = done_hdpe ? HDPE_Q_X_K_RESET : HDPE_Q_X_K;
        HDPE_Q_X_K_RESET:   next_state = MASK_A_Q_KV;

        MASK_A_Q_KV:        next_state = (valid_result_mask) ? MASK_A_Q_KV_RESET : MASK_A_Q_KV;
        MASK_A_Q_KV_RESET:  next_state = UPDATE_H;

        // h_count is incremented by the clocked block in this same state, so
        // the comparison uses h_count+1: loop back while another iteration
        // below H_LIMIT remains, otherwise finish.
        UPDATE_H:           next_state = ((h_count+1) < H_LIMIT) ? HDPE_T_X_Q : DONE;

        DONE:               next_state = DONE; // terminal until reset
        default:            next_state = IDLE;
    endcase

    // Simulation trace. Printed after the case statement so that next_state is
    // the value just computed; printing it before the case showed the value
    // left over from the previous evaluation.
    $display("Time=%0t, current_state=%0d, enable_top=%b, next_state=%0d", $time, current_state, enable_top, next_state);
end
// Signal routing (combinational): for each FSM state, connect the address,
// data and handshake signals of the active unit (RPAS, HDPE array or masking
// unit) to the memory ports that unit uses in that state.
//   RPAS_TOKEN   : RPAS reads memory 1, writes memory 3
//   RPAS_WEIGTHS : RPAS reads memory 2, writes memory 3
//   HDPE_*       : HDPE reads operands A/B from memory 3 and writes back to it
//   RPAS_Q/K     : RPAS reads and writes memory 3
//   MASK_A_Q_KV  : masking unit reads and writes memory 3
// NOTE(review): there are no default assignments and the IDLE, *_RESET,
// UPDATE_H and DONE states assign nothing, so every reg written here keeps its
// previous value in any state/branch that does not assign it (latch
// behaviour). The HDPE branches appear to rely on this to keep full_row_A_hdpe
// while operand B is being fetched - confirm before adding defaults.
// NOTE(review): several regs written here (read_full_row_or_col3_mem,
// no_rows_used*_mem, no_cols_used*_mem, write_enable_3_mem,
// write_full_row_3_mem, full_row_input_3_mem) are also assigned by the clocked
// always block further down, and read_full_row_or_col3_mem is assigned with
// nonblocking '<=' inside this combinational block - confirm this is intended.
always @(*) begin
case (current_state)
IDLE: begin
$display("[IDLE] Waiting for enable in signal assignment...");
end
// RPAS on the token matrix: read full rows from memory 1, write results
// element by element into memory 3.
RPAS_TOKEN: begin
row_addr_1_mem = row_addr_out_read_rpas;
col_addr_1_mem = col_addr_out_read_rpas;
row_addr_3_mem = row_addr_out_write_rpas;
col_addr_3_mem = col_addr_out_write_rpas;
data_in_3_mem = data_input_to_mem_3_rpas;
write_enable_3_mem = write_enable_rpas;
data_input_rows_rpas = full_row_output_1_mem;
read_full_row_1_mem = read_enable_rpas;
read_valid_rpas = valid_1_mem;
no_rows_used1_mem = rows_size_reading_rpas;
no_cols_used1_mem = cols_size_reading_rpas;
$display("[RPAS_TOKEN] in signal assignment...");
end
RPAS_TOKEN_RESET: begin
$display("[RPAS_TOKEN_RESET] in signal assignment...");
end
// RPAS on the weight matrix: same routing as RPAS_TOKEN but reading memory 2.
RPAS_WEIGTHS: begin
row_addr_2_mem = row_addr_out_read_rpas;
col_addr_2_mem = col_addr_out_read_rpas;
row_addr_3_mem = row_addr_out_write_rpas;
col_addr_3_mem = col_addr_out_write_rpas;
data_in_3_mem = data_input_to_mem_3_rpas;
write_enable_3_mem = write_enable_rpas;
data_input_rows_rpas = full_row_output_2_mem;
read_full_row_2_mem = read_enable_rpas;
read_valid_rpas = valid_2_mem;
no_rows_used2_mem = rows_size_reading_rpas;
no_cols_used2_mem = cols_size_reading_rpas;
$display("[RPAS_WEIGTHS] in signal assignment...");
end
RPAS_WEIGTHS_RESET: begin
$display("[RPAS_WEIGTHS_RESET] in signal assignment...");
end
// HDPE T x W^Q: memory 3 serves operand A reads, operand B reads and the
// result write-back, in that priority order (only one address source at a
// time).
HDPE_T_X_Q: begin
if (read_full_row_A_hdpe) begin
// Operand A fetch
row_addr_3_mem = row_addr_A_hdpe;
col_addr_3_mem = col_addr_A_hdpe;
full_row_A_hdpe = full_row_output_3_mem;// blocking assignment (was nonblocking)
no_rows_used3_mem = rows_size_reading_A_hdpe;
no_cols_used3_mem = cols_size_reading_A_hdpe;
valid_mem_input_A_hdpe = valid_3_mem;// TODO (original author): change this logic
if (valid_3_mem)
full_row_A_hdpe = full_row_output_3_mem;// same value as the unconditional assignment above
end else if (read_full_row_B_hdpe) begin
// Operand B fetch
row_addr_3_mem = row_addr_B_hdpe;
col_addr_3_mem = col_addr_B_hdpe;
full_row_B_hdpe = full_row_output_3_mem;// blocking assignment (was nonblocking)
no_rows_used3_mem = cols_size_reading_B_hdpe;// rows/cols swapped because B was transposed when it was saved to memory
no_cols_used3_mem = rows_size_reading_B_hdpe;
valid_mem_input_B_hdpe = valid_3_mem;// TODO (original author): change this logic
if (valid_3_mem)
full_row_B_hdpe = full_row_output_3_mem;// same value as the unconditional assignment above
end else if (write_full_row_3_hdpe)begin
// Result write-back
row_addr_3_mem = row_addr_out_hdpe;
col_addr_3_mem = col_addr_out_hdpe;
no_rows_used3_mem = rows_size_reading_A_hdpe;
no_cols_used3_mem = cols_size_reading_B_hdpe;
end
if ((write_full_row_3_hdpe == 1) && (cols_size_reading_B_hdpe >= rows_size_reading_A_hdpe)) begin
read_full_row_or_col3_mem <= 1'b1;// 0 = row-wise, 1 = column-wise (default)
end
// With no valid memory data, neither operand input is flagged valid.
if (valid_3_mem == 0) begin
valid_mem_input_A_hdpe = valid_3_mem;// TODO (original author): change this logic
valid_mem_input_B_hdpe = valid_3_mem;// TODO (original author): change this logic
end
read_full_row_3_mem = read_full_row_A_hdpe | read_full_row_B_hdpe;
write_full_row_3_mem = write_full_row_3_hdpe;
data_in_3_mem = data_out_hdpe;
full_row_input_3_mem = full_row_output_hdpe;
$display("[HDPE_T_X_Q] in signal assignment...");
end
HDPE_T_X_Q_RESET: begin
$display("[HDPE_T_X_Q_RESET] in signal assignment...");
end
// HDPE T x W^K: identical routing to HDPE_T_X_Q.
HDPE_T_X_K: begin
if (read_full_row_A_hdpe) begin
// Operand A fetch
row_addr_3_mem = row_addr_A_hdpe;
col_addr_3_mem = col_addr_A_hdpe;
full_row_A_hdpe = full_row_output_3_mem;// blocking assignment (was nonblocking)
no_rows_used3_mem = rows_size_reading_A_hdpe;
no_cols_used3_mem = cols_size_reading_A_hdpe;
valid_mem_input_A_hdpe = valid_3_mem;// TODO (original author): change this logic
if (valid_3_mem)
full_row_A_hdpe = full_row_output_3_mem;// same value as the unconditional assignment above
end else if (read_full_row_B_hdpe) begin
// Operand B fetch
row_addr_3_mem = row_addr_B_hdpe;
col_addr_3_mem = col_addr_B_hdpe;
full_row_B_hdpe = full_row_output_3_mem;// blocking assignment (was nonblocking)
no_rows_used3_mem = cols_size_reading_B_hdpe;// rows/cols swapped because B was transposed when it was saved to memory
no_cols_used3_mem = rows_size_reading_B_hdpe;
valid_mem_input_B_hdpe = valid_3_mem;// TODO (original author): change this logic
if (valid_3_mem)
full_row_B_hdpe = full_row_output_3_mem;// same value as the unconditional assignment above
end else if (write_full_row_3_hdpe)begin
// Result write-back
row_addr_3_mem = row_addr_out_hdpe;
col_addr_3_mem = col_addr_out_hdpe;
no_rows_used3_mem = rows_size_reading_A_hdpe;
no_cols_used3_mem = cols_size_reading_B_hdpe;
end
if ((write_full_row_3_hdpe == 1) && (cols_size_reading_B_hdpe >= rows_size_reading_A_hdpe)) begin
read_full_row_or_col3_mem <= 1'b1;// 0 = row-wise, 1 = column-wise (default)
end
// With no valid memory data, neither operand input is flagged valid.
if (valid_3_mem == 0) begin
valid_mem_input_A_hdpe = valid_3_mem;
valid_mem_input_B_hdpe = valid_3_mem;
end
read_full_row_3_mem = read_full_row_A_hdpe | read_full_row_B_hdpe;
write_full_row_3_mem = write_full_row_3_hdpe;
data_in_3_mem = data_out_hdpe;
full_row_input_3_mem = full_row_output_hdpe;
$display("[HDPE_T_X_K] in signal assignment...");
end
HDPE_T_X_K_RESET: begin
$display("[HDPE_T_X_K_RESET] in signal assignment...");
end
// RPAS on Q: both the read and the write side use memory 3, so the address
// source follows whichever strobe is active (read has priority).
RPAS_Q : begin
if (read_enable_rpas) begin
row_addr_3_mem = row_addr_out_read_rpas;
col_addr_3_mem = col_addr_out_read_rpas;
read_full_row_or_col3_mem <= 1'b0;// 0: read row-wise
end else if (write_enable_rpas) begin
row_addr_3_mem = row_addr_out_write_rpas;
col_addr_3_mem = col_addr_out_write_rpas;
read_full_row_or_col3_mem <= 1'b1;// 1: column-wise while writing
end
data_in_3_mem = data_input_to_mem_3_rpas;
write_enable_3_mem = write_enable_rpas;
data_input_rows_rpas = full_row_output_3_mem;
read_full_row_3_mem = read_enable_rpas;
read_valid_rpas = valid_3_mem;
no_rows_used3_mem = rows_size_reading_rpas;
no_cols_used3_mem = cols_size_reading_rpas;
$display("[RPAS_Q] in signal assignment...");
end
RPAS_Q_RESET: begin
$display("[RPAS_Q_RESET] in signal assignment...");
end
// RPAS on K: identical routing to RPAS_Q.
RPAS_K: begin
if (read_enable_rpas) begin
row_addr_3_mem = row_addr_out_read_rpas;
col_addr_3_mem = col_addr_out_read_rpas;
read_full_row_or_col3_mem <= 1'b0;// 0: read row-wise
end else if (write_enable_rpas) begin
row_addr_3_mem = row_addr_out_write_rpas;
col_addr_3_mem = col_addr_out_write_rpas;
read_full_row_or_col3_mem <= 1'b1;// 1: column-wise while writing
end
data_in_3_mem = data_input_to_mem_3_rpas;
write_enable_3_mem = write_enable_rpas;
data_input_rows_rpas = full_row_output_3_mem;
read_full_row_3_mem = read_enable_rpas;
read_valid_rpas = valid_3_mem;
no_rows_used3_mem = rows_size_reading_rpas;
no_cols_used3_mem = cols_size_reading_rpas;
$display("[RPAS_K] in signal assignment...");
end
RPAS_K_RESET: begin
$display("[RPAS_K_RESET] in signal assignment...");
end
// HDPE Q x K: identical routing to HDPE_T_X_Q.
HDPE_Q_X_K: begin
if (read_full_row_A_hdpe) begin
// Operand A fetch
row_addr_3_mem = row_addr_A_hdpe;
col_addr_3_mem = col_addr_A_hdpe;
full_row_A_hdpe = full_row_output_3_mem;// blocking assignment (was nonblocking)
no_rows_used3_mem = rows_size_reading_A_hdpe;
no_cols_used3_mem = cols_size_reading_A_hdpe;
valid_mem_input_A_hdpe = valid_3_mem;// TODO (original author): change this logic
if (valid_3_mem)
full_row_A_hdpe = full_row_output_3_mem;// same value as the unconditional assignment above
end else if (read_full_row_B_hdpe) begin
// Operand B fetch
row_addr_3_mem = row_addr_B_hdpe;
col_addr_3_mem = col_addr_B_hdpe;
full_row_B_hdpe = full_row_output_3_mem;// blocking assignment (was nonblocking)
no_rows_used3_mem = cols_size_reading_B_hdpe;// rows/cols swapped because B was transposed when it was saved to memory
no_cols_used3_mem = rows_size_reading_B_hdpe;
valid_mem_input_B_hdpe = valid_3_mem;
if (valid_3_mem)
full_row_B_hdpe = full_row_output_3_mem;// same value as the unconditional assignment above
end else if (write_full_row_3_hdpe)begin
// Result write-back
row_addr_3_mem = row_addr_out_hdpe;
col_addr_3_mem = col_addr_out_hdpe;
no_rows_used3_mem = rows_size_reading_A_hdpe;
no_cols_used3_mem = cols_size_reading_B_hdpe;
end
if ((write_full_row_3_hdpe == 1) && (cols_size_reading_B_hdpe >= rows_size_reading_A_hdpe)) begin
read_full_row_or_col3_mem <= 1'b1;// 0 = row-wise, 1 = column-wise (default)
end
// With no valid memory data, neither operand input is flagged valid.
if (valid_3_mem == 0) begin
valid_mem_input_A_hdpe = valid_3_mem;
valid_mem_input_B_hdpe = valid_3_mem;
end
read_full_row_3_mem = read_full_row_A_hdpe | read_full_row_B_hdpe;
write_full_row_3_mem = write_full_row_3_hdpe;
data_in_3_mem = data_out_hdpe;
full_row_input_3_mem = full_row_output_hdpe;
$display("[HDPE_Q_X_K] in signal assignment...");
end
HDPE_Q_X_K_RESET: begin
$display("[HDPE_Q_X_K_RESET] in signal assignment...");
end
// Masking: the mask unit reads full rows from memory 3 and writes its
// results back element by element; the read address has priority.
MASK_A_Q_KV: begin
read_full_row_3_mem = read_enable_full_row_mask;
read_valid_mask = valid_3_mem;
full_row_output_mask = full_row_output_3_mem;
data_in_3_mem = data_output_mask;
write_enable_3_mem = write_enable_mask;
no_rows_used3_mem = rows_size_reading_mask;
no_cols_used3_mem = cols_size_reading_mask;
if(read_enable_full_row_mask) begin
row_addr_3_mem = row_addr_out_read_mask;
col_addr_3_mem = col_addr_out_read_mask;
end else if (write_enable_mask) begin
// end else begin
row_addr_3_mem = row_addr_out_write_mask;
col_addr_3_mem = col_addr_out_write_mask;
$display("WR_EN=%b, ADDR=(%d,%d), DATA=%h", write_enable_3_mem, row_addr_3_mem, col_addr_3_mem, data_in_3_mem);
end
$display("[MASK_A_Q_KV] in signal assignment...");
end
MASK_A_Q_KV_RESET:begin
$display("[MASK_A_Q_KV_RESET] in signal assignment...");
end
endcase
end
// Per-state control and configuration registers (clocked).
//
// In each working state this block releases the reset of the active unit,
// enables it and loads its address/size configuration; the following *_RESET
// state puts the unit back into reset. UPDATE_H advances h_count and DONE
// raises done_top.
//
// Changes relative to the previous revision:
//   * current_state is no longer assigned here; it is owned by the FSM state
//     register, and driving it from two always blocks is a multiple-driver
//     error for synthesis.
//   * MASK_A_Q_KV now uses nonblocking assignments like every other state;
//     blocking assignments in a clocked block race with the mask unit
//     sampling enable/rst on the same clock edge.
//   * The unit enables are cleared on reset instead of staying X, and
//     rst_mask is re-asserted in IDLE like the other unit resets.
//
// NOTE(review): read_full_row_or_col3_mem, no_rows_used*_mem,
// no_cols_used*_mem, write_enable_3_mem, write_full_row_3_mem and
// full_row_input_3_mem are also assigned by the combinational signal-routing
// block, so their value depends on simulator scheduling - confirm and give
// each of them a single owner.
// NOTE(review): with the default MEM_COLS = 80 the column start addresses
// 52+20*h_count (h_count = 1 -> columns 72..91) and 92+4*h_count lie beyond
// the last column index 79 - confirm the intended memory map.
always @(posedge clk or posedge rst) begin
    if (rst) begin
        done_top    <= 0;
        rst_hdpe    <= 1;
        rst_rpas    <= 1;
        rst_mask    <= 1;
        enable_hdpe <= 0;
        enable_rpas <= 0;
        enable_mask <= 0;
        // done_writing_to_file_top <= 0;
        h_count     <= 0;
    end else begin
        case (current_state)
            IDLE: begin
                done_top <= 0;
                rst_hdpe <= 1;
                rst_rpas <= 1;
                rst_mask <= 1;
                // done_writing_to_file_top <= 0;
                h_count  <= 0;
                $display("[IDLE] Waiting for enable with clk...");
            end

            RPAS_TOKEN: begin
                /////////////// RPAS initialization /////////////
                rst_rpas                    <= 0;
                enable_rpas                 <= 1;
                rows_start_add_reading_rpas <= 0; // the token matrix is stored in memory 1
                cols_start_add_reading_rpas <= 0;
                rows_start_add_writing_rpas <= 0;
                cols_start_add_writing_rpas <= 0;
                rows_size_reading_rpas      <= 19; // T -> 20x16
                cols_size_reading_rpas      <= 15;
                rows_size_writing_rpas      <= 19; // T^ -> 20x4
                cols_size_writing_rpas      <= 3;
                /////////////// memory initialization ///////////
                read_full_row_or_col1_mem   <= 1'b0; // 0: read row-wise
                no_cols_used1_mem           <= 19;   // T -> 20x16
                no_rows_used1_mem           <= 15;
                read_full_row_or_col3_mem   <= 1'b1; // 1: column-wise for the write side
                no_cols_used3_mem           <= 19;   // T^ -> 20x4
                no_rows_used3_mem           <= 3;
                // memory 1 signals not used in this state
                read_enable_1_mem           <= 0;
                write_full_row_1_mem        <= 0;
                full_row_input_1_mem        <= 0;
                write_enable_1_mem          <= 0;
                $display("[RPAS_TOKEN] ");
            end

            RPAS_TOKEN_RESET: begin
                // put the RPAS unit back into reset
                rst_rpas    <= 1;
                enable_rpas <= 0;
                $display("[RPAS_TOKEN_RESET] ");
            end

            RPAS_WEIGTHS: begin
                /////////////// RPAS initialization /////////////
                rst_rpas                    <= 0;
                enable_rpas                 <= 1;
                rows_start_add_reading_rpas <= 0; // the weights are stored in memory 2
                cols_start_add_reading_rpas <= 0;
                rows_start_add_writing_rpas <= 0;
                cols_start_add_writing_rpas <= 4;
                rows_size_reading_rpas      <= 15; // W -> 16x16
                cols_size_reading_rpas      <= 15;
                rows_size_writing_rpas      <= 15; // W^ -> 16x4
                cols_size_writing_rpas      <= 3;
                /////////////// memory initialization ///////////
                read_full_row_or_col2_mem   <= 1'b0; // 0: read row-wise
                no_cols_used2_mem           <= 15;   // W -> 16x16
                no_rows_used2_mem           <= 15;
                read_full_row_or_col3_mem   <= 1'b1; // 1: column-wise for the write side
                no_cols_used3_mem           <= 15;   // W^ -> 16x4
                no_rows_used3_mem           <= 3;
                // memory 2 signals not used in this state
                read_enable_2_mem           <= 0;
                write_full_row_2_mem        <= 0;
                full_row_input_2_mem        <= 0;
                write_enable_2_mem          <= 0;
                $display("[RPAS_WEIGTHS] ");
            end

            RPAS_WEIGTHS_RESET: begin
                // put the RPAS unit back into reset
                rst_rpas    <= 1;
                enable_rpas <= 0;
                $display("[RPAS_WEIGTHS_RESET] ");
            end

            HDPE_T_X_Q: begin // TODO (original author): update the h_count logic
                /////////////// HDPE initialization /////////////
                rst_hdpe                      <= 0;
                enable_hdpe                   <= 1;
                // 2'b01: input-stationary, 2'b10: weight-stationary
                mode_hdpe                     <= 2'b00; // output-stationary mode
                // operand / result regions
                rows_start_add_reading_A_hdpe <= 0;
                cols_start_add_reading_A_hdpe <= 0;
                rows_start_add_reading_B_hdpe <= (0+4*h_count); // head h of W^Q
                cols_start_add_reading_B_hdpe <= 4;
                rows_start_add_writing_hdpe   <= 0; // common to every h for the time being
                cols_start_add_writing_hdpe   <= (12+4*h_count); // 12 to 21
                rows_size_reading_A_hdpe      <= 19; // T -> 20x4
                cols_size_reading_A_hdpe      <= 3;  // COMMON_ROW_COL-1
                rows_size_reading_B_hdpe      <= 3;  // COMMON_ROW_COL-1
                cols_size_reading_B_hdpe      <= 3;  // head of W^Q -> 4x4
                /////////////// memory initialization ///////////
                read_full_row_or_col3_mem     <= 1'b1; // 0 = row-wise, 1 = column-wise (default)
                // memory 1 / memory 2 signals not used in this state
                read_enable_2_mem             <= 0;
                write_full_row_2_mem          <= 0;
                full_row_input_2_mem          <= 0;
                write_enable_2_mem            <= 0;
                read_enable_1_mem             <= 0;
                write_full_row_1_mem          <= 0;
                full_row_input_1_mem          <= 0;
                write_enable_1_mem            <= 0;
                $display("[HDPE_T_X_Q] ");
            end

            HDPE_T_X_Q_RESET: begin
                // put the HDPE unit back into reset
                rst_hdpe    <= 1;
                enable_hdpe <= 0;
                $display("[HDPE_T_X_Q_RESET] ");
            end

            HDPE_T_X_K: begin // TODO (original author): update the h_count logic
                /////////////// HDPE initialization /////////////
                rst_hdpe                      <= 0;
                enable_hdpe                   <= 1;
                // 2'b01: input-stationary, 2'b10: weight-stationary
                mode_hdpe                     <= 2'b00; // output-stationary mode
                // operand / result regions
                rows_start_add_reading_A_hdpe <= 0;
                cols_start_add_reading_A_hdpe <= 0;
                rows_start_add_reading_B_hdpe <= (8+4*h_count); // head h of W^K
                cols_start_add_reading_B_hdpe <= 4;
                rows_start_add_writing_hdpe   <= 0;
                cols_start_add_writing_hdpe   <= (22+4*h_count); // 22 to 25
                rows_size_reading_A_hdpe      <= 19; // T -> 20x4
                cols_size_reading_A_hdpe      <= 3;  // COMMON_ROW_COL-1
                rows_size_reading_B_hdpe      <= 3;  // COMMON_ROW_COL-1
                cols_size_reading_B_hdpe      <= 3;  // head of W^K -> 4x4
                /////////////// memory initialization ///////////
                read_full_row_or_col3_mem     <= 1'b1; // 0 = row-wise, 1 = column-wise (default)
                // memory 1 / memory 2 signals not used in this state
                read_enable_2_mem             <= 0;
                write_full_row_2_mem          <= 0;
                full_row_input_2_mem          <= 0;
                write_enable_2_mem            <= 0;
                read_enable_1_mem             <= 0;
                write_full_row_1_mem          <= 0;
                full_row_input_1_mem          <= 0;
                write_enable_1_mem            <= 0;
                $display("[HDPE_T_X_K] ");
            end

            HDPE_T_X_K_RESET: begin
                // put the HDPE unit back into reset
                rst_hdpe    <= 1;
                enable_hdpe <= 0;
                $display("[HDPE_T_X_K_RESET] ");
            end

            RPAS_Q: begin
                /////////////// RPAS initialization /////////////
                rst_rpas                    <= 0;
                enable_rpas                 <= 1;
                rows_start_add_reading_rpas <= 0; // Q is stored in memory 3
                cols_start_add_reading_rpas <= (12+4*h_count); // 12 to 21
                rows_start_add_writing_rpas <= 0;
                cols_start_add_writing_rpas <= (32+4*h_count); // 32 to 41
                rows_size_reading_rpas      <= 19; // Q^ -> 20x4
                cols_size_reading_rpas      <= 3;
                rows_size_writing_rpas      <= 19; // Q^- -> 20x4
                cols_size_writing_rpas      <= 3;
                /////////////// memory initialization ///////////
                no_cols_used3_mem           <= 15; // Q^- -> 20x4
                no_rows_used3_mem           <= 3;
                // memory 3 signals cleared here (see NOTE(review) in the header:
                // write_enable_3_mem is also driven by the routing logic)
                read_enable_3_mem           <= 0;
                write_full_row_3_mem        <= 0;
                full_row_input_3_mem        <= 0;
                write_enable_3_mem          <= 0;
                $display("[RPAS_Q] ");
            end

            RPAS_Q_RESET: begin
                // put the RPAS unit back into reset
                rst_rpas    <= 1;
                enable_rpas <= 0;
                $display("[RPAS_Q_RESET] ");
            end

            RPAS_K: begin
                /////////////// RPAS initialization /////////////
                rst_rpas                    <= 0;
                enable_rpas                 <= 1;
                rows_start_add_reading_rpas <= 0; // K is stored in memory 3
                cols_start_add_reading_rpas <= (22+4*h_count); // 22 to 31
                rows_start_add_writing_rpas <= 0;
                cols_start_add_writing_rpas <= (42+4*h_count); // 42 to 51
                rows_size_reading_rpas      <= 19; // K^ -> 20x4
                cols_size_reading_rpas      <= 3;
                rows_size_writing_rpas      <= 19; // K^- -> 20x4
                cols_size_writing_rpas      <= 3;
                /////////////// memory initialization ///////////
                no_cols_used3_mem           <= 15; // K^- -> 20x4
                no_rows_used3_mem           <= 3;
                // memory 3 signals cleared here (see NOTE(review) in the header)
                read_enable_3_mem           <= 0;
                write_full_row_3_mem        <= 0;
                full_row_input_3_mem        <= 0;
                write_enable_3_mem          <= 0;
                $display("[RPAS_K] ");
            end

            RPAS_K_RESET: begin
                // put the RPAS unit back into reset
                rst_rpas    <= 1;
                enable_rpas <= 0;
                $display("[RPAS_K_RESET] ");
            end

            HDPE_Q_X_K: begin // TODO (original author): update the h_count logic
                /////////////// HDPE initialization /////////////
                rst_hdpe                      <= 0;
                enable_hdpe                   <= 1;
                // 2'b01: input-stationary, 2'b10: weight-stationary
                mode_hdpe                     <= 2'b00; // output-stationary mode
                // operand / result regions
                rows_start_add_reading_A_hdpe <= 0; // Q^- -> 20x4
                cols_start_add_reading_A_hdpe <= (32+4*h_count); // 32 to 41
                rows_start_add_reading_B_hdpe <= 0; // K^- -> 20x4
                cols_start_add_reading_B_hdpe <= (42+4*h_count); // 42 to 51
                rows_start_add_writing_hdpe   <= 0; // common to every h for the time being
                cols_start_add_writing_hdpe   <= (52+20*h_count); // 52 to 71 & 72 to 91
                rows_size_reading_A_hdpe      <= 19; // Q^- -> 20x4
                cols_size_reading_A_hdpe      <= 3;  // COMMON_ROW_COL-1
                rows_size_reading_B_hdpe      <= 3;  // COMMON_ROW_COL-1
                cols_size_reading_B_hdpe      <= 19; // B column count - 1
                /////////////// memory initialization ///////////
                read_full_row_or_col3_mem     <= 1'b1; // 0 = row-wise, 1 = column-wise (default)
                // memory 1 / memory 2 signals not used in this state
                read_enable_2_mem             <= 0;
                write_full_row_2_mem          <= 0;
                full_row_input_2_mem          <= 0;
                write_enable_2_mem            <= 0;
                read_enable_1_mem             <= 0;
                write_full_row_1_mem          <= 0;
                full_row_input_1_mem          <= 0;
                write_enable_1_mem            <= 0;
                $display("[HDPE_Q_X_K] ");
            end

            HDPE_Q_X_K_RESET: begin
                // put the HDPE unit back into reset
                rst_hdpe    <= 1;
                enable_hdpe <= 0;
                $display("[HDPE_Q_X_K_RESET] ");
            end

            MASK_A_Q_KV: begin
                // Nonblocking like every other state, so the mask unit sees
                // the new control values on the following clock edge.
                read_full_row_or_col3_mem   <= 0; // row-wise
                enable_mask                 <= 1;
                rst_mask                    <= 0;
                // Configure parameters
                rows_start_add_reading_mask <= 0;
                cols_start_add_reading_mask <= (52+20*h_count);
                rows_start_add_writing_mask <= 0;
                cols_start_add_writing_mask <= (92+4*h_count);
                rows_size_reading_mask      <= 19; // A -> 20x20
                cols_size_reading_mask      <= 19;
                rows_size_writing_mask      <= 19; // mask 20x4
                // NOTE(review): every other size in this block is (count - 1);
                // confirm whether 4 or 3 is intended for a 4-column region.
                cols_size_writing_mask      <= 4;
                $display("[MASK_A_Q_KV] ");
            end

            MASK_A_Q_KV_RESET: begin
                // put the masking unit back into reset
                rst_mask    <= 1;
                enable_mask <= 0;
                $display("[MASK_A_Q_KV_RESET] ");
            end

            UPDATE_H: begin
                h_count <= h_count + 1;
                // The message prints the value before the increment takes effect.
                $display("[UPDATE__H]At time %0t: h_count = %d", $time, h_count);
                $display("[UPDATE_H] updating the value of the h");
            end

            DONE: begin
                done_top <= 1;
                $display("[DONE] Matrix multiplication completed.");
            end

            default: begin
                done_top <= 0;
                $display("[DEFAULT] Default option");
            end
        endcase
    end
end
endmodule