Add initial code from old version of SbS core
This commit is contained in:
parent
768d16789e
commit
a365b9b01d
20 changed files with 1332 additions and 0 deletions
57
hw/beh/hu.vhd
Normal file
57
hw/beh/hu.vhd
Normal file
|
@ -0,0 +1,57 @@
|
|||
-- hu
|
||||
-- Update H using stream of weights
|
||||
|
||||
use work.pkg_sbs.all;
|
||||
|
||||
-- Top level of the H-update unit.
-- Pure structure: it instantiates the control path (hu_ctr) and the data
-- path (hu_dp) and connects them through ctr_hu, loc_h and eps.
entity hu is
  port (
    clk, rstn : in  bit;
    cfg_hu    : in  bit_vector(BW_HU_CFG -1 downto 0);  -- Configuration word
    ena_w     : in  bit;                                -- A new weight is present on wi
    is_ini    : in  bit;   -- First vector (get w and h when ena)
    is_fst    : in  bit;   -- First component in vector
    ena_ho    : out bit;   -- Flags a valid value on ho
    wi        : in  real;  -- Stream of weights
    hi        : in  real;  -- Stream of input states
    ho        : out real); -- Stream of output states
end entity hu;


architecture rtlf of hu is

  -- Links between the two sub-blocks
  signal eps    : real;                                  -- hu_ctr -> hu_dp
  signal loc_h  : bit_vector(ADDR_H_MAX-1 downto 0);     -- hu_dp  -> hu_ctr
  signal ctr_hu : bit_vector(BW_HU_CTR-1 downto 0);      -- hu_ctr -> hu_dp

begin  -- architecture rtlf

  -- Control path: turns the external flags into the control word ctr_hu
  i_hu_ctr: entity work.hu_ctr
    port map (
      clk    => clk,
      rstn   => rstn,
      cfg_hu => cfg_hu,
      ena_w  => ena_w,
      is_ini => is_ini,
      is_fst => is_fst,
      loc_h  => loc_h,
      ena_ho => ena_ho,
      eps    => eps,
      ctr_hu => ctr_hu);

  -- Data path: consumes wi/hi and produces ho under control of ctr_hu
  i_hu_dp: entity work.hu_dp
    port map (
      clk    => clk,
      rstn   => rstn,
      ctr_hu => ctr_hu,
      loc_h  => loc_h,
      eps    => eps,
      wi     => wi,
      hi     => hi,
      ho     => ho);

end architecture rtlf;
|
72
hw/beh/hu_ctr.vhd
Normal file
72
hw/beh/hu_ctr.vhd
Normal file
|
@ -0,0 +1,72 @@
|
|||
-- hu_ctr
|
||||
-- Control path for Update H using stream of weights
|
||||
|
||||
use work.pkg_sbs.all;
|
||||
|
||||
-- Control path of the H-update unit.
-- Derives the control word ctr_hu for the data path from the external
-- flags ena_w / is_ini / is_fst and drives the constant eps.
entity hu_ctr is
  port (
    clk, rstn : in  bit;
    cfg_hu    : in  bit_vector(BW_HU_CFG -1 downto 0);   -- Config (not used by this architecture)
    ena_w     : in  bit;                                 -- New weight
    is_ini    : in  bit;  -- First vector (get w and h when ena)
    is_fst    : in  bit;  -- First component in vector
    loc_h     : in  bit_vector(ADDR_H_MAX-1 downto 0);   -- Current location in H (not used by this architecture)
    ena_ho    : out bit;                                 -- Signal a valid ho value
    eps       : out real;
    ctr_hu    : out bit_vector(BW_HU_CTR-1 downto 0));   -- Control for data path

end entity hu_ctr;

library ieee;
use ieee.numeric_bit.all;

architecture beh of hu_ctr is

  signal ctr_sel_ini, ctr_sum_ini, ctr_update_sum, ctr_update_sum2 : bit;
  signal ctr_addr_rst, ctr_addr_inc, ctr_write_hw                  : bit;
  signal ctr_wr_hw, ctr_wr_hp                                      : bit;

  -- Last valid location in H (currently fixed).
  -- The previous value to_unsigned(8, ADDR_H_MAX) does not fit in
  -- ADDR_H_MAX = 3 bits; N_H_MAX-1 is the largest address that does.
  -- Only referenced by the commented-out alternative for ctr_update_sum.
  constant MAX_LOC_H : bit_vector(ADDR_H_MAX-1 downto 0) :=
    bit_vector(to_unsigned(N_H_MAX-1, ADDR_H_MAX));

  -- Time unit for the behavioural delay of ctr_update_sum below.
  -- This declaration was commented out although T is still used, which
  -- left T undefined.  10 ns is the clock period used by tst_hu.
  constant T : time := 10 ns;

begin  -- architecture beh

  eps <= 0.2;

  -- Pack the control word.  Bits 8 and above are not driven here.
  ctr_hu(0) <= ctr_sel_ini;
  ctr_hu(1) <= ctr_sum_ini;
  ctr_hu(2) <= ctr_update_sum;
  ctr_hu(3) <= ctr_addr_rst;
  ctr_hu(4) <= ctr_addr_inc;
  ctr_hu(5) <= ctr_write_hw;            -- ctr_write_hw has no driver in this architecture
  ctr_hu(6) <= ctr_wr_hw;
  --ctr_hu(7) <= ctr_wr_hp ; -- ctr_wr_hp and ctr_wr_hw are the same
  ctr_hu(7) <= ctr_update_sum2;

  -- Code in first approximation
  ctr_sel_ini <= is_ini;
  ctr_wr_hp   <= ena_w;                 --is_ini;
  ctr_wr_hw   <= ena_w;
  ctr_sum_ini <= is_fst;

  -- Copy of is_fst delayed by 7*T (behavioural, not synthesizable)
  ctr_update_sum <= transport is_fst after 7*T;
  --ctr_update_sum2 <= transport ctr_update_sum after T;
  --ctr_update_sum <= '1' when (loc_h = MAX_LOC_H) else '0';

  ctr_addr_rst <= ctr_update_sum;
  ctr_addr_inc <= ena_w and not ctr_addr_rst;

  ena_ho <= ena_w and not is_ini;

  -- ctr_update_sum2 is ctr_update_sum delayed by one clock cycle
  rg: process (clk, rstn) is
  begin
    if rstn = '0' then                  -- asynchronous reset (active low)
      ctr_update_sum2 <= '0';
    elsif clk'event and clk = '1' then  -- rising clock edge
      ctr_update_sum2 <= ctr_update_sum;
    end if;
  end process rg;

end architecture beh;
|
124
hw/beh/hu_dp.vhd
Normal file
124
hw/beh/hu_dp.vhd
Normal file
|
@ -0,0 +1,124 @@
|
|||
-- hu_dp
|
||||
-- Data path for Update H using stream of weights
|
||||
|
||||
use work.pkg_sbs.all;
|
||||
|
||||
-- Behavioural data path of the H-update unit (all arithmetic in real).
entity hu_dp is
  port (
    clk, rstn : in  bit;
    ctr_hu    : in  bit_vector(BW_HU_CTR-1 downto 0);   -- Control word from hu_ctr
    loc_h     : out bit_vector(ADDR_H_MAX-1 downto 0);  -- Current location in H
    eps       : in  real;
    wi        : in  real;    -- Stream of weights
    hi        : in  real;    -- Stream of input states
    ho        : out real);   -- Stream of output states
end entity hu_dp;

library ieee;
use ieee.numeric_bit.all;

architecture rtlf of hu_dp is

  -- Decoded control word
  signal ctr_sel_ini, ctr_sum_ini, ctr_update_sum, ctr_update_sum2 : bit;
  signal ctr_addr_rst, ctr_addr_inc, ctr_write_hw                  : bit;
  signal ctr_wr_hw, ctr_wr_hp                                      : bit;

  -- Storage and its address
  signal mem_hp            : array_as_h;  -- Un-normalized state hp
  signal mem_hw            : array_as_h;  -- Products hp*w
  signal addr_wr, addr_nxt : bit_vector(ADDR_H_MAX-1 downto 0);

  -- hp is the un-normalized h, hw is hp*w
  signal hp_new, hp_new_rg, hp_p, h_eff : real := 0.0;
  signal hw_p, hw_nxt                   : real := 0.0;

  -- Running accumulators and their saved copies
  signal sum_hw, sum_hw_nxt     : real := 0.0;  -- Running sum of hw
  signal sum_hw_p, sum_hw_p_nxt : real := 0.0;  -- Saved sum of hw
  signal sum_hp, sum_hp_nxt     : real := 0.0;  -- Running sum of hp
  signal sum_hp_p, sum_hp_p_nxt : real := 0.0;  -- Saved factor applied to hw_p

begin  -- architecture rtlf

  ---------------------------------------------------------------------------
  -- Control word decoding (bit 6 feeds both write enables)
  ---------------------------------------------------------------------------
  ctr_sel_ini     <= ctr_hu(0);
  ctr_sum_ini     <= ctr_hu(1);
  ctr_update_sum  <= ctr_hu(2);
  ctr_addr_rst    <= ctr_hu(3);
  ctr_addr_inc    <= ctr_hu(4);
  ctr_write_hw    <= ctr_hu(5);
  ctr_wr_hw       <= ctr_hu(6);
  ctr_wr_hp       <= ctr_hu(6);
  ctr_update_sum2 <= ctr_hu(7);

  ---------------------------------------------------------------------------
  -- Address generation
  ---------------------------------------------------------------------------
  addr_nxt <= (others => '0')                    when ctr_addr_rst='1' else
              bit_vector(unsigned(addr_wr) + 1)  when ctr_addr_inc='1' else
              addr_wr;
  loc_h <= addr_wr;                     -- Reported to the control path

  ---------------------------------------------------------------------------
  -- Memory read side (addressed by the next address)
  ---------------------------------------------------------------------------
  hp_p <= mem_hp(to_integer(unsigned(addr_nxt)));
  --hw_p <= mem_hw(to_integer(unsigned(addr_nxt)));
  -- While ctr_update_sum2 is set, sum_hp is routed into the multiplier
  hw_p <= mem_hw(to_integer(unsigned(addr_nxt))) when ctr_update_sum2='0'
          else sum_hp;

  ---------------------------------------------------------------------------
  -- Arithmetic
  ---------------------------------------------------------------------------
  -- Main update
  hp_new <= hp_p * sum_hw_p + sum_hp_p * hw_p;

  -- Take the external h while ctr_sel_ini is set, the registered update otherwise
  h_eff <= hi when ctr_sel_ini='1' else hp_new_rg;

  -- Product with the incoming weight
  hw_nxt <= h_eff * wi;

  -- Output h (note latency of a complete group)
  ho <= h_eff;

  ---------------------------------------------------------------------------
  -- Accumulators
  ---------------------------------------------------------------------------
  sum_hw_nxt <= hw_nxt when ctr_sum_ini='1' else sum_hw + hw_nxt;
  sum_hp_nxt <= h_eff  when ctr_sum_ini='1' else sum_hp + h_eff;

  sum_hw_p_nxt <= 0.0    when ctr_update_sum='1'  else
                  sum_hw when ctr_update_sum2='1' else
                  sum_hw_p;

  --sum_hp_p_nxt <= sum_hp_nxt * eps when ctr_update_sum='1' else sum_hp_p;
  sum_hp_p_nxt <= eps    when ctr_update_sum='1'  else
                  hp_new when ctr_update_sum2='1' else
                  sum_hp_p;

  ---------------------------------------------------------------------------
  -- State registers (asynchronous, active-low reset)
  ---------------------------------------------------------------------------
  rg: process (clk, rstn) is
  begin
    if rstn = '0' then
      addr_wr   <= (others => '0');
      hp_new_rg <= 0.0;
      sum_hw    <= 0.0;
      sum_hw_p  <= 0.0;
      sum_hp    <= 0.0;
      sum_hp_p  <= 0.0;
    elsif clk'event and clk = '1' then
      addr_wr   <= addr_nxt;
      hp_new_rg <= hp_new;
      sum_hw    <= sum_hw_nxt;
      sum_hw_p  <= sum_hw_p_nxt;
      sum_hp    <= sum_hp_nxt;
      sum_hp_p  <= sum_hp_p_nxt;
    end if;
  end process rg;

  ---------------------------------------------------------------------------
  -- Memory write side (addressed by the current address, no reset)
  ---------------------------------------------------------------------------
  mem: process (clk) is
  begin
    if clk'event and clk = '1' then     -- rising clock edge
      if ctr_wr_hp='1' then
        mem_hp(to_integer(unsigned(addr_wr))) <= h_eff;
      end if;
      if ctr_wr_hw='1' then
        mem_hw(to_integer(unsigned(addr_wr))) <= hw_nxt;
      end if;
    end if;
  end process mem;

end architecture rtlf;
|
49
hw/beh/mem_sync.vhd
Normal file
49
hw/beh/mem_sync.vhd
Normal file
|
@ -0,0 +1,49 @@
|
|||
-- Implementation of a synchronous single port memory
|
||||
|
||||
library ieee;
|
||||
use ieee.numeric_bit.all;
|
||||
|
||||
|
||||
-- Behavioural single-port memory of reals with 2**BA entries.
entity mem_sync is
  generic (
    BA : natural := 7);                 -- log2 of the number of addresses
  port (
    clk  : in  bit;
    wr   : in  bit;                     -- Write enable
    rd   : in  bit;                     -- Read enable
    addr : in  bit_vector(BA-1 downto 0);
    dti  : in  real;                    -- Data to write
    dto  : out real);                   -- Data read (registered)
end entity mem_sync;


library ieee;
use ieee.numeric_bit.all;


architecture beh of mem_sync is

  signal addr_rg : unsigned(BA-1 downto 0);  -- Address sampled on the previous edge

begin  -- architecture beh

  mem: process (clk) is
    type mem_ty is array (0 to 2**(addr'length) - 1) of real;
    variable w_mem : mem_ty;
    variable slot  : natural;
  begin  -- process mem
    if clk'event and clk = '1' then     -- rising clock edge
      -- Both accesses use the address captured on the PREVIOUS edge.
      -- NOTE(review): dti is therefore paired with the address of the
      -- cycle before -- confirm this is the intended timing.
      slot    := to_integer(addr_rg);
      addr_rg <= unsigned(addr);

      if wr='1' then
        w_mem(slot) := dti;
      end if;

      -- A read in the same cycle as a write returns the new data
      if rd='1' then
        dto <= w_mem(slot);
      end if;
    end if;
  end process mem;

end architecture beh;
|
||||
|
||||
-- Local Variables:
|
||||
-- compile-command: "ghdl -a --std=00 --workdir=../do_sim/ mem_sync.vhd"
|
||||
-- End:
|
15
hw/beh/pkg_sbs.vhd
Normal file
15
hw/beh/pkg_sbs.vhd
Normal file
|
@ -0,0 +1,15 @@
|
|||
-- Constants and types shared by the H-update blocks.
package pkg_sbs is

  -- Size of H and of its address bus
  constant N_H_MAX    : natural := 8;   -- Max size of H
  constant ADDR_H_MAX : natural := 3;   -- log2 of N_H_MAX; width of the address bus

  -- Bus widths between the blocks
  constant BW_HU_CTR : natural := 10;   -- Bits of the control word
  constant BW_HU_CFG : natural := 15;   -- Bits of the configuration word

  --subtype hu_ctr is bit_vector 4 downto 0;

  -- One real per location of H
  type array_as_h is array (N_H_MAX-1 downto 0) of real;

end package pkg_sbs;
|
99
hw/beh/pkg_ufp.vhd
Normal file
99
hw/beh/pkg_ufp.vhd
Normal file
|
@ -0,0 +1,99 @@
|
|||
-- Library of functions to work with unsigned FP numbers
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
|
||||
package pkg_ufp is
  -- Unsigned floating-point format
  --
  --   [ ee ][ mm ]     BW bits in total, the upper BE bits are the exponent
  --
  -- With EO the exponent offset, the value of a word is
  --   mm * 2**(EO - ee - (BW-BE))
  -- i.e. the mantissa is read as mm / 2**(BW-BE), with mm in
  -- [0 .. 2**(BW-BE)-1], and the exponent as EO - ee, with ee in
  -- [0 .. 2**BE-1].

  -- Decode a word in unsigned floating point into a real
  function ufp_to_real (
    ee_mm : std_logic_vector;           -- Exponent and mantissa bits, exponent first
    BW    : natural;                    -- Total bit width
    BE    : natural;                    -- Bits used for the exponent
    EO    : natural)                    -- Exponent offset
    return real;

  -- Encode a real into a word in unsigned floating point
  function real_to_ufp (
    r  : real;                          -- Value to encode
    BW : natural;                       -- Total bit width
    BE : natural;                       -- Bits used for the exponent
    EO : natural)                       -- Exponent offset
    return std_logic_vector;

end package pkg_ufp;
|
||||
|
||||
library ieee;
|
||||
use ieee.numeric_std.all;
|
||||
use ieee.math_real.all;
|
||||
|
||||
package body pkg_ufp is
|
||||
|
||||
function ufp_to_real (
  ee_mm : std_logic_vector;             -- Exponent and mantissa bits, exponent first
  BW    : natural;                      -- Total bit width
  BE    : natural;                      -- Bits used for the exponent
  EO    : natural)                      -- Exponent offset
  return real
is
  -- Decodes ee_mm as  mm * 2**(EO - ee - (BW-BE)).
  -- ee_mm must be indexed so that bits BW-1 downto 0 exist.
  variable exp_bits  : unsigned(BE-1 downto 0);
  variable mant_bits : unsigned(BW-BE-1 downto 0);
  variable scale_exp : integer;
begin
  -- Split the word: upper BE bits are the exponent, the rest the mantissa
  exp_bits  := unsigned(ee_mm(BW-1 downto BW-BE));
  mant_bits := unsigned(ee_mm(BW-BE-1 downto 0));

  scale_exp := EO-to_integer(exp_bits)-(BW-BE);
  return real(to_integer(mant_bits)) * 2.0**real(scale_exp);
end function ufp_to_real;
|
||||
|
||||
|
||||
function real_to_ufp (
  r  : real;                            -- Real number to convert
  BW : natural;                         -- Bit width
  BE : natural;                         -- Number of bits used for exponent
  EO : natural)                         -- Offset of exponent
  return std_logic_vector
is
  -- Encodes r as ee & mm such that r is approximately mm * 2**(-BO - ee),
  -- with BO = BW-BE-EO.
  --   * r below the smallest non-zero value (this includes zero and
  --     negative numbers) is encoded as ee = all ones, mm = 0.
  --   * r above the largest representable value saturates to ee = 0,
  --     mm = all ones.  Previously the mantissa wrapped around in
  --     to_unsigned, e.g. 2.0 with (BW,BE,EO) = (10,3,1) decoded as 0.
  variable BO, MAX_EXP, BM : integer;
  variable R_MIN, BM_LIM   : real;
  variable mm     : unsigned(BW-BE-1 downto 0);
  variable ee     : unsigned(BE-1 downto 0);
  variable aa     : integer;  -- Scaling to normalize r into ufp representation
  variable mm_id  : integer;
  variable mm_max : integer;  -- Largest mantissa that fits in BM bits
  variable mm_r   : real;     -- Rounded mantissa before conversion to integer
begin
  BO      := BW-BE-EO;                  -- Exponent of Scaling factor
  BM      := BW - BE;                   -- Bits for mantissa
  BM_LIM  := log2(2.0**BM-1.0);         -- A bit less than BM
  MAX_EXP := 2**BE-1;                   -- Max exponent
  R_MIN   := 2.0**(-MAX_EXP-BO);        -- Min value (not equal zero)
  mm_max  := 2**BM-1;

  if r<R_MIN then                       -- If too small, set to zero
    ee := (others => '1');
    mm := (others => '0');
  else
    -- Largest scaling that keeps r * 2**aa within the mantissa range;
    -- BM_LIM (instead of BM) keeps the product at or below 2**BM-1.
    --aa := BM - integer(ceil(log2(r)));
    aa := integer(floor(BM_LIM - log2(r)));
    if aa < BO then
      aa := BO;
    end if;
    if aa > MAX_EXP+BO then
      aa := MAX_EXP+BO;
    end if;
    --report "[TST] aa=" & integer'image(aa) severity note;
    --report "[TST] rr=" & real'image( r * 2.0**aa ) severity note;

    -- Saturate while still in real: once aa has been clamped to BO the
    -- product can exceed the mantissa range, and a very large r would
    -- even overflow the conversion to integer.
    mm_r := round(r * 2.0**aa);
    --mm_id := integer(floor(r * 2.0**aa)); -- ????
    if mm_r > real(mm_max) then
      mm_id := mm_max;
    else
      mm_id := integer(mm_r);
    end if;

    ee := to_unsigned(aa-BO, ee'length);
    mm := to_unsigned(mm_id, mm'length);
  end if;
  return std_logic_vector(ee) & std_logic_vector(mm);
end function real_to_ufp;
|
||||
|
||||
|
||||
end package body pkg_ufp;
|
126
hw/beh/tst_hu.vhd
Normal file
126
hw/beh/tst_hu.vhd
Normal file
|
@ -0,0 +1,126 @@
|
|||
-- tst_hu
|
||||
-- Testbench for Update H using stream of weights
|
||||
|
||||
use work.pkg_sbs.all;
|
||||
|
||||
entity tst_hu is

end entity tst_hu;


-- Testbench for hu: one process streams the initial state h, one streams
-- the weights, and a checker compares every valid ho against reference
-- values.
architecture tst of tst_hu is

  constant T : time := 10 ns;           -- Period

  signal clk, rstn : bit := '0';
  signal cfg_hu    : bit_vector(BW_HU_CFG -1 downto 0);
  signal ena_w     : bit;
  signal is_ini    : bit;
  signal is_fst    : bit;
  signal ena_ho    : bit;
  signal wi        : real := 0.0;
  signal hi        : real := 0.0;
  signal ho        : real;

begin  -- architecture tst

  clk  <= not clk after T/2;
  rstn <= '0', '1' after T/2+T/4;

  i_hu: entity work.hu
    port map (
      clk    => clk,
      rstn   => rstn,
      cfg_hu => cfg_hu,
      ena_w  => ena_w,
      is_ini => is_ini,
      is_fst => is_fst,
      ena_ho => ena_ho,
      wi     => wi,
      hi     => hi,
      ho     => ho);

  -- Checker: on every rising edge with ena_ho set, compare ho with the
  -- next reference value (while any are left) and log ho.
  process (clk) is
    type array_sol is array (natural range <>) of real;

    -- Example of solution from python
    constant h_sol : array_sol := (
      --0.1 , 0.2 , 0.3 , 0.0 , 0.01, 0.01, 0.1 , 0.28,
      0.01628 , 0.02056 , 0.03144 , 0.0 , 0.001228, 0.001588, 0.01228 , 0.039984,
      1.28343706e-04, 1.62085171e-04, 3.17669760e-04, 0.00000000e+00, 1.24077120e-05, 1.99630656e-05, 1.84671552e-04, 3.05349811e-04,
      2.17637063e-08, 2.74853687e-08, 5.38684101e-08, 0.00000000e+00, 2.10402060e-09, 3.38520923e-09, 3.13154230e-08, 5.17792718e-08
      );

    variable idx : natural;             -- Next reference value to check (starts at 0)

  begin  -- process
    if clk'event and clk = '1' then     -- rising clock edge
      if ena_ho='1' then
        -- h_sol is indexed 0 .. h_sol'length-1.  The previous bound
        -- h_sol'length-1 stopped one early, so the last reference value
        -- was never compared.
        if idx < h_sol'length then
          if abs(ho-h_sol(idx)) > 1.0e-09 then
            report LF & ESC & "[31;1m [ERROR] h_sol= " & real'image(h_sol(idx)) & ESC & "[0m" & LF severity error;
          end if;
          idx := idx+1;
        end if;
        report LF & "[INFO] h_exp= " & real'image(ho) & LF severity note;
      end if;
    end if;
  end process;


  -- Stimulus for hi / is_ini: stream the 8 values of h with is_ini set,
  -- then idle; done twice.
  process is
    constant h : array_as_h := (0.1, 0.2, 0.3, 0.0, 0.01, 0.01, 0.1, 0.28);
  begin  -- process
    hi     <= 0.0;
    is_ini <= '0';
    wait for T/4 + T/2 + T;

    for n in 0 to 1 loop
      for ki in h'range loop
        hi     <= h(ki);
        is_ini <= '1';
        wait for T;
      end loop;  -- ki
      is_ini <= '0';
      hi     <= 0.0;
      --wait for T*(h'length+1)*3; -- note +1 for void cycle
      wait for T*(2+(h'length+2)*3);    -- note +2 for void cycle
    end loop;  -- n
    wait;
  end process;


  -- Stimulus for wi / ena_w / is_fst: stream the three weight vectors,
  -- each followed by two idle cycles; done twice, then stop the simulation.
  process is
    constant w0 : array_as_h := (0.3, 0.0, 0.01, 0.01, 0.1, 0.28, 0.1, 0.2);
    constant w1 : array_as_h := (0.01, 0.01, 0.1, 0.28, 0.1, 0.2, 0.3, 0.0);
    constant w2 : array_as_h := (0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125);

    type array_w is array (0 to 2) of array_as_h;
    constant w : array_w := (w0, w1, w2);

  begin  -- process
    ena_w <= '0';
    wi    <= 0.0;
    wait for T/4 + T/2 + T;

    for n in 0 to 1 loop
      for kj in w'range loop
        is_fst <= '1', '0' after T;     -- Pulse on the first component
        for ki in w0'range loop
          ena_w <= '1';
          wi    <= w(kj)(ki);
          wait for T;
        end loop;  -- ki
        ena_w <= '0';                   -- void cycle
        wait for 2*T;
      end loop;  -- kj
    end loop;  -- n

    ena_w <= '0';
    wait for 4*T;
    -- severity failure is used to stop the simulator
    report LF & LF & ESC & "[35;1m [TST] End simulation" & ESC & "[0m" & LF severity failure;

  end process;

end architecture tst;
|
136
hw/beh/tst_pkg_ufp.vhd
Normal file
136
hw/beh/tst_pkg_ufp.vhd
Normal file
|
@ -0,0 +1,136 @@
|
|||
use work.pkg_ufp.all;
|
||||
|
||||
entity tst_pkg_ufp is

end entity tst_pkg_ufp;

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- Testbench for pkg_ufp.  It only prints: corner-case words are decoded
-- and re-encoded, then a ramp of reals is encoded and decoded.  Results
-- are meant to be inspected in the simulator log.
architecture tst of tst_pkg_ufp is

begin  -- architecture tst

  process
    -- Define params of ufp
    constant BW : natural := 10;
    constant BE : natural := 3;
    constant EO : natural := 1;

    constant BO : natural := BW - BE -EO;

    variable r, r2 : real;
    variable ee_mm : std_logic_vector(BW-1 downto 0);
    variable mm    : std_logic_vector(BW-1-BE downto 0);
    variable ee    : std_logic_vector(BE-1 downto 0);

    -- Ramp: N values starting at r_ini in steps of r_inc
    constant r_ini : real    := 0.0125;
    constant r_inc : real    := 0.0125/4.0;
    constant N     : natural := 10;

    -- Print a real next to an exponent/mantissa pair
    procedure print (
      ar  : in real;
      aee : in std_logic_vector(BE-1 downto 0);
      amm : in std_logic_vector(BW-1-BE downto 0)) is
    begin
      report LF & "[TST] " &
        "r=" & real'image(ar) & HT & HT &
        "ufp= " & integer'image(to_integer(unsigned(amm))) &
        " *2**(-" & integer'image(BO) & "- " & integer'image(to_integer(unsigned(aee))) & " )"
        severity note;
    end procedure;

    -- Decode (aee, amm) to a real, encode that real again, print both steps
    procedure tst_conv_r (
      aee : in std_logic_vector(BE-1 downto 0);
      amm : in std_logic_vector(BW-1-BE downto 0)) is
      variable ar     : real;
      variable aee_mm : std_logic_vector(BW-1 downto 0);
    begin  -- procedure tst_conv_r
      aee_mm := aee & amm;
      ar     := ufp_to_real(aee_mm, BW, BE, EO);
      print(ar, aee, amm);

      aee_mm := real_to_ufp(ar, BW, BE, EO);
      -- Print the re-encoded fields.  The inputs aee/amm were printed
      -- here before, which hid any round-trip mismatch.
      print(ar, aee_mm(BW-1 downto BW-BE), aee_mm(BW-BE-1 downto 0));

      report LF & "[TST] ----------------------------------" severity note;

    end procedure tst_conv_r;

    -- Corner values of the exponent field
    constant ee_0min : std_logic_vector(BE-1 downto 0) := (others => '0');
    constant ee_1min : std_logic_vector(BE-1 downto 0) := (0 => '1', others => '0');
    constant ee_2min : std_logic_vector(BE-1 downto 0) := (1 => '1', others => '0');
    constant ee_0max : std_logic_vector(BE-1 downto 0) := (others => '1');
    constant ee_1max : std_logic_vector(BE-1 downto 0) := (0 => '0', others => '1');
    constant ee_2max : std_logic_vector(BE-1 downto 0) := (1 => '0', others => '1');

    -- Corner values of the mantissa field
    constant mm_0min : std_logic_vector(BW-1-BE downto 0) := (others => '0');
    constant mm_1min : std_logic_vector(BW-1-BE downto 0) := (0 => '1', others => '0');
    constant mm_2min : std_logic_vector(BW-1-BE downto 0) := (1 => '1', others => '0');
    constant mm_0max : std_logic_vector(BW-1-BE downto 0) := (others => '1');
    constant mm_1max : std_logic_vector(BW-1-BE downto 0) := (0 => '0', others => '1');
    constant mm_2max : std_logic_vector(BW-1-BE downto 0) := (1 => '0', others => '1');

    -- Corner cases in the order they are exercised
    type ee_list_t is array (natural range <>) of std_logic_vector(BE-1 downto 0);
    type mm_list_t is array (natural range <>) of std_logic_vector(BW-1-BE downto 0);
    constant EE_CORNERS : ee_list_t := (ee_0max, ee_1max, ee_1min, ee_0min);
    constant MM_CORNERS : mm_list_t := (mm_0min, mm_1min, mm_2min, mm_2max, mm_1max, mm_0max);

  begin
    report LF & "[TST] Test corner examples =============================" severity note;

    -- Conversion from ee_mm to real (and back)
    for ie in EE_CORNERS'range loop
      for im in MM_CORNERS'range loop
        tst_conv_r(EE_CORNERS(ie), MM_CORNERS(im));
      end loop;  -- im
    end loop;  -- ie

    report LF & "[TST] Test ramp =============================" severity note;

    -- Conversion from real to ee_mm (and back)
    r := r_ini;
    for ki in 0 to N-1 loop
      ee_mm := real_to_ufp(r, BW, BE, EO);

      ee := ee_mm(BW-1 downto BW-BE);
      mm := ee_mm(BW-BE-1 downto 0);

      print(r, ee, mm);
      r2 := ufp_to_real(ee_mm, BW, BE, EO);
      print(r2, ee, mm);
      report LF & "[TST] ----------------------------------" severity note;

      r := r + r_inc;
    end loop;  -- ki

    -- (a stray back-quote that stood here made the file unparsable)
    wait;
  end process;

end architecture tst;
|
126
hw/beh/wg_mem.vhd
Normal file
126
hw/beh/wg_mem.vhd
Normal file
|
@ -0,0 +1,126 @@
|
|||
-- wg_mem
|
||||
--
|
||||
-- Generate weights using stream of idx
|
||||
--
|
||||
-- Inputs are spike index and location of kernel to read
|
||||
--
|
||||
-- Current implementation assumes that all weights are cached
|
||||
-- and that the sizes of KI and KO are powers of 2
|
||||
|
||||
use work.pkg_sbs.all;
|
||||
|
||||
-- Weight generator backed by an internal memory.
-- Init mode (do_init_str / do_init_nxt) walks the address counters and
-- writes w_init; normal mode (ena_idx) loads idx/pos and streams
-- weights while busy.
entity wg_mem is
  generic (
    LOG2_H  : natural := 2;             -- size of H (number of output IPs per
                                        -- output location)
    LOG2_KI : natural := 4;             -- number IPs per input (thus spike index)
    LOG2_KO : natural := 3);            -- number connections from IPi
                                        -- block to IPo block (thus,
                                        -- number of output IPs of full connected,
                                        -- kernel size in conv)
  port (
    clk, rstn : in bit;

    -- Initial update
    do_init_str : in bit;               -- First step in init process
    do_init_nxt : in bit;               -- Next step in init process
    w_init      : in real;              -- Weight value to update

    -- Normal
    idx      : in  bit_vector(LOG2_KI-1 downto 0);  -- Index of spike
    pos      : in  bit_vector(LOG2_KO-1 downto 0);  -- Location of output (edge, kernel)
    ena_idx  : in  bit;
    busy_idx : out bit;
    ena_w    : out bit;                 -- Send a weight
    w        : out real);               -- stream of weights

end entity wg_mem;

library ieee;
use ieee.numeric_bit.all;

architecture rtl of wg_mem is

  signal busy_rg, busy_nxt, i_done, idx_done, pos_done : bit;
  signal idx_rg, idx_nxt : unsigned(LOG2_KI-1 downto 0);
  signal pos_rg, pos_nxt : unsigned(LOG2_KO-1 downto 0);
  signal i_rg, i_nxt     : unsigned(LOG2_H-1 downto 0);

  -- All these params could be configurable..
  constant I_LAST   : unsigned(LOG2_H-1 downto 0)  := (others=>'1');
  constant IDX_LAST : unsigned(LOG2_KI-1 downto 0) := (others=>'1');
  constant POS_LAST : unsigned(LOG2_KO-1 downto 0) := (others=>'1');

  constant I_ZERO   : unsigned(LOG2_H-1 downto 0)  := (others=>'0');
  constant IDX_ZERO : unsigned(LOG2_KI-1 downto 0) := (others=>'0');
  constant POS_ZERO : unsigned(LOG2_KO-1 downto 0) := (others=>'0');

  -- Memory
  signal mem_addr, mem_addr_nxt : unsigned(LOG2_H+LOG2_KI+LOG2_KO-1 downto 0);
  signal mem_wr, mem_rd         : bit;

begin  -- architecture rtl

  busy_idx <= busy_rg;
  ena_w    <= busy_rg;

  -- The counters count down; "done" means the counter has reached zero
  i_done   <= '1' when i_rg = I_ZERO else '0';
  idx_done <= '1' when idx_rg = IDX_ZERO else '0';
  pos_done <= '1' when pos_rg = POS_ZERO else '0';

  busy_nxt <= '1' when ena_idx='1' else
              '0' when i_done='1' else
              busy_rg;

  -- Next-state logic.  The decrements read the REGISTERED values.  They
  -- used to read the *_nxt signals themselves (i_nxt-1, ...), which is a
  -- combinational loop: each evaluation changed its own input.
  i_nxt <= I_LAST when (ena_idx='1') or (do_init_str='1') else
           i_rg-1 when (busy_rg='1') or (do_init_nxt='1') else
           i_rg;

  idx_nxt <= unsigned(idx) when ena_idx='1' else
             IDX_LAST      when do_init_str='1' else
             idx_rg-1      when (do_init_nxt='1') and (i_done='1') else
             idx_rg;

  pos_nxt <= unsigned(pos) when ena_idx='1' else
             POS_LAST      when do_init_str='1' else
             pos_rg-1      when (do_init_nxt='1') and (idx_done='1') else
             pos_rg;

  reg: process (clk, rstn) is
  begin  -- process reg
    if rstn = '0' then                  -- asynchronous reset (active low)
      idx_rg  <= IDX_LAST;
      pos_rg  <= POS_LAST;
      i_rg    <= I_LAST;
      busy_rg <= '0';
    elsif clk'event and clk = '1' then  -- rising clock edge
      idx_rg  <= idx_nxt;
      pos_rg  <= pos_nxt;
      i_rg    <= i_nxt;
      busy_rg <= busy_nxt;
    end if;
  end process reg;

  -- Memory address is the concatenation {pos, idx, i}
  mem_addr_nxt <= pos_nxt & idx_nxt & i_nxt;
  mem_wr       <= do_init_str or do_init_nxt;
  mem_rd       <= '1';

  -- Implementation of a synchronous single port memory.
  -- mem_addr is registered inside this process (no reset); write and
  -- read both use the value it held before the clock edge.
  mem: process (clk) is
    constant mem_size : natural := 2**(mem_addr'length);
    type mem_ty is array (0 to mem_size-1) of real;
    variable w_mem : mem_ty;
  begin  -- process mem
    if clk'event and clk = '1' then     -- rising clock edge
      mem_addr <= mem_addr_nxt;
      if mem_wr='1' then
        w_mem(to_integer(mem_addr)) := w_init;
      end if;
      if mem_rd='1' then
        w <= w_mem(to_integer(mem_addr));
      end if;
    end if;
  end process mem;

end architecture rtl;
|
51
hw/do_sim/ex.py
Normal file
51
hw/do_sim/ex.py
Normal file
|
@ -0,0 +1,51 @@
|
|||
import numpy as np
|
||||
|
||||
# Reference model for the H update: three update steps, computed once in
# normalized form (h1..h3) and once in un-normalized form (hp1..hp3).

h = np.array([0.1, 0.2, 0.3, 0.0, 0.01, 0.01, 0.1, 0.28])

w0 = [0.3, 0.0, 0.01, 0.01, 0.1, 0.28, 0.1, 0.2]
w1 = [0.01, 0.01, 0.1, 0.28, 0.1, 0.2, 0.3, 0.0]
w2 = [0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125]

w = np.array([w0, w1, w2])

eps = 0.2


def _normalized_step(h_cur, w_row):
    """Return (h*w, sum(h*w), next normalized h) for one update step."""
    hw = h_cur * w_row
    total = np.sum(hw)
    return hw, total, (h_cur + eps*hw/total) /(1+eps)


def _raw_step(hp_cur, w_row):
    """Return (hp*w, sum(hp*w), sum(hp), next un-normalized hp) for one step."""
    hpw = hp_cur * w_row
    total = np.sum(hpw)
    norm = np.sum(hp_cur)
    return hpw, total, norm, (total * hp_cur + eps * norm * hpw)


# Normalized iteration
h0 = h
hw0, sum_hw0, h1 = _normalized_step(h0, w[0])
hw1, sum_hw1, h2 = _normalized_step(h1, w[1])
hw2, sum_hw2, h3 = _normalized_step(h2, w[2])

# Un-normalized iteration
hp0 = h0
hpw0, sum_hpw0, norm_hp0, hp1 = _raw_step(hp0, w[0])
hpw1, sum_hpw1, norm_hp1, hp2 = _raw_step(hp1, w[1])
hpw2, sum_hpw2, norm_hp2, hp3 = _raw_step(hp2, w[2])

# Show that hp are just multiple of h
for hp_k, h_k in ((hp1, h1), (hp2, h2), (hp3, h3)):
    print(hp_k/h_k)

# This should be the output of HW model
print(hp0, hp1, hp2, hp3)
|
55
hw/do_sim/ex_ufp.py
Normal file
55
hw/do_sim/ex_ufp.py
Normal file
|
@ -0,0 +1,55 @@
|
|||
from math import log2, ceil, floor
|
||||
|
||||
# Reference model of the unsigned floating-point (ufp) format:
#   value = mm * 2**(-BO - ee),  with BO = BW - BE - EO
# The helpers are defined before the module-level constants because
# R_MIN is computed with ufp_to_r (it was called before its definition,
# which raised NameError).


def ufp_to_r(ee, mm, BW, BE, EO):
    """Decode exponent ee and mantissa mm into a real number."""
    BM = BW-BE
    BO = BM-EO  # BW-BE-EO

    r = mm * 2**(-BO-ee)
    return r


def r_to_ufp(r, BW, BE, EO):
    """Encode r as (ee, mm), with mm always within the BM mantissa bits.

    Values below the smallest non-zero number map to (EXP_MAX, 0); values
    above the largest representable number saturate to (0, 2**BM - 1).
    """
    BM = BW-BE
    BO = BM-EO  # BW-BE-EO
    EXP_MAX = 2**BE-1
    MM_MAX = 2**BM-1
    R_MIN = ufp_to_r(EXP_MAX, 1, BW, BE, EO)

    if r<R_MIN:  # Small values
        ee = EXP_MAX
        mm = 0
        return ee, mm

    aa = floor(BM - log2(r))
    aa = max(min(aa, EXP_MAX+BO), BO)

    # NOTE(review): Python's round() rounds ties to even; the VHDL model
    # calls math_real round() -- confirm both agree on .5 ties.
    mm = round(r*2**aa)
    if mm > MM_MAX:
        # The mantissa would need BM+1 bits (e.g. r an exact power of two
        # gives 2**BM).  Use one step less of scaling where the exponent
        # range allows it, otherwise saturate.
        if aa > BO:
            aa -= 1
            mm = round(r*2**aa)
        mm = min(mm, MM_MAX)

    ee = aa - BO
    return ee, mm


BW = 5
BE = 3
EO = 1

BM = BW-BE
BO = BM-EO  # BW-BE-EO
EXP_MAX = 2**BE-1
R_MIN = ufp_to_r(EXP_MAX, 1, BW, BE, EO)


# Round trip over every (mantissa, exponent) pair of the format
for mm in range(2**BM):
    for ee in range(2**BE):
        r_exp = ufp_to_r(ee, mm, BW, BE, EO)
        ee_exp, mm_exp = r_to_ufp(r_exp, BW, BE, EO)
        r_exp2 = ufp_to_r(ee_exp, mm_exp, BW, BE, EO)
        #print("r={} r_exp={} mm={} ee={}".format(r, r_exp, mm_exp, ee_exp))

        print("mm={} ee={} r={} ".format(mm, ee, r_exp))
        print("mm={} ee={} r={} ".format(mm_exp, ee_exp, r_exp2))
        print("---------------------------------------------------")


# Single value, an exact power of two
r=1.5625e-2
#r=3.1249999999999997e-2
ee, mm = r_to_ufp(r, BW, BE, EO)
r2 = ufp_to_r(ee, mm, BW, BE, EO)
|
11
hw/do_synth/.synopsys_dc.setup
Normal file
11
hw/do_synth/.synopsys_dc.setup
Normal file
|
@ -0,0 +1,11 @@
|
|||
# Start-up script of the synthesis directory: sources the shared flow
# libraries from the user's home directory, then the local library.
# Load library of functions...
|
||||
source ~/SVN/ids_setup/flow/flow_lib/lib_synth/flow_lib_synth.tcl
|
||||
source ~/SVN/ids_setup/flow/flow_lib/lib_synth/flow_tech_lib_synth.tcl
|
||||
|
||||
# Load local library
|
||||
# (relative path: resolved against the directory the tool is started from)
source cmd/lib_synth.tcl
|
||||
|
||||
# Not executed to allow the user to change the defaults
|
||||
# flow_setup_def
|
||||
# flow_set_tech tcbn65lptc
|
||||
|
8
hw/do_synth/cmd/do_synth.tcl
Normal file
8
hw/do_synth/cmd/do_synth.tcl
Normal file
|
@ -0,0 +1,8 @@
|
|||
# Load local library
# source cmd/lib_synth.tcl

#do_synth_def mua 10 tcbn65lptc clk rst

#do_synth_def mua 1 tcbn40lptc clk rst

# Synthesize hu_dp with a clock period of 0.6 in tcbn40lptc.
# The reset port of hu_dp is named rstn (the name rst used for mua does
# not exist on this unit), so that is the name passed here.
# NOTE(review): cmd/lib_synth.tcl defines "do_synth", not "do_synth_def" --
# confirm do_synth_def is provided by the sourced flow library.
do_synth_def hu_dp 0.6 tcbn40lptc clk rstn
|
31
hw/do_synth/cmd/lib_synth.tcl
Normal file
31
hw/do_synth/cmd/lib_synth.tcl
Normal file
|
@ -0,0 +1,31 @@
|
|||
# the local library for synthesis
|
||||
|
||||
# Synthesize one unit and write its reports and netlist.
#   UNIT_NAME  top-level unit to analyze and elaborate
#   T          clock period handed to flow_def_clock
#   TECH       technology library handed to flow_set_tech
#   clk, rst   names of the clock and reset handed to the flow helpers
proc do_synth { UNIT_NAME { T } { TECH tcbn65lptc } {clk clk} {rst rst} } {

    # Set-up the environment and the technology
    flow_setup_def
    flow_set_tech $TECH

    # Analyze and elaborate automatically. Params can be added using -param width=>32,ports=>8. Also possible to use a file
    analyze -library work -autoread -recursive ../rtl -top $UNIT_NAME
    elaborate -library work $UNIT_NAME
    link
    check_design

    # Set constraints
    flow_def_rst   $rst
    flow_def_clock $T $clk
    # One eighth of the period for both timing arguments.  The expression
    # is braced and divides by 8.0: the unbraced "$T/8" was an integer
    # division for integer periods (10 -> 1, 1 -> 0).
    set t_frac [expr {$T / 8.0}]
    flow_def_timing $t_frac $t_frac
    check_timing

    # Synthesize
    compile_ultra ;# Run the synthesize
    #change_names -rules vhdl -hier -verbose -log_changes ./log/change_names.log

    # Write reports
    #set prefix ${UNIT_NAME}
    set prefix ${UNIT_NAME}_T=${T}_TECH=${TECH} ;# Define prefix to identify reports.
    flow_report_all    $prefix ;# Write reports
    flow_write_netlist $prefix ;# Write results
}
|
||||
|
5
hw/do_synth/source.csh
Normal file
5
hw/do_synth/source.csh
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Environment setup for the Synopsys synthesis tools (csh syntax: uses setenv).
# Site init scripts, currently disabled:
#source /eda/synopsys/synopsys_lic_init_2015-2016.csh
|
||||
#source /eda/synopsys/2015-16/scripts/SYN_2015.06-SP4_RHELx86.csh
|
||||
|
||||
# License server setting for the Synopsys tools (presumably port@host -- confirm)
setenv SNPSLMD_LICENSE_FILE "28231@item0096"
|
||||
# Prepend the R-2020.09-SP4 synthesis bin directory so it takes precedence on PATH
setenv PATH "/usrf01/prog/synopsys/syn/R-2020.09-SP4/bin:${PATH}"
|
135
hw/rtl/hu_dp.vhd
Normal file
135
hw/rtl/hu_dp.vhd
Normal file
|
@ -0,0 +1,135 @@
|
|||
-- hu_dp
|
||||
-- Data path for Update H using stream of weights
|
||||
-- Trivial fixed-point implementation
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use work.pkg_sbs.all;
|
||||
|
||||
entity hu_dp is
  generic (
    K : natural := 3;    -- additional bits for sum (not referenced by architecture rtl)
    B : natural := 10);  -- width of the data words in bits
  port (
    clk, rstn : in  std_logic;                                -- clock; asynchronous reset, active low
    ctr_hu    : in  std_logic_vector(BW_HU_CTR-1 downto 0);   -- control word for the data path
    loc_h     : out std_logic_vector(ADDR_H_MAX-1 downto 0);  -- registered memory address
    eps       : in  std_logic_vector(B-1 downto 0);           -- loaded into sum_hp_p on ctr_update_sum
    wi        : in  std_logic_vector(B-1 downto 0);           -- stream of weights
    hi        : in  std_logic_vector(B-1 downto 0);           -- stream of incoming state
    ho        : out std_logic_vector(B-1 downto 0));          -- stream of outgoing state
end entity hu_dp;

library ieee;
use ieee.numeric_std.all;

architecture rtl of hu_dp is

  -- Storage: N_H_MAX words of B bits each
  subtype word is std_logic_vector(B-1 downto 0);
  type array_as_h_w is array (N_H_MAX-1 downto 0) of word;

  signal mem_hp : array_as_h_w;         -- stored h_eff values
  signal mem_hw : array_as_h_w;         -- stored upper halves of h_eff * wi

  -- Memory address: register and its next-state value
  signal addr_wr  : std_logic_vector(ADDR_H_MAX-1 downto 0);
  signal addr_nxt : std_logic_vector(ADDR_H_MAX-1 downto 0);

  -- Full-width (2*B bit) arithmetic results
  signal hp_new : unsigned(2*B-1 downto 0);  -- hp_p*sum_hw_p + sum_hp_p*hw_p
  signal hw_nxt : unsigned(2*B-1 downto 0);  -- h_eff*wi

  -- B-bit operands
  signal hp_new_rg : word;              -- registered upper half of hp_new
  signal hp_p      : word;              -- word read from mem_hp
  signal hw_p      : word;              -- word read from mem_hw, or sum_hp
  signal h_eff     : word;              -- state value currently in use

  -- Accumulators (register / next-state pairs)
  signal sum_hw,   sum_hw_nxt   : word; -- running sum of hw
  signal sum_hw_p, sum_hw_p_nxt : word; -- saved copy of sum_hw
  signal sum_hp,   sum_hp_nxt   : word; -- running sum of h_eff
  signal sum_hp_p, sum_hp_p_nxt : word; -- eps, or saved upper half of hp_new

  -- Decoded control bits
  signal ctr_sel_ini, ctr_sum_ini, ctr_update_sum, ctr_update_sum2 : std_logic;
  signal ctr_addr_rst, ctr_addr_inc, ctr_write_hw                  : std_logic;
  signal ctr_wr_hw, ctr_wr_hp                                      : std_logic;

begin  -- architecture rtl

  ---------------------------------------------------------------------------
  -- Control word decoding
  ---------------------------------------------------------------------------
  ctr_sel_ini     <= ctr_hu(0);
  ctr_sum_ini     <= ctr_hu(1);
  ctr_update_sum  <= ctr_hu(2);
  ctr_addr_rst    <= ctr_hu(3);
  ctr_addr_inc    <= ctr_hu(4);
  ctr_write_hw    <= ctr_hu(5);         -- decoded but not used below
  ctr_wr_hw       <= ctr_hu(6);         -- both memory write enables come from bit 6
  ctr_wr_hp       <= ctr_hu(6);
  ctr_update_sum2 <= ctr_hu(7);

  ---------------------------------------------------------------------------
  -- Arithmetic
  ---------------------------------------------------------------------------
  -- Main calculation
  hp_new <= unsigned(hp_p) * unsigned(sum_hw_p) + unsigned(sum_hp_p) * unsigned(hw_p);

  -- Product of the selected state and the incoming weight
  hw_nxt <= unsigned(h_eff) * unsigned(wi);

  -- The selected state is also the output
  ho <= h_eff;

  -- Address seen by the control path
  loc_h <= addr_wr;

  -- mem_hp is read at the next-state address
  hp_p <= mem_hp(to_integer(unsigned(addr_nxt)));

  ---------------------------------------------------------------------------
  -- Multiplexers
  ---------------------------------------------------------------------------
  -- State selection: external hi or the registered result
  sel_h : process (ctr_sel_ini, hi, hp_new_rg) is
  begin
    if ctr_sel_ini = '1' then
      h_eff <= hi;
    else
      h_eff <= hp_new_rg;
    end if;
  end process sel_h;

  -- Running sum of hw: restart with the current term or accumulate
  acc_hw : process (ctr_sum_ini, hw_nxt, sum_hw) is
  begin
    if ctr_sum_ini = '1' then
      sum_hw_nxt <= std_logic_vector(hw_nxt(2*B-1 downto B));
    else
      sum_hw_nxt <= std_logic_vector(unsigned(sum_hw) + hw_nxt(2*B-1 downto B));
    end if;
  end process acc_hw;

  -- Saved hw sum: clear, capture sum_hw, or hold
  save_hw : process (ctr_update_sum, ctr_update_sum2, sum_hw, sum_hw_p) is
  begin
    if ctr_update_sum = '1' then
      sum_hw_p_nxt <= (others => '0');
    elsif ctr_update_sum2 = '1' then
      sum_hw_p_nxt <= sum_hw;
    else
      sum_hw_p_nxt <= sum_hw_p;
    end if;
  end process save_hw;

  -- Running sum of h_eff: restart with the current term or accumulate
  acc_hp : process (ctr_sum_ini, h_eff, sum_hp) is
  begin
    if ctr_sum_ini = '1' then
      sum_hp_nxt <= h_eff;
    else
      sum_hp_nxt <= std_logic_vector(unsigned(sum_hp) + unsigned(h_eff));
    end if;
  end process acc_hp;

  -- Saved hp factor: load eps, capture the upper half of hp_new, or hold
  save_hp : process (ctr_update_sum, ctr_update_sum2, eps, hp_new, sum_hp_p) is
  begin
    if ctr_update_sum = '1' then
      sum_hp_p_nxt <= eps;
    elsif ctr_update_sum2 = '1' then
      sum_hp_p_nxt <= std_logic_vector(hp_new(2*B-1 downto B));
    else
      sum_hp_p_nxt <= sum_hp_p;
    end if;
  end process save_hp;

  -- Second multiplier operand: mem_hw word, or sum_hp while ctr_update_sum2
  -- is not '0'
  sel_hw : process (ctr_update_sum2, mem_hw, addr_nxt, sum_hp) is
  begin
    if ctr_update_sum2 = '0' then
      hw_p <= mem_hw(to_integer(unsigned(addr_nxt)));
    else
      hw_p <= sum_hp;
    end if;
  end process sel_hw;

  -- Next address: reset has priority over increment, otherwise hold
  nxt_addr : process (ctr_addr_rst, ctr_addr_inc, addr_wr) is
  begin
    if ctr_addr_rst = '1' then
      addr_nxt <= (others => '0');
    elsif ctr_addr_inc = '1' then
      addr_nxt <= std_logic_vector(unsigned(addr_wr) + 1);
    else
      addr_nxt <= addr_wr;
    end if;
  end process nxt_addr;

  ---------------------------------------------------------------------------
  -- Registers (asynchronous, active-low reset)
  ---------------------------------------------------------------------------
  rg : process (clk, rstn) is
  begin  -- process rg
    if rstn = '0' then
      addr_wr   <= (others => '0');
      sum_hp_p  <= (others => '0');
      sum_hp    <= (others => '0');
      sum_hw_p  <= (others => '0');
      sum_hw    <= (others => '0');
      hp_new_rg <= (others => '0');
    elsif clk'event and clk = '1' then
      addr_wr   <= addr_nxt;
      sum_hp_p  <= sum_hp_p_nxt;
      sum_hp    <= sum_hp_nxt;
      sum_hw_p  <= sum_hw_p_nxt;
      sum_hw    <= sum_hw_nxt;
      hp_new_rg <= std_logic_vector(hp_new(2*B-1 downto B));
    end if;
  end process rg;

  ---------------------------------------------------------------------------
  -- Memories (written at the registered address, no reset)
  ---------------------------------------------------------------------------
  mem : process (clk) is
  begin  -- process mem
    if clk'event and clk = '1' then     -- rising clock edge
      if ctr_wr_hp = '1' then
        mem_hp(to_integer(unsigned(addr_wr))) <= h_eff;
      end if;
      if ctr_wr_hw = '1' then
        mem_hw(to_integer(unsigned(addr_wr))) <= std_logic_vector(hw_nxt(2*B-1 downto B));
      end if;
    end if;
  end process mem;

end architecture rtl;
|
52
hw/rtl/mua.vhd
Normal file
52
hw/rtl/mua.vhd
Normal file
|
@ -0,0 +1,52 @@
|
|||
-- Simple multiply with adder to check speed
|
||||
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
|
||||
entity mua is
  generic (
    B : natural := 10);                                   -- operand width in bits
  port (
    clk, arstn   : in  std_logic;                         -- clock; asynchronous reset, active low
    dt_mv, dt_mc : in  std_logic_vector(B-1 downto 0);    -- the two multiplication operands
    dt_add       : in  std_logic_vector(2*B-1 downto 0);  -- value added to the product
    dt_mua       : out std_logic_vector(2*B-1 downto 0)); -- registered multiply-add result
end entity mua;


library ieee;
use ieee.numeric_std.all;

architecture rtl of mua is

  -- Input registers
  signal dt_mv_rg  : unsigned(B-1 downto 0);
  signal dt_mc_rg  : unsigned(B-1 downto 0);
  signal dt_add_rg : unsigned(2*B-1 downto 0);
  -- Output register
  signal dt_mua_rg : unsigned(2*B-1 downto 0);

begin  -- architecture rtl

  dt_mua <= std_logic_vector(dt_mua_rg);

  -- Register-to-register multiply-add: the inputs are sampled on one edge and
  -- the result of the sampled values is stored on the following edge.
  reg : process (clk, arstn) is
  begin  -- process reg
    if arstn = '0' then                 -- asynchronous reset (active low)
      dt_mua_rg <= (others => '0');
      dt_add_rg <= (others => '0');
      dt_mc_rg  <= (others => '0');
      dt_mv_rg  <= (others => '0');
    elsif clk'event and clk = '1' then  -- rising clock edge
      -- Uses the register values from before this edge (signal semantics)
      dt_mua_rg <= dt_mv_rg * dt_mc_rg + dt_add_rg;
      dt_add_rg <= unsigned(dt_add);
      dt_mc_rg  <= unsigned(dt_mc);
      dt_mv_rg  <= unsigned(dt_mv);
    end if;
  end process reg;

end architecture rtl;
|
65
hw/rtl/mult_unsgn_pp_trunc.vhd
Normal file
65
hw/rtl/mult_unsgn_pp_trunc.vhd
Normal file
|
@ -0,0 +1,65 @@
|
|||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
entity mult_unsgn_pp_trunc is
  generic (
    BWa : natural := 16;   -- bit width of da (one partial-product row per bit)
    BWb : natural := 16;   -- bit width of db
    K   : natural := 15);  -- truncation: product bits in columns below K are dropped
  port (
    da   : in  std_logic_vector(BWa-1 downto 0);
    db   : in  std_logic_vector(BWb-1 downto 0);
    dout : out std_logic_vector(BWa+BWb-1 downto 0));
end mult_unsgn_pp_trunc;

architecture str of mult_unsgn_pp_trunc is

  -- One row per bit of da; each row spans every product column
  type stlv_array is array (0 to BWa-1) of std_logic_vector(BWa+BWb-2 downto 0);
  signal pp : stlv_array;

begin  -- str

  -- Partial products: row i holds da(i) and db(j) in column i+j.
  -- Columns below K stay at '0'.
  ppGen : process (da, db)
    variable rows : stlv_array;
    variable col  : natural;
  begin
    rows := (others => (others => '0'));
    for i in 0 to BWa-1 loop
      for j in 0 to BWb-1 loop
        col := i + j;
        if col >= K then
          rows(i)(col) := da(i) and db(j);
        end if;
      end loop;
    end loop;
    pp <= rows;
  end process ppGen;

  -- Sum of all rows, one row at a time, zero-extended by one bit
  CSA_tree : process (pp)
    variable acc : unsigned(BWa+BWb-1 downto 0);
  begin  -- process CSA_tree
    for i in 0 to BWa-1 loop
      if i /= 0 then
        acc := ('0' & unsigned(pp(i))) + acc;
      else
        acc := '0' & unsigned(pp(0));   -- first row initializes the sum
      end if;
    end loop;  -- i
    dout <= std_logic_vector(acc);
  end process CSA_tree;

end str;
|
86
hw/rtl/two_mult_unsgn_pp_trunc.vhd
Normal file
86
hw/rtl/two_mult_unsgn_pp_trunc.vhd
Normal file
|
@ -0,0 +1,86 @@
|
|||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
entity two_mult_unsgn_pp_trunc is
  generic (
    BWa : natural := 16;   -- bit width of da and dc (one partial-product row per bit)
    BWb : natural := 16;   -- bit width of db and dd
    K   : natural := 15);  -- truncation: product bits in columns below K are dropped
  port (
    da   : in  std_logic_vector(BWa-1 downto 0);
    db   : in  std_logic_vector(BWb-1 downto 0);
    dc   : in  std_logic_vector(BWa-1 downto 0);
    dd   : in  std_logic_vector(BWb-1 downto 0);
    dout : out std_logic_vector(BWa+BWb downto 0));  -- sum of both truncated products
end two_mult_unsgn_pp_trunc;

architecture str of two_mult_unsgn_pp_trunc is

  -- Rows 0 .. BWa-1 belong to da*db, rows BWa .. 2*BWa-1 to dc*dd
  type stlv_array is array (0 to 2*BWa-1) of std_logic_vector(BWa+BWb-2 downto 0);
  signal pp : stlv_array;

  -- Truncated partial products of x*y: row i holds x(i) and y(j) in column
  -- i+j for every column >= K. Only rows 0 .. BWa-1 are filled, the
  -- remaining rows are all '0'.
  function trunc_rows (x, y : std_logic_vector) return stlv_array is
    variable rows : stlv_array;
  begin
    rows := (others => (others => '0'));
    for i in 0 to BWa-1 loop
      for j in 0 to BWb-1 loop
        if i + j >= K then
          rows(i)(i+j) := x(i) and y(j);
        end if;
      end loop;
    end loop;
    return rows;
  end function trunc_rows;

begin  -- str

  -- First product drives the lower half of pp
  ppGen1 : process (da, db)
    variable rows : stlv_array;
  begin
    rows := trunc_rows(da, db);
    pp(0 to BWa-1) <= rows(0 to BWa-1);
  end process ppGen1;

  -- Second product drives the upper half of pp
  ppGen2 : process (dc, dd)
    variable rows : stlv_array;
  begin
    rows := trunc_rows(dc, dd);
    pp(BWa to 2*BWa-1) <= rows(0 to BWa-1);
  end process ppGen2;

  -- Sum of all rows of both products, one row at a time; the accumulator is
  -- two bits wider than a row
  CSA_tree : process (pp)
    variable acc : unsigned(BWa+BWb downto 0);
  begin  -- process CSA_tree
    for i in 0 to 2*BWa-1 loop
      if i /= 0 then
        acc := unsigned(pp(i)) + acc;
      else
        acc := "00" & unsigned(pp(0));  -- first row initializes the sum
      end if;
    end loop;  -- i
    dout <= std_logic_vector(acc);
  end process CSA_tree;

end str;
|
29
hw/sbs_hw.org
Normal file
29
hw/sbs_hw.org
Normal file
|
@ -0,0 +1,29 @@
|
|||
#+TITLE: sbs_hw.org
|
||||
|
||||
* Idea
|
||||
Start with HW implementation.
|
||||
|
||||
* Modules
|
||||
hu_ : h update
|
||||
wg_ : weight generator
|
||||
sg_ : spikes generator
|
||||
|
||||
* HU
|
||||
This block updates H according to the SbS equations.
|
||||
- First version done.
|
||||
- Need to decide if reciprocal or multiplication
|
||||
|
||||
|
||||
* WG
|
||||
This block generates a stream of weights given a stream of spikes
|
||||
- instead of a stream of spikes scanned per region, it could be better to receive a spike identifier, and
|
||||
then all the edges (from location to location) that use this particular spike.
|
||||
|
||||
|
||||
|
||||
|
||||
* New strategy for read
|
||||
|
||||
When doing a convolution, we can read
|
||||
|
||||
|
Loading…
Reference in a new issue