import torch
import math
import random

from PyTestKernel import TestKernel

# TODO: kernel_phxy_plus_pxy, kernel_phxy_times_pxy,
# kernel_phxy_fill_h, kernel_phxy_one_over_sum_into_pxy,
# test_kernel_phxy_fill_with_spike_selected_w => 4D index

# pxy = number_of_pattern * dim_x * dim_y
# phxy = number_of_pattern * h_dim * dim_x * dim_y
# sxy = s_dim * dim_x * dim_y
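
# The *_dim_c* values computed in each test below are row-major strides
# (in elements, not bytes) that the C++/CUDA kernels use to index the raw
# memory behind data_ptr(). The flat offsets they imply (a sketch inferred
# from the layouts above, not taken from the kernel sources):
#   pxy[p, x, y]     -> p * pxy_dim_c0 + x * pxy_dim_c1 + y
#   phxy[p, h, x, y] -> p * phxy_dim_c0 + h * phxy_dim_c1 + x * phxy_dim_c2 + y
#   sxy[s, x, y]     -> s * sxy_dim_c0 + x * sxy_dim_c1 + y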


def test_kernel_pxy_times_spike_selected_sxy(
    h_dim,
    s_dim,
    number_of_pattern,
    dim_x,
    dim_y,
    display_debug,
    spike_time,
    number_of_spikes,
):
    print("test_kernel_pxy_times_spike_selected_sxy")
    # void test_kernel_pxy_times_spike_selected_sxy(
    #     size_t dim_x, size_t dim_y, size_t number_of_pattern, size_t h_dim,
    #     bool display_debug, int64_t pxy_memory_addr, int64_t sxy_memory_addr,
    #     int64_t spike_memory_addr, size_t spike_time, size_t spike_dim_c0,
    #     size_t spike_dim_c1, size_t spike_dim_c2, size_t pxy_dim_c0,
    #     size_t pxy_dim_c1, size_t sxy_dim_c0, size_t sxy_dim_c1);

    memory_pxy = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_sxy = torch.rand(
        (s_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_spikes = (
        torch.rand(
            (number_of_pattern, number_of_spikes, dim_x, dim_y),
            dtype=torch.float32,
            device=torch.device("cuda:0"),
        )
        * float(s_dim)
    ).type(dtype=torch.int64)
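
    # Note: memory_spikes holds integer indices in [0, s_dim), produced by
    # scaling uniform noise; each entry names the sxy row that the spike at
    # a given time step selects.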

    pxy_dim_c0 = int(dim_x * dim_y)
    pxy_dim_c1 = int(dim_y)

    sxy_dim_c0 = int(dim_x * dim_y)
    sxy_dim_c1 = int(dim_y)

    spike_dim_c0 = int(number_of_spikes * dim_x * dim_y)
    spike_dim_c1 = int(dim_x * dim_y)
    spike_dim_c2 = int(dim_y)

    memory_pxy_copy = memory_pxy.clone()
    memory_sxy_copy = memory_sxy.clone()
    memory_spikes_copy = memory_spikes.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_pxy_times_spike_selected_sxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        memory_pxy.data_ptr(),
        memory_sxy.data_ptr(),
        memory_spikes.data_ptr(),
        spike_time,
        spike_dim_c0,
        spike_dim_c1,
        spike_dim_c2,
        pxy_dim_c0,
        pxy_dim_c1,
        sxy_dim_c0,
        sxy_dim_c1,
    )
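
    # Reference computed with PyTorch ops on the clones: for every pattern
    # and pixel, multiply pxy by the sxy channel selected by the spike at
    # spike_time; a negative spike index zeroes the entry instead.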
    for p in range(0, memory_spikes_copy.shape[0]):
        for x in range(0, memory_spikes_copy.shape[2]):
            for y in range(0, memory_spikes_copy.shape[3]):
                spike = memory_spikes_copy[p, spike_time, x, y]

                if spike >= 0:
                    memory_pxy_copy[p, x, y] *= memory_sxy_copy[spike, x, y]
                else:
                    memory_pxy_copy[p, x, y] = 0.0

    print(f"difference: {torch.abs(memory_pxy - memory_pxy_copy).max():.4e}")
    print()


def test_kernel_phxy_fill_with_spike_selected_w(
    h_dim,
    s_dim,
    number_of_pattern,
    dim_x,
    dim_y,
    display_debug,
    spike_time,
    number_of_spikes,
):
    print("test_kernel_phxy_fill_with_spike_selected_w")
    # void test_kernel_phxy_fill_with_spike_selected_w(
    #     size_t dim_x, size_t dim_y, size_t number_of_pattern, size_t h_dim,
    #     bool display_debug, size_t spike_time, size_t weights_dim_c0,
    #     size_t spike_dim_c0, size_t spike_dim_c1, size_t spike_dim_c2,
    #     size_t phxy_dim_c0, size_t phxy_dim_c1, size_t phxy_dim_c2,
    #     int64_t phxy_memory_addr, int64_t weight_memory_addr,
    #     int64_t spike_memory_addr);

    memory_phxy = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_w = torch.rand(
        (s_dim, h_dim),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_spikes = (
        torch.rand(
            (number_of_pattern, number_of_spikes, dim_x, dim_y),
            dtype=torch.float32,
            device=torch.device("cuda:0"),
        )
        * float(s_dim)
    ).type(dtype=torch.int64)

    phxy_dim_c0 = int(h_dim * dim_x * dim_y)
    phxy_dim_c1 = int(dim_x * dim_y)
    phxy_dim_c2 = int(dim_y)

    spike_dim_c0 = int(number_of_spikes * dim_x * dim_y)
    spike_dim_c1 = int(dim_x * dim_y)
    spike_dim_c2 = int(dim_y)

    weights_dim_c0 = int(h_dim)

    memory_phxy_copy = memory_phxy.clone()
    memory_w_copy = memory_w.clone()
    memory_spikes_copy = memory_spikes.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_fill_with_spike_selected_w(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        spike_time,
        weights_dim_c0,
        spike_dim_c0,
        spike_dim_c1,
        spike_dim_c2,
        phxy_dim_c0,
        phxy_dim_c1,
        phxy_dim_c2,
        memory_phxy.data_ptr(),
        memory_w.data_ptr(),
        memory_spikes.data_ptr(),
    )
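
    # Reference: copy the weight row selected by the spike at spike_time into
    # every h column of phxy (a negative spike index zeroes the column). A
    # vectorized equivalent of the loop below (a sketch, ignoring the
    # negative-index branch, which cannot trigger with the indices generated
    # above) would be:
    #   idx = memory_spikes_copy[:, spike_time]              # (p, x, y)
    #   memory_phxy_copy = memory_w_copy[idx].permute(0, 3, 1, 2)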
    for p in range(0, memory_spikes_copy.shape[0]):
        for x in range(0, memory_spikes_copy.shape[2]):
            for y in range(0, memory_spikes_copy.shape[3]):
                spike = memory_spikes_copy[p, spike_time, x, y]

                if spike >= 0:
                    memory_phxy_copy[p, :, x, y] = memory_w_copy[spike, :]
                else:
                    memory_phxy_copy[p, :, x, y] = 0.0

    print(f"difference: {torch.abs(memory_phxy - memory_phxy_copy).max():.4e}")
    print()


def test_kernel_phxy_one_over_sum_into_pxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_one_over_sum_into_pxy")
    # void test_kernel_phxy_one_over_sum_into_pxy(
    #     size_t dim_x, size_t dim_y, size_t number_of_pattern, size_t h_dim,
    #     bool display_debug, size_t phxy_dim_c0, size_t phxy_dim_c1,
    #     size_t phxy_dim_c2, size_t pxy_dim_c0, size_t pxy_dim_c1,
    #     int64_t phxy_memory_addr, int64_t pxy_memory_addr);

    memory_a = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_b = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    pxy_dim_c0 = int(dim_x * dim_y)
    pxy_dim_c1 = int(dim_y)

    phxy_dim_c0 = int(h_dim * dim_x * dim_y)
    phxy_dim_c1 = int(dim_x * dim_y)
    phxy_dim_c2 = int(dim_y)

    memory_a_copy = memory_a.clone()
    memory_b_copy = memory_b.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_one_over_sum_into_pxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        phxy_dim_c0,
        phxy_dim_c1,
        phxy_dim_c2,
        pxy_dim_c0,
        pxy_dim_c1,
        memory_a.data_ptr(),
        memory_b.data_ptr(),
    )
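
    # Reference: sum phxy over the h axis, then store the guarded reciprocal
    # 1/sum into pxy (0.0 wherever the sum falls below the 1e-10 threshold).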
    memory_temp_copy = memory_a_copy.sum(dim=1)

    memory_b_copy = torch.where(memory_temp_copy > 1e-10, 1.0 / memory_temp_copy, 0.0)
    print(
        "Note: an exact difference of 0 is unlikely here because the kernel\n"
        "sums the values in a different order than PyTorch."
    )
    print(f"difference: {torch.abs(memory_b - memory_b_copy).max():.4e}")
    print()


def test_kernel_phxy_fill_with_h(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_fill_with_h")
    # void test_kernel_phxy_fill_with_h(size_t dim_x, size_t dim_y,
    #                                   size_t number_of_pattern, size_t h_dim,
    #                                   bool display_debug, size_t phxy_dim_c0,
    #                                   size_t phxy_dim_c1, size_t phxy_dim_c2,
    #                                   int64_t h_memory_addr,
    #                                   int64_t phxy_memory_addr);

    memory_a = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_h = torch.rand(
        (h_dim,),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    phxy_dim_c0 = int(h_dim * dim_x * dim_y)
    phxy_dim_c1 = int(dim_x * dim_y)
    phxy_dim_c2 = int(dim_y)

    memory_a_copy = memory_a.clone()
    memory_h_copy = memory_h.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_fill_with_h(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        phxy_dim_c0,
        phxy_dim_c1,
        phxy_dim_c2,
        memory_h.data_ptr(),
        memory_a.data_ptr(),
    )
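
    # Reference: broadcast the h vector into every (pattern, x, y) position.
    # The loop below is equivalent to the one-liner (sketch):
    #   memory_a_copy[:] = memory_h_copy.view(1, -1, 1, 1)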
    for p in range(0, memory_a_copy.shape[0]):
        for x in range(0, memory_a_copy.shape[2]):
            for y in range(0, memory_a_copy.shape[3]):
                memory_a_copy[p, :, x, y] = memory_h_copy

    print(f"difference: {torch.abs(memory_a - memory_a_copy).max():.4e}")
    print()


def test_kernel_phxy_plus_pxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_plus_pxy")
    # void test_kernel_phxy_plus_pxy(size_t dim_x, size_t dim_y,
    #                                size_t number_of_pattern, size_t h_dim,
    #                                bool display_debug, size_t phxy_dim_c0,
    #                                size_t phxy_dim_c1, size_t phxy_dim_c2,
    #                                size_t pxy_dim_c0, size_t pxy_dim_c1,
    #                                int64_t phxy_memory_addr,
    #                                int64_t pxy_memory_addr);

    memory_a = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_b = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    pxy_dim_c0 = int(dim_x * dim_y)
    pxy_dim_c1 = int(dim_y)

    phxy_dim_c0 = int(h_dim * dim_x * dim_y)
    phxy_dim_c1 = int(dim_x * dim_y)
    phxy_dim_c2 = int(dim_y)

    memory_a_copy = memory_a.clone()
    memory_b_copy = memory_b.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_plus_pxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        phxy_dim_c0,
        phxy_dim_c1,
        phxy_dim_c2,
        pxy_dim_c0,
        pxy_dim_c1,
        memory_a.data_ptr(),
        memory_b.data_ptr(),
    )
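
    # Reference: unsqueeze(1) inserts an h axis of size 1, so pxy is
    # broadcast-added across every h slice of phxy.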
    memory_a_copy += memory_b_copy.unsqueeze(1)

    print(f"difference: {torch.abs(memory_a - memory_a_copy).max():.4e}")
    print()


def test_kernel_phxy_times_pxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_times_pxy")
    # void test_kernel_phxy_times_pxy(size_t dim_x, size_t dim_y,
    #                                 size_t number_of_pattern, size_t h_dim,
    #                                 bool display_debug, size_t phxy_dim_c0,
    #                                 size_t phxy_dim_c1, size_t phxy_dim_c2,
    #                                 size_t pxy_dim_c0, size_t pxy_dim_c1,
    #                                 int64_t phxy_memory_addr,
    #                                 int64_t pxy_memory_addr);

    memory_a = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_b = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    pxy_dim_c0 = int(dim_x * dim_y)
    pxy_dim_c1 = int(dim_y)

    phxy_dim_c0 = int(h_dim * dim_x * dim_y)
    phxy_dim_c1 = int(dim_x * dim_y)
    phxy_dim_c2 = int(dim_y)

    memory_a_copy = memory_a.clone()
    memory_b_copy = memory_b.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_times_pxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        phxy_dim_c0,
        phxy_dim_c1,
        phxy_dim_c2,
        pxy_dim_c0,
        pxy_dim_c1,
        memory_a.data_ptr(),
        memory_b.data_ptr(),
    )
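
    # Reference: the same h-axis broadcast as in the plus test, multiplicative.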
    memory_a_copy *= memory_b_copy.unsqueeze(1)

    print(f"difference: {torch.abs(memory_a - memory_a_copy).max():.4e}")
    print()


def test_kernel_phxy_times_phxy_equals_phxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_times_phxy_equals_phxy")
    # void test_kernel_phxy_times_phxy_equals_phxy(size_t dim_x, size_t dim_y,
    #                                              size_t number_of_pattern,
    #                                              size_t h_dim, bool display_debug,
    #                                              int64_t phxy_memory_a_addr,
    #                                              int64_t phxy_memory_b_addr,
    #                                              int64_t phxy_memory_out_addr);

    memory_a = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_b = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_out = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_a_copy = memory_a.clone()
    memory_b_copy = memory_b.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_times_phxy_equals_phxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        memory_a.data_ptr(),
        memory_b.data_ptr(),
        memory_out.data_ptr(),
    )
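
    # Reference: plain elementwise product; the random initial values of
    # memory_out should be fully overwritten by the kernel.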
    memory_out_copy = memory_a_copy * memory_b_copy

    print(f"difference: {torch.abs(memory_out - memory_out_copy).max():.4e}")
    print()


def test_kernel_phxy_plus_phxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_plus_phxy")
    # void test_kernel_phxy_plus_phxy(size_t dim_x, size_t dim_y,
    #                                 size_t number_of_pattern, size_t h_dim,
    #                                 bool display_debug,
    #                                 int64_t phxy_memory_a_addr,
    #                                 int64_t phxy_memory_b_addr);

    memory_a = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_b = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_a_copy = memory_a.clone()
    memory_b_copy = memory_b.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_plus_phxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        memory_a.data_ptr(),
        memory_b.data_ptr(),
    )

    memory_a_copy += memory_b_copy

    print(f"difference: {torch.abs(memory_a - memory_a_copy).max():.4e}")
    print()


def test_kernel_pxy_time_pxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_pxy_time_pxy")

    # void test_kernel_pxy_time_pxy(size_t dim_x, size_t dim_y,
    #                               size_t number_of_pattern, size_t h_dim,
    #                               bool display_debug, int64_t pxy_memory_a_addr,
    #                               int64_t pxy_memory_b_addr);

    epsilon_memory_a = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    epsilon_memory_b = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    epsilon_memory_a_copy = epsilon_memory_a.clone()
    epsilon_memory_b_copy = epsilon_memory_b.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_pxy_time_pxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        epsilon_memory_a.data_ptr(),
        epsilon_memory_b.data_ptr(),
    )
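
    # Reference: in-place elementwise product on the clones.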
    epsilon_memory_a_copy *= epsilon_memory_b_copy

    print(
        f"difference: {torch.abs(epsilon_memory_a - epsilon_memory_a_copy).max():.4e}"
    )
    print()


def test_kernel_pxy_times_v(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_pxy_times_v")

    # void test_kernel_pxy_times_v(size_t dim_x, size_t dim_y,
    #                              size_t number_of_pattern, size_t h_dim,
    #                              bool display_debug, float value,
    #                              int64_t pxy_memory_addr);

    epsilon_memory = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    epsilon_memory_copy = epsilon_memory.clone()
    value = float(math.pi)

    my_kernels = TestKernel()
    my_kernels.test_kernel_pxy_times_v(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        value,
        epsilon_memory.data_ptr(),
    )

    epsilon_memory_copy = epsilon_memory_copy * value

    print(f"difference: {torch.abs(epsilon_memory - epsilon_memory_copy).max():.4e}")
    print()


def test_kernel_pxy_plus_v(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_pxy_plus_v")
    # void test_kernel_pxy_plus_v(size_t dim_x, size_t dim_y,
    #                             size_t number_of_pattern, size_t h_dim,
    #                             bool display_debug, float value,
    #                             int64_t pxy_memory_addr);

    epsilon_memory = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    epsilon_memory_copy = epsilon_memory.clone()
    value = float(math.pi)

    my_kernels = TestKernel()
    my_kernels.test_kernel_pxy_plus_v(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        value,
        epsilon_memory.data_ptr(),
    )

    epsilon_memory_copy = epsilon_memory_copy + value

    print(f"difference: {torch.abs(epsilon_memory - epsilon_memory_copy).max():.4e}")
    print()


def test_kernel_pxy_set_to_v(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_pxy_set_to_v")
    # void test_kernel_pxy_set_to_v(size_t dim_x, size_t dim_y,
    #                               size_t number_of_pattern, size_t h_dim,
    #                               bool display_debug, float value,
    #                               int64_t pxy_memory_addr);

    set_value = float(math.pi)

    epsilon_memory = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    my_kernels = TestKernel()
    my_kernels.test_kernel_pxy_set_to_v(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        set_value,
        epsilon_memory.data_ptr(),
    )

    print(f"difference: {torch.abs(epsilon_memory - set_value).max():.4e}")
    print()


def test_kernel_pxy_reciprocal(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_pxy_reciprocal")
    # void test_kernel_pxy_reciprocal(size_t dim_x, size_t dim_y,
    #                                 size_t number_of_pattern, size_t h_dim,
    #                                 bool display_debug, int64_t pxy_memory_addr);

    epsilon_memory = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    epsilon_memory_copy = epsilon_memory.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_pxy_reciprocal(
        dim_x, dim_y, number_of_pattern, h_dim, display_debug, epsilon_memory.data_ptr()
    )
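
    # Reference: plain reciprocal. torch.rand samples from [0, 1), so an
    # (unlikely) exact zero here would make the reference value inf.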
    epsilon_memory_copy = 1.0 / epsilon_memory_copy

    print(f"difference: {torch.abs(epsilon_memory - epsilon_memory_copy).max():.4e}")
    print()


if __name__ == "__main__":
    input_set = 0
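
    # input_set selects one of the two test geometries below:
    # 0 -> spatial maps (dim_x = dim_y = 20, small s_dim),
    # 1 -> fully flattened input (dim_x = dim_y = 1, large s_dim).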

    for test_id in range(0, 13):
        print(f"Test-ID: {test_id}")

        number_of_spikes: int = int(1600)
        spike_time: int = int(random.random() * number_of_spikes)

        if input_set == 0:
            h_dim: int = int(32)
            s_dim: int = int(1 * 5 * 5)
            number_of_pattern: int = int(24)
            dim_x: int = int(20)
            dim_y: int = int(20)
            display_debug: bool = False
        else:
            h_dim = int(10)
            s_dim = int(32 * 20 * 20)
            number_of_pattern = int(24)
            dim_x = int(1)
            dim_y = int(1)
            display_debug = bool(False)

        if test_id == 0:
            test_kernel_pxy_reciprocal(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 1:
            test_kernel_pxy_set_to_v(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 2:
            test_kernel_pxy_plus_v(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 3:
            test_kernel_pxy_times_v(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 4:
            test_kernel_pxy_time_pxy(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 5:
            test_kernel_phxy_plus_phxy(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 6:
            test_kernel_phxy_times_phxy_equals_phxy(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 7:
            test_kernel_phxy_times_pxy(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 8:
            test_kernel_phxy_plus_pxy(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 9:
            test_kernel_phxy_fill_with_h(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 10:
            test_kernel_phxy_one_over_sum_into_pxy(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 11:
            test_kernel_phxy_fill_with_spike_selected_w(
                h_dim,
                s_dim,
                number_of_pattern,
                dim_x,
                dim_y,
                display_debug,
                spike_time,
                number_of_spikes,
            )
        elif test_id == 12:
            test_kernel_pxy_times_spike_selected_sxy(
                h_dim,
                s_dim,
                number_of_pattern,
                dim_x,
                dim_y,
                display_debug,
                spike_time,
                number_of_spikes,
            )