# pytorch-sbs/network/CPP_Cuda_new_preview/test_kernel.py
import torch
import math
import random
from PyTestKernel import TestKernel
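
# NOTE: These tests require the compiled PyTestKernel extension and a CUDA
# device; every tensor below is allocated on cuda:0.
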
# TODO: kernel_phxy_plus_pxy, kernel_phxy_times_pxy,
# kernel_phxy_fill_h, kernel_phxy_one_over_sum_into_pxy,
# test_kernel_phxy_fill_with_spike_selected_w => 4D index
# pxy = number_of_pattern * dim_x * dim_y
# phxy = number_of_pattern * h_dim * dim_x * dim_y
# sxy = s_dim * dim_x * dim_y
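

# The *_dim_c* arguments handed to the kernels below are row-major strides
# (in elements) into the flat float32 buffers behind data_ptr(). A minimal
# sketch of the index arithmetic, assuming contiguous tensors; the helper
# name is illustrative and the function is not used by the kernels:
def flat_index_phxy(p, h, x, y, phxy_dim_c0, phxy_dim_c1, phxy_dim_c2):
    # phxy_dim_c0 = h_dim * dim_x * dim_y, phxy_dim_c1 = dim_x * dim_y,
    # phxy_dim_c2 = dim_y
    return p * phxy_dim_c0 + h * phxy_dim_c1 + x * phxy_dim_c2 + y

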
def test_kernel_pxy_times_spike_selected_sxy(
h_dim,
s_dim,
number_of_pattern,
dim_x,
dim_y,
display_debug,
spike_time,
number_of_spikes,
):
print("test_kernel_pxy_times_spike_selected_sxy")
# void test_kernel_pxy_times_spike_selected_sxy(
# size_t dim_x, size_t dim_y, size_t number_of_pattern, size_t h_dim,
# bool display_debug, int64_t pxy_memory_addr, int64_t sxy_memory_addr,
# int64_t spike_memory_addr, size_t spike_time, size_t spike_dim_c0,
# size_t spike_dim_c1, size_t spike_dim_c2, size_t pxy_dim_c0,
# size_t pxy_dim_c1, size_t sxy_dim_c0, size_t sxy_dim_c1);
memory_pxy = torch.rand(
(number_of_pattern, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
memory_sxy = torch.rand(
(s_dim, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
    # Random spike indices in [0, s_dim), stored as int64 as the kernel expects.
    memory_spikes = (
torch.rand(
(number_of_pattern, number_of_spikes, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
* float(s_dim)
).type(dtype=torch.int64)
pxy_dim_c0 = int(dim_x * dim_y)
pxy_dim_c1 = int(dim_y)
sxy_dim_c0 = int(dim_x * dim_y)
sxy_dim_c1 = int(dim_y)
spike_dim_c0 = int(number_of_spikes * dim_x * dim_y)
spike_dim_c1 = int(dim_x * dim_y)
spike_dim_c2 = int(dim_y)
memory_pxy_copy = memory_pxy.clone()
memory_sxy_copy = memory_sxy.clone()
memory_spikes_copy = memory_spikes.clone()
my_kernels = TestKernel()
my_kernels.test_kernel_pxy_times_spike_selected_sxy(
dim_x,
dim_y,
number_of_pattern,
h_dim,
display_debug,
memory_pxy.data_ptr(),
memory_sxy.data_ptr(),
memory_spikes.data_ptr(),
spike_time,
spike_dim_c0,
spike_dim_c1,
spike_dim_c2,
pxy_dim_c0,
pxy_dim_c1,
sxy_dim_c0,
sxy_dim_c1,
)
    # Python reference: multiply each pxy value by the sxy entry selected by
    # the spike index at spike_time (a negative index zeroes the output).
    for p in range(0, memory_spikes_copy.shape[0]):
for x in range(0, memory_spikes_copy.shape[2]):
for y in range(0, memory_spikes_copy.shape[3]):
spike = memory_spikes_copy[p, spike_time, x, y]
if spike >= 0:
memory_pxy_copy[p, x, y] *= memory_sxy_copy[spike, x, y]
else:
memory_pxy_copy[p, x, y] = 0.0
print(f"difference: {torch.abs(memory_pxy - memory_pxy_copy).max():.4e}")
print()
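

# A vectorized sketch of the reference loop above (illustrative only, not
# called by the test): torch.gather along dim 0 selects
# sxy[spikes[p, spike_time, x, y], x, y] for every (p, x, y).
def pxy_times_spike_selected_sxy_reference(pxy, sxy, spikes, spike_time):
    idx = spikes[:, spike_time]  # (number_of_pattern, dim_x, dim_y), int64
    selected = sxy.gather(0, idx.clamp(min=0))
    return torch.where(idx >= 0, pxy * selected, torch.zeros_like(pxy))

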
def test_kernel_phxy_fill_with_spike_selected_w(
h_dim,
s_dim,
number_of_pattern,
dim_x,
dim_y,
display_debug,
spike_time,
number_of_spikes,
):
print("test_kernel_phxy_fill_with_spike_selected_w")
# void test_kernel_phxy_fill_with_spike_selected_w(
# size_t dim_x, size_t dim_y, size_t number_of_pattern, size_t h_dim,
# bool display_debug, size_t spike_time, size_t weights_dim_c0,
# size_t spike_dim_c0, size_t spike_dim_c1, size_t spike_dim_c2,
# size_t phxy_dim_c0, size_t phxy_dim_c1, size_t phxy_dim_c2,
# int64_t phxy_memory_addr, int64_t weight_memory_addr,
# int64_t spike_memory_addr);
memory_phxy = torch.rand(
(number_of_pattern, h_dim, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
memory_w = torch.rand(
(s_dim, h_dim),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
memory_spikes = (
torch.rand(
(number_of_pattern, number_of_spikes, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
* float(s_dim)
).type(dtype=torch.int64)
phxy_dim_c0 = int(h_dim * dim_x * dim_y)
phxy_dim_c1 = int(dim_x * dim_y)
phxy_dim_c2 = int(dim_y)
spike_dim_c0 = int(number_of_spikes * dim_x * dim_y)
spike_dim_c1 = int(dim_x * dim_y)
spike_dim_c2 = int(dim_y)
weights_dim_c0 = int(h_dim)
memory_phxy_copy = memory_phxy.clone()
memory_w_copy = memory_w.clone()
memory_spikes_copy = memory_spikes.clone()
my_kernels = TestKernel()
my_kernels.test_kernel_phxy_fill_with_spike_selected_w(
dim_x,
dim_y,
number_of_pattern,
h_dim,
display_debug,
spike_time,
weights_dim_c0,
spike_dim_c0,
spike_dim_c1,
spike_dim_c2,
phxy_dim_c0,
phxy_dim_c1,
phxy_dim_c2,
memory_phxy.data_ptr(),
memory_w.data_ptr(),
memory_spikes.data_ptr(),
)
    # Python reference: copy the weight row selected by the spike index into
    # the h dimension (a negative index zeroes the output).
    for p in range(0, memory_spikes_copy.shape[0]):
for x in range(0, memory_spikes_copy.shape[2]):
for y in range(0, memory_spikes_copy.shape[3]):
spike = memory_spikes_copy[p, spike_time, x, y]
if spike >= 0:
memory_phxy_copy[p, :, x, y] = memory_w_copy[spike, :]
else:
memory_phxy_copy[p, :, x, y] = 0.0
print(f"difference: {torch.abs(memory_phxy - memory_phxy_copy).max():.4e}")
print()
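

# A vectorized sketch for the weight selection above (illustrative only, not
# called by the test): advanced indexing pulls w[spike, :] for all (p, x, y)
# at once, then the h dimension is moved into place.
def phxy_fill_with_spike_selected_w_reference(w, spikes, spike_time):
    idx = spikes[:, spike_time]  # (number_of_pattern, dim_x, dim_y), int64
    filled = w[idx.clamp(min=0)].permute(0, 3, 1, 2)  # -> (p, h, x, y)
    return torch.where((idx >= 0).unsqueeze(1), filled, torch.zeros_like(filled))

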
def test_kernel_phxy_one_over_sum_into_pxy(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
print("test_kernel_phxy_one_over_sum_into_pxy")
# void test_kernel_phxy_one_over_sum_into_pxy(
# size_t dim_x, size_t dim_y, size_t number_of_pattern, size_t h_dim,
# bool display_debug, size_t phxy_dim_c0, size_t phxy_dim_c1,
# size_t phxy_dim_c2, size_t pxy_dim_c0, size_t pxy_dim_c1,
# int64_t phxy_memory_addr, int64_t pxy_memory_addr);
memory_a = torch.rand(
(number_of_pattern, h_dim, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
memory_b = torch.rand(
(number_of_pattern, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
pxy_dim_c0 = int(dim_x * dim_y)
pxy_dim_c1 = int(dim_y)
phxy_dim_c0 = int(h_dim * dim_x * dim_y)
phxy_dim_c1 = int(dim_x * dim_y)
phxy_dim_c2 = int(dim_y)
memory_a_copy = memory_a.clone()
memory_b_copy = memory_b.clone()
my_kernels = TestKernel()
my_kernels.test_kernel_phxy_one_over_sum_into_pxy(
dim_x,
dim_y,
number_of_pattern,
h_dim,
display_debug,
phxy_dim_c0,
phxy_dim_c1,
phxy_dim_c2,
pxy_dim_c0,
pxy_dim_c1,
memory_a.data_ptr(),
memory_b.data_ptr(),
)
memory_temp_copy = memory_a_copy.sum(dim=1)
memory_b_copy = torch.where(memory_temp_copy > 1e-10, 1.0 / memory_temp_copy, 0.0)
    print(
        "Remember:\n"
        "An error of exactly 0 is unlikely because the kernel may sum\n"
        "the values in a different order."
    )
print(f"difference: {torch.abs(memory_b - memory_b_copy).max():.4e}")
print()
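

# Why an exact match is not expected: float32 addition is not associative, so
# a GPU reduction may differ from torch.sum in the last bits. A quick check
# (deltas vary from run to run):
#
#     v = torch.rand(10000)
#     print((v.sum() - v.flip(0).sum()).item())  # typically a few ULPs, not 0

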
def test_kernel_phxy_fill_with_h(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
print("test_kernel_phxy_fill_with_h")
# void test_kernel_phxy_fill_with_h(size_t dim_x, size_t dim_y,
# size_t number_of_pattern, size_t h_dim,
# bool display_debug, size_t phxy_dim_c0,
# size_t phxy_dim_c1, size_t phxy_dim_c2,
# int64_t h_memory_addr,
# int64_t phxy_memory_addr);
memory_a = torch.rand(
(number_of_pattern, h_dim, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
memory_h = torch.rand(
        (h_dim,),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
phxy_dim_c0 = int(h_dim * dim_x * dim_y)
phxy_dim_c1 = int(dim_x * dim_y)
phxy_dim_c2 = int(dim_y)
memory_a_copy = memory_a.clone()
memory_h_copy = memory_h.clone()
my_kernels = TestKernel()
my_kernels.test_kernel_phxy_fill_with_h(
dim_x,
dim_y,
number_of_pattern,
h_dim,
display_debug,
phxy_dim_c0,
phxy_dim_c1,
phxy_dim_c2,
memory_h.data_ptr(),
memory_a.data_ptr(),
)
    # Python reference: write the h vector into every (pattern, x, y) position.
    for p in range(0, memory_a_copy.shape[0]):
for x in range(0, memory_a_copy.shape[2]):
for y in range(0, memory_a_copy.shape[3]):
memory_a_copy[p, :, x, y] = memory_h_copy
print(f"difference: {torch.abs(memory_a - memory_a_copy).max():.4e}")
print()
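

# The same reference without loops (a sketch): broadcast the h vector over the
# pattern and spatial dimensions.
#
#     memory_a_copy[:] = memory_h_copy.view(1, -1, 1, 1)

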
def test_kernel_phxy_plus_pxy(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
print("test_kernel_phxy_plus_pxy")
# void test_kernel_phxy_plus_pxy(size_t dim_x, size_t dim_y,
# size_t number_of_pattern, size_t h_dim,
# bool display_debug, size_t phxy_dim_c0,
# size_t phxy_dim_c1, size_t phxy_dim_c2,
# size_t pxy_dim_c0, size_t pxy_dim_c1,
# int64_t phxy_memory_addr,
# int64_t pxy_memory_addr);
memory_a = torch.rand(
(number_of_pattern, h_dim, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
memory_b = torch.rand(
(number_of_pattern, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
pxy_dim_c0 = int(dim_x * dim_y)
pxy_dim_c1 = int(dim_y)
phxy_dim_c0 = int(h_dim * dim_x * dim_y)
phxy_dim_c1 = int(dim_x * dim_y)
phxy_dim_c2 = int(dim_y)
memory_a_copy = memory_a.clone()
memory_b_copy = memory_b.clone()
my_kernels = TestKernel()
my_kernels.test_kernel_phxy_plus_pxy(
dim_x,
dim_y,
number_of_pattern,
h_dim,
display_debug,
phxy_dim_c0,
phxy_dim_c1,
phxy_dim_c2,
pxy_dim_c0,
pxy_dim_c1,
memory_a.data_ptr(),
memory_b.data_ptr(),
)
memory_a_copy += memory_b_copy.unsqueeze(1)
print(f"difference: {torch.abs(memory_a - memory_a_copy).max():.4e}")
print()


def test_kernel_phxy_times_pxy(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
print("test_kernel_phxy_times_pxy")
# void test_kernel_phxy_times_pxy(size_t dim_x, size_t dim_y,
# size_t number_of_pattern, size_t h_dim,
# bool display_debug, size_t phxy_dim_c0,
# size_t phxy_dim_c1, size_t phxy_dim_c2,
# size_t pxy_dim_c0, size_t pxy_dim_c1,
# int64_t phxy_memory_addr,
# int64_t pxy_memory_addr);
memory_a = torch.rand(
(number_of_pattern, h_dim, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
memory_b = torch.rand(
(number_of_pattern, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
pxy_dim_c0 = int(dim_x * dim_y)
pxy_dim_c1 = int(dim_y)
phxy_dim_c0 = int(h_dim * dim_x * dim_y)
phxy_dim_c1 = int(dim_x * dim_y)
phxy_dim_c2 = int(dim_y)
memory_a_copy = memory_a.clone()
memory_b_copy = memory_b.clone()
my_kernels = TestKernel()
my_kernels.test_kernel_phxy_times_pxy(
dim_x,
dim_y,
number_of_pattern,
h_dim,
display_debug,
phxy_dim_c0,
phxy_dim_c1,
phxy_dim_c2,
pxy_dim_c0,
pxy_dim_c1,
memory_a.data_ptr(),
memory_b.data_ptr(),
)
memory_a_copy *= memory_b_copy.unsqueeze(1)
print(f"difference: {torch.abs(memory_a - memory_a_copy).max():.4e}")
print()


def test_kernel_phxy_times_phxy_equals_phxy(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
print("test_kernel_phxy_times_phxy_equals_phxy")
# void test_kernel_phxy_times_phxy_equals_phxy(size_t dim_x, size_t dim_y,
# size_t number_of_pattern,
# size_t h_dim, bool display_debug,
# int64_t phxy_memory_a_addr,
# int64_t phxy_memory_b_addr,
# int64_t phxy_memory_out_addr);
memory_a = torch.rand(
(number_of_pattern, h_dim, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
memory_b = torch.rand(
(number_of_pattern, h_dim, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
memory_out = torch.rand(
(number_of_pattern, h_dim, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
memory_a_copy = memory_a.clone()
memory_b_copy = memory_b.clone()
my_kernels = TestKernel()
my_kernels.test_kernel_phxy_times_phxy_equals_phxy(
dim_x,
dim_y,
number_of_pattern,
h_dim,
display_debug,
memory_a.data_ptr(),
memory_b.data_ptr(),
memory_out.data_ptr(),
)
memory_out_copy = memory_a_copy * memory_b_copy
print(f"difference: {torch.abs(memory_out - memory_out_copy).max():.4e}")
print()


def test_kernel_phxy_plus_phxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_plus_phxy")
# void test_kernel_phxy_plus_phxy(size_t dim_x, size_t dim_y,
# size_t number_of_pattern, size_t h_dim,
# bool display_debug,
# int64_t phxy_memory_a_addr,
# int64_t phxy_memory_b_addr);
memory_a = torch.rand(
(number_of_pattern, h_dim, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
memory_b = torch.rand(
(number_of_pattern, h_dim, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
memory_a_copy = memory_a.clone()
memory_b_copy = memory_b.clone()
my_kernels = TestKernel()
my_kernels.test_kernel_phxy_plus_phxy(
dim_x,
dim_y,
number_of_pattern,
h_dim,
display_debug,
memory_a.data_ptr(),
memory_b.data_ptr(),
)
memory_a_copy += memory_b_copy
print(f"difference: {torch.abs(memory_a - memory_a_copy).max():.4e}")
print()


def test_kernel_pxy_time_pxy(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
print("test_kernel_pxy_time_pxy")
# void test_kernel_pxy_time_pxy(size_t dim_x, size_t dim_y,
# size_t number_of_pattern, size_t h_dim,
# bool display_debug, int64_t pxy_memory_a_addr,
# int64_t pxy_memory_b_addr);
epsilon_memory_a = torch.rand(
(number_of_pattern, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
epsilon_memory_b = torch.rand(
(number_of_pattern, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
epsilon_memory_a_copy = epsilon_memory_a.clone()
epsilon_memory_b_copy = epsilon_memory_b.clone()
my_kernels = TestKernel()
my_kernels.test_kernel_pxy_time_pxy(
dim_x,
dim_y,
number_of_pattern,
h_dim,
display_debug,
epsilon_memory_a.data_ptr(),
epsilon_memory_b.data_ptr(),
)
epsilon_memory_a_copy *= epsilon_memory_b_copy
print(
f"difference: {torch.abs(epsilon_memory_a - epsilon_memory_a_copy).max():.4e}"
)
print()


def test_kernel_pxy_times_v(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
print("test_kernel_pxy_times_v")
# void test_kernel_pxy_times_v(size_t dim_x, size_t dim_y,
# size_t number_of_pattern, size_t h_dim,
# bool display_debug, float value,
# int64_t pxy_memory_addr);
epsilon_memory = torch.rand(
(number_of_pattern, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
epsilon_memory_copy = epsilon_memory.clone()
value = float(math.pi)
my_kernels = TestKernel()
my_kernels.test_kernel_pxy_times_v(
dim_x,
dim_y,
number_of_pattern,
h_dim,
display_debug,
value,
epsilon_memory.data_ptr(),
)
epsilon_memory_copy = epsilon_memory_copy * value
print(f"difference: {torch.abs(epsilon_memory - epsilon_memory_copy).max():.4e}")
print()


def test_kernel_pxy_plus_v(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
print("test_kernel_pxy_plus_v")
# void test_kernel_pxy_plus_v(size_t dim_x, size_t dim_y,
# size_t number_of_pattern, size_t h_dim,
# bool display_debug, float value,
# int64_t pxy_memory_addr);
epsilon_memory = torch.rand(
(number_of_pattern, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
epsilon_memory_copy = epsilon_memory.clone()
value = float(math.pi)
my_kernels = TestKernel()
my_kernels.test_kernel_pxy_plus_v(
dim_x,
dim_y,
number_of_pattern,
h_dim,
display_debug,
value,
epsilon_memory.data_ptr(),
)
epsilon_memory_copy = epsilon_memory_copy + value
print(f"difference: {torch.abs(epsilon_memory - epsilon_memory_copy).max():.4e}")
print()


def test_kernel_pxy_set_to_v(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
print("test_kernel_pxy_set_to_v")
# void test_kernel_pxy_set_to_v(size_t dim_x, size_t dim_y,
# size_t number_of_pattern, size_t h_dim,
# bool display_debug, float value,
# int64_t pxy_memory_addr);
set_value = float(math.pi)
epsilon_memory = torch.rand(
(number_of_pattern, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
my_kernels = TestKernel()
my_kernels.test_kernel_pxy_set_to_v(
dim_x,
dim_y,
number_of_pattern,
h_dim,
display_debug,
set_value,
epsilon_memory.data_ptr(),
)
print(f"difference: {torch.abs(epsilon_memory - set_value).max():.4e}")
print()


def test_kernel_pxy_reciprocal(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
print("test_kernel_pxy_reciprocal")
# void test_kernel_pxy_reciprocal(size_t dim_x, size_t dim_y,
# size_t number_of_pattern, size_t h_dim,
# bool display_debug, int64_t pxy_memory_addr);
epsilon_memory = torch.rand(
(number_of_pattern, dim_x, dim_y),
dtype=torch.float32,
device=torch.device("cuda:0"),
)
epsilon_memory_copy = epsilon_memory.clone()
my_kernels = TestKernel()
my_kernels.test_kernel_pxy_reciprocal(
dim_x, dim_y, number_of_pattern, h_dim, display_debug, epsilon_memory.data_ptr()
)
epsilon_memory_copy = 1.0 / epsilon_memory_copy
print(f"difference: {torch.abs(epsilon_memory - epsilon_memory_copy).max():.4e}")
print()


if __name__ == "__main__":
    input_set = 0  # selects one of the two parameter sets below

    for test_id in range(0, 13):
        print(f"Test-ID: {test_id}")
        number_of_spikes: int = 1600
        spike_time: int = random.randrange(number_of_spikes)

        if input_set == 0:
            h_dim: int = 32
            s_dim: int = 1 * 5 * 5
            number_of_pattern: int = 24
            dim_x: int = 20
            dim_y: int = 20
            display_debug: bool = False
        else:
            h_dim = 10
            s_dim = 32 * 20 * 20
            number_of_pattern = 24
            dim_x = 1
            dim_y = 1
            display_debug = False
if test_id == 0:
test_kernel_pxy_reciprocal(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
)
elif test_id == 1:
test_kernel_pxy_set_to_v(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
)
elif test_id == 2:
test_kernel_pxy_plus_v(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
)
elif test_id == 3:
test_kernel_pxy_times_v(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
)
elif test_id == 4:
test_kernel_pxy_time_pxy(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
)
elif test_id == 5:
test_kernel_phxy_plus_phxy(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
)
elif test_id == 6:
test_kernel_phxy_times_phxy_equals_phxy(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
)
elif test_id == 7:
test_kernel_phxy_times_pxy(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
)
elif test_id == 8:
test_kernel_phxy_plus_pxy(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
)
elif test_id == 9:
test_kernel_phxy_fill_with_h(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
)
elif test_id == 10:
test_kernel_phxy_one_over_sum_into_pxy(
h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
)
elif test_id == 11:
test_kernel_phxy_fill_with_spike_selected_w(
h_dim,
s_dim,
number_of_pattern,
dim_x,
dim_y,
display_debug,
spike_time,
number_of_spikes,
)
elif test_id == 12:
test_kernel_pxy_times_spike_selected_sxy(
h_dim,
s_dim,
number_of_pattern,
dim_x,
dim_y,
display_debug,
spike_time,
number_of_spikes,
)
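
# Usage (a note, assuming the PyTestKernel extension is importable from this
# directory):
#
#     python test_kernel.py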