import torch
import math
import random

from PyTestKernel import TestKernel

# TODO: kernel_phxy_plus_pxy, kernel_phxy_times_pxy,
# kernel_phxy_fill_h, kernel_phxy_one_over_sum_into_pxy,
# test_kernel_phxy_fill_with_spike_selected_w => 4D index

# pxy = number_of_pattern * dim_x * dim_y
# phxy = number_of_pattern * h_dim * dim_x * dim_y
# sxy = s_dim * dim_x * dim_y
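
# The *_dim_c* values computed in each test below are row-major strides
# (in elements, not bytes) that the C++/CUDA kernels use to index the raw
# memory behind data_ptr(). The flat offsets they imply (a sketch inferred
# from the layouts above, not taken from the kernel sources):
#   pxy[p, x, y]     -> p * pxy_dim_c0 + x * pxy_dim_c1 + y
#   phxy[p, h, x, y] -> p * phxy_dim_c0 + h * phxy_dim_c1 + x * phxy_dim_c2 + y
#   sxy[s, x, y]     -> s * sxy_dim_c0 + x * sxy_dim_c1 + y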


def test_kernel_pxy_times_spike_selected_sxy(
    h_dim,
    s_dim,
    number_of_pattern,
    dim_x,
    dim_y,
    display_debug,
    spike_time,
    number_of_spikes,
):
    print("test_kernel_pxy_times_spike_selected_sxy")
    # void test_kernel_pxy_times_spike_selected_sxy(
    #     size_t dim_x, size_t dim_y, size_t number_of_pattern, size_t h_dim,
    #     bool display_debug, int64_t pxy_memory_addr, int64_t sxy_memory_addr,
    #     int64_t spike_memory_addr, size_t spike_time, size_t spike_dim_c0,
    #     size_t spike_dim_c1, size_t spike_dim_c2, size_t pxy_dim_c0,
    #     size_t pxy_dim_c1, size_t sxy_dim_c0, size_t sxy_dim_c1);

    memory_pxy = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_sxy = torch.rand(
        (s_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_spikes = (
        torch.rand(
            (number_of_pattern, number_of_spikes, dim_x, dim_y),
            dtype=torch.float32,
            device=torch.device("cuda:0"),
        )
        * float(s_dim)
    ).type(dtype=torch.int64)
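
    # Note: memory_spikes holds integer indices in [0, s_dim), produced by
    # scaling uniform noise; each entry names the sxy row that the spike at
    # a given time step selects.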

    pxy_dim_c0 = int(dim_x * dim_y)
    pxy_dim_c1 = int(dim_y)

    sxy_dim_c0 = int(dim_x * dim_y)
    sxy_dim_c1 = int(dim_y)

    spike_dim_c0 = int(number_of_spikes * dim_x * dim_y)
    spike_dim_c1 = int(dim_x * dim_y)
    spike_dim_c2 = int(dim_y)

    memory_pxy_copy = memory_pxy.clone()
    memory_sxy_copy = memory_sxy.clone()
    memory_spikes_copy = memory_spikes.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_pxy_times_spike_selected_sxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        memory_pxy.data_ptr(),
        memory_sxy.data_ptr(),
        memory_spikes.data_ptr(),
        spike_time,
        spike_dim_c0,
        spike_dim_c1,
        spike_dim_c2,
        pxy_dim_c0,
        pxy_dim_c1,
        sxy_dim_c0,
        sxy_dim_c1,
    )
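
    # Reference computed with PyTorch ops on the clones: for every pattern
    # and pixel, multiply pxy by the sxy channel selected by the spike at
    # spike_time; a negative spike index zeroes the entry instead.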
    for p in range(0, memory_spikes_copy.shape[0]):
        for x in range(0, memory_spikes_copy.shape[2]):
            for y in range(0, memory_spikes_copy.shape[3]):
                spike = memory_spikes_copy[p, spike_time, x, y]

                if spike >= 0:
                    memory_pxy_copy[p, x, y] *= memory_sxy_copy[spike, x, y]
                else:
                    memory_pxy_copy[p, x, y] = 0.0

    print(f"difference: {torch.abs(memory_pxy - memory_pxy_copy).max():.4e}")
    print()


def test_kernel_phxy_fill_with_spike_selected_w(
    h_dim,
    s_dim,
    number_of_pattern,
    dim_x,
    dim_y,
    display_debug,
    spike_time,
    number_of_spikes,
):
    print("test_kernel_phxy_fill_with_spike_selected_w")
    # void test_kernel_phxy_fill_with_spike_selected_w(
    #     size_t dim_x, size_t dim_y, size_t number_of_pattern, size_t h_dim,
    #     bool display_debug, size_t spike_time, size_t weights_dim_c0,
    #     size_t spike_dim_c0, size_t spike_dim_c1, size_t spike_dim_c2,
    #     size_t phxy_dim_c0, size_t phxy_dim_c1, size_t phxy_dim_c2,
    #     int64_t phxy_memory_addr, int64_t weight_memory_addr,
    #     int64_t spike_memory_addr);

    memory_phxy = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_w = torch.rand(
        (s_dim, h_dim),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_spikes = (
        torch.rand(
            (number_of_pattern, number_of_spikes, dim_x, dim_y),
            dtype=torch.float32,
            device=torch.device("cuda:0"),
        )
        * float(s_dim)
    ).type(dtype=torch.int64)

    phxy_dim_c0 = int(h_dim * dim_x * dim_y)
    phxy_dim_c1 = int(dim_x * dim_y)
    phxy_dim_c2 = int(dim_y)

    spike_dim_c0 = int(number_of_spikes * dim_x * dim_y)
    spike_dim_c1 = int(dim_x * dim_y)
    spike_dim_c2 = int(dim_y)

    weights_dim_c0 = int(h_dim)

    memory_phxy_copy = memory_phxy.clone()
    memory_w_copy = memory_w.clone()
    memory_spikes_copy = memory_spikes.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_fill_with_spike_selected_w(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        spike_time,
        weights_dim_c0,
        spike_dim_c0,
        spike_dim_c1,
        spike_dim_c2,
        phxy_dim_c0,
        phxy_dim_c1,
        phxy_dim_c2,
        memory_phxy.data_ptr(),
        memory_w.data_ptr(),
        memory_spikes.data_ptr(),
    )
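
    # Reference: copy the weight row selected by the spike at spike_time into
    # every h column of phxy (a negative spike index zeroes the column). A
    # vectorized equivalent of the loop below (a sketch, ignoring the
    # negative-index branch, which cannot trigger with the indices generated
    # above) would be:
    #   idx = memory_spikes_copy[:, spike_time]              # (p, x, y)
    #   memory_phxy_copy = memory_w_copy[idx].permute(0, 3, 1, 2)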
    for p in range(0, memory_spikes_copy.shape[0]):
        for x in range(0, memory_spikes_copy.shape[2]):
            for y in range(0, memory_spikes_copy.shape[3]):
                spike = memory_spikes_copy[p, spike_time, x, y]

                if spike >= 0:
                    memory_phxy_copy[p, :, x, y] = memory_w_copy[spike, :]
                else:
                    memory_phxy_copy[p, :, x, y] = 0.0

    print(f"difference: {torch.abs(memory_phxy - memory_phxy_copy).max():.4e}")
    print()


def test_kernel_phxy_one_over_sum_into_pxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_one_over_sum_into_pxy")
    # void test_kernel_phxy_one_over_sum_into_pxy(
    #     size_t dim_x, size_t dim_y, size_t number_of_pattern, size_t h_dim,
    #     bool display_debug, size_t phxy_dim_c0, size_t phxy_dim_c1,
    #     size_t phxy_dim_c2, size_t pxy_dim_c0, size_t pxy_dim_c1,
    #     int64_t phxy_memory_addr, int64_t pxy_memory_addr);

    memory_a = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_b = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    pxy_dim_c0 = int(dim_x * dim_y)
    pxy_dim_c1 = int(dim_y)

    phxy_dim_c0 = int(h_dim * dim_x * dim_y)
    phxy_dim_c1 = int(dim_x * dim_y)
    phxy_dim_c2 = int(dim_y)

    memory_a_copy = memory_a.clone()
    memory_b_copy = memory_b.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_one_over_sum_into_pxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        phxy_dim_c0,
        phxy_dim_c1,
        phxy_dim_c2,
        pxy_dim_c0,
        pxy_dim_c1,
        memory_a.data_ptr(),
        memory_b.data_ptr(),
    )
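
    # Reference: sum phxy over the h axis, then store the guarded reciprocal
    # 1/sum into pxy (0.0 wherever the sum falls below the 1e-10 threshold).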
    memory_temp_copy = memory_a_copy.sum(dim=1)

    memory_b_copy = torch.where(memory_temp_copy > 1e-10, 1.0 / memory_temp_copy, 0.0)
    print(
        "Note: an exact difference of 0 is unlikely here because the kernel\n"
        "sums the values in a different order than PyTorch."
    )
    print(f"difference: {torch.abs(memory_b - memory_b_copy).max():.4e}")
    print()


def test_kernel_phxy_fill_with_h(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_fill_with_h")
    # void test_kernel_phxy_fill_with_h(size_t dim_x, size_t dim_y,
    #                                   size_t number_of_pattern, size_t h_dim,
    #                                   bool display_debug, size_t phxy_dim_c0,
    #                                   size_t phxy_dim_c1, size_t phxy_dim_c2,
    #                                   int64_t h_memory_addr,
    #                                   int64_t phxy_memory_addr);

    memory_a = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_h = torch.rand(
        (h_dim,),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    phxy_dim_c0 = int(h_dim * dim_x * dim_y)
    phxy_dim_c1 = int(dim_x * dim_y)
    phxy_dim_c2 = int(dim_y)

    memory_a_copy = memory_a.clone()
    memory_h_copy = memory_h.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_fill_with_h(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        phxy_dim_c0,
        phxy_dim_c1,
        phxy_dim_c2,
        memory_h.data_ptr(),
        memory_a.data_ptr(),
    )
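
    # Reference: broadcast the h vector into every (pattern, x, y) position.
    # The loop below is equivalent to the one-liner (sketch):
    #   memory_a_copy[:] = memory_h_copy.view(1, -1, 1, 1)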
    for p in range(0, memory_a_copy.shape[0]):
        for x in range(0, memory_a_copy.shape[2]):
            for y in range(0, memory_a_copy.shape[3]):
                memory_a_copy[p, :, x, y] = memory_h_copy

    print(f"difference: {torch.abs(memory_a - memory_a_copy).max():.4e}")
    print()


def test_kernel_phxy_plus_pxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_plus_pxy")
    # void test_kernel_phxy_plus_pxy(size_t dim_x, size_t dim_y,
    #                                size_t number_of_pattern, size_t h_dim,
    #                                bool display_debug, size_t phxy_dim_c0,
    #                                size_t phxy_dim_c1, size_t phxy_dim_c2,
    #                                size_t pxy_dim_c0, size_t pxy_dim_c1,
    #                                int64_t phxy_memory_addr,
    #                                int64_t pxy_memory_addr);

    memory_a = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_b = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    pxy_dim_c0 = int(dim_x * dim_y)
    pxy_dim_c1 = int(dim_y)

    phxy_dim_c0 = int(h_dim * dim_x * dim_y)
    phxy_dim_c1 = int(dim_x * dim_y)
    phxy_dim_c2 = int(dim_y)

    memory_a_copy = memory_a.clone()
    memory_b_copy = memory_b.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_plus_pxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        phxy_dim_c0,
        phxy_dim_c1,
        phxy_dim_c2,
        pxy_dim_c0,
        pxy_dim_c1,
        memory_a.data_ptr(),
        memory_b.data_ptr(),
    )
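
    # Reference: unsqueeze(1) inserts an h axis of size 1, so pxy is
    # broadcast-added across every h slice of phxy.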
    memory_a_copy += memory_b_copy.unsqueeze(1)

    print(f"difference: {torch.abs(memory_a - memory_a_copy).max():.4e}")
    print()


def test_kernel_phxy_times_pxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_times_pxy")
    # void test_kernel_phxy_times_pxy(size_t dim_x, size_t dim_y,
    #                                 size_t number_of_pattern, size_t h_dim,
    #                                 bool display_debug, size_t phxy_dim_c0,
    #                                 size_t phxy_dim_c1, size_t phxy_dim_c2,
    #                                 size_t pxy_dim_c0, size_t pxy_dim_c1,
    #                                 int64_t phxy_memory_addr,
    #                                 int64_t pxy_memory_addr);

    memory_a = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_b = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    pxy_dim_c0 = int(dim_x * dim_y)
    pxy_dim_c1 = int(dim_y)

    phxy_dim_c0 = int(h_dim * dim_x * dim_y)
    phxy_dim_c1 = int(dim_x * dim_y)
    phxy_dim_c2 = int(dim_y)

    memory_a_copy = memory_a.clone()
    memory_b_copy = memory_b.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_times_pxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        phxy_dim_c0,
        phxy_dim_c1,
        phxy_dim_c2,
        pxy_dim_c0,
        pxy_dim_c1,
        memory_a.data_ptr(),
        memory_b.data_ptr(),
    )
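
    # Reference: the same h-axis broadcast as in the plus test, multiplicative.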
    memory_a_copy *= memory_b_copy.unsqueeze(1)

    print(f"difference: {torch.abs(memory_a - memory_a_copy).max():.4e}")
    print()


def test_kernel_phxy_times_phxy_equals_phxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_times_phxy_equals_phxy")
    # void test_kernel_phxy_times_phxy_equals_phxy(size_t dim_x, size_t dim_y,
    #                                              size_t number_of_pattern,
    #                                              size_t h_dim, bool display_debug,
    #                                              int64_t phxy_memory_a_addr,
    #                                              int64_t phxy_memory_b_addr,
    #                                              int64_t phxy_memory_out_addr);

    memory_a = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_b = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_out = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_a_copy = memory_a.clone()
    memory_b_copy = memory_b.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_times_phxy_equals_phxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        memory_a.data_ptr(),
        memory_b.data_ptr(),
        memory_out.data_ptr(),
    )
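
    # Reference: plain elementwise product; the random initial values of
    # memory_out should be fully overwritten by the kernel.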
    memory_out_copy = memory_a_copy * memory_b_copy

    print(f"difference: {torch.abs(memory_out - memory_out_copy).max():.4e}")
    print()


def test_kernel_phxy_plus_phxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_phxy_plus_phxy")
    # void test_kernel_phxy_plus_phxy(size_t dim_x, size_t dim_y,
    #                                 size_t number_of_pattern, size_t h_dim,
    #                                 bool display_debug,
    #                                 int64_t phxy_memory_a_addr,
    #                                 int64_t phxy_memory_b_addr);

    memory_a = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_b = torch.rand(
        (number_of_pattern, h_dim, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    memory_a_copy = memory_a.clone()
    memory_b_copy = memory_b.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_phxy_plus_phxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        memory_a.data_ptr(),
        memory_b.data_ptr(),
    )

    memory_a_copy += memory_b_copy

    print(f"difference: {torch.abs(memory_a - memory_a_copy).max():.4e}")
    print()


def test_kernel_pxy_time_pxy(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_pxy_time_pxy")

    # void test_kernel_pxy_time_pxy(size_t dim_x, size_t dim_y,
    #                               size_t number_of_pattern, size_t h_dim,
    #                               bool display_debug, int64_t pxy_memory_a_addr,
    #                               int64_t pxy_memory_b_addr);

    epsilon_memory_a = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    epsilon_memory_b = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    epsilon_memory_a_copy = epsilon_memory_a.clone()
    epsilon_memory_b_copy = epsilon_memory_b.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_pxy_time_pxy(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        epsilon_memory_a.data_ptr(),
        epsilon_memory_b.data_ptr(),
    )
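
    # Reference: in-place elementwise product on the clones.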
    epsilon_memory_a_copy *= epsilon_memory_b_copy

    print(
        f"difference: {torch.abs(epsilon_memory_a - epsilon_memory_a_copy).max():.4e}"
    )
    print()


def test_kernel_pxy_times_v(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_pxy_times_v")

    # void test_kernel_pxy_times_v(size_t dim_x, size_t dim_y,
    #                              size_t number_of_pattern, size_t h_dim,
    #                              bool display_debug, float value,
    #                              int64_t pxy_memory_addr);

    epsilon_memory = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    epsilon_memory_copy = epsilon_memory.clone()
    value = float(math.pi)

    my_kernels = TestKernel()
    my_kernels.test_kernel_pxy_times_v(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        value,
        epsilon_memory.data_ptr(),
    )

    epsilon_memory_copy = epsilon_memory_copy * value

    print(f"difference: {torch.abs(epsilon_memory - epsilon_memory_copy).max():.4e}")
    print()


def test_kernel_pxy_plus_v(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_pxy_plus_v")
    # void test_kernel_pxy_plus_v(size_t dim_x, size_t dim_y,
    #                             size_t number_of_pattern, size_t h_dim,
    #                             bool display_debug, float value,
    #                             int64_t pxy_memory_addr);

    epsilon_memory = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    epsilon_memory_copy = epsilon_memory.clone()
    value = float(math.pi)

    my_kernels = TestKernel()
    my_kernels.test_kernel_pxy_plus_v(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        value,
        epsilon_memory.data_ptr(),
    )

    epsilon_memory_copy = epsilon_memory_copy + value

    print(f"difference: {torch.abs(epsilon_memory - epsilon_memory_copy).max():.4e}")
    print()


def test_kernel_pxy_set_to_v(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_pxy_set_to_v")
    # void test_kernel_pxy_set_to_v(size_t dim_x, size_t dim_y,
    #                               size_t number_of_pattern, size_t h_dim,
    #                               bool display_debug, float value,
    #                               int64_t pxy_memory_addr);

    set_value = float(math.pi)

    epsilon_memory = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    my_kernels = TestKernel()
    my_kernels.test_kernel_pxy_set_to_v(
        dim_x,
        dim_y,
        number_of_pattern,
        h_dim,
        display_debug,
        set_value,
        epsilon_memory.data_ptr(),
    )

    print(f"difference: {torch.abs(epsilon_memory - set_value).max():.4e}")
    print()


def test_kernel_pxy_reciprocal(
    h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
):
    print("test_kernel_pxy_reciprocal")
    # void test_kernel_pxy_reciprocal(size_t dim_x, size_t dim_y,
    #                                 size_t number_of_pattern, size_t h_dim,
    #                                 bool display_debug, int64_t pxy_memory_addr);

    epsilon_memory = torch.rand(
        (number_of_pattern, dim_x, dim_y),
        dtype=torch.float32,
        device=torch.device("cuda:0"),
    )

    epsilon_memory_copy = epsilon_memory.clone()

    my_kernels = TestKernel()
    my_kernels.test_kernel_pxy_reciprocal(
        dim_x, dim_y, number_of_pattern, h_dim, display_debug, epsilon_memory.data_ptr()
    )
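
    # Reference: plain reciprocal. torch.rand samples from [0, 1), so an
    # (unlikely) exact zero here would make the reference value inf.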
    epsilon_memory_copy = 1.0 / epsilon_memory_copy

    print(f"difference: {torch.abs(epsilon_memory - epsilon_memory_copy).max():.4e}")
    print()


if __name__ == "__main__":
    input_set = 0
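
    # input_set selects one of the two test geometries below:
    # 0 -> spatial maps (dim_x = dim_y = 20, small s_dim),
    # 1 -> fully flattened input (dim_x = dim_y = 1, large s_dim).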

    for test_id in range(0, 13):
        print(f"Test-ID: {test_id}")

        number_of_spikes: int = int(1600)
        spike_time: int = int(random.random() * number_of_spikes)

        if input_set == 0:
            h_dim: int = int(32)
            s_dim: int = int(1 * 5 * 5)
            number_of_pattern: int = int(24)
            dim_x: int = int(20)
            dim_y: int = int(20)
            display_debug: bool = False
        else:
            h_dim = int(10)
            s_dim = int(32 * 20 * 20)
            number_of_pattern = int(24)
            dim_x = int(1)
            dim_y = int(1)
            display_debug = bool(False)

        if test_id == 0:
            test_kernel_pxy_reciprocal(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 1:
            test_kernel_pxy_set_to_v(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 2:
            test_kernel_pxy_plus_v(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 3:
            test_kernel_pxy_times_v(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 4:
            test_kernel_pxy_time_pxy(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 5:
            test_kernel_phxy_plus_phxy(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 6:
            test_kernel_phxy_times_phxy_equals_phxy(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 7:
            test_kernel_phxy_times_pxy(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 8:
            test_kernel_phxy_plus_pxy(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 9:
            test_kernel_phxy_fill_with_h(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 10:
            test_kernel_phxy_one_over_sum_into_pxy(
                h_dim, s_dim, number_of_pattern, dim_x, dim_y, display_debug
            )
        elif test_id == 11:
            test_kernel_phxy_fill_with_spike_selected_w(
                h_dim,
                s_dim,
                number_of_pattern,
                dim_x,
                dim_y,
                display_debug,
                spike_time,
                number_of_spikes,
            )
        elif test_id == 12:
            test_kernel_pxy_times_spike_selected_sxy(
                h_dim,
                s_dim,
                number_of_pattern,
                dim_x,
                dim_y,
                display_debug,
                spike_time,
                number_of_spikes,
            )