diff --git a/network/spike_generation_gpu_cpp_v1/Makefile b/network/spike_generation_gpu_cpp_v1/Makefile new file mode 100644 index 0000000..5817c60 --- /dev/null +++ b/network/spike_generation_gpu_cpp_v1/Makefile @@ -0,0 +1,33 @@ +include ../.env +export + +name = SpikeGeneration +type = GPU + +PYPOSTFIX := $(shell $(PYBIN)python3-config --extension-suffix) +PYBIND11INCLUDE := $(shell $(PYBIN)python3 -m pybind11 --includes) +PARAMETERS_O = $(PARAMETERS_O_GPU) $(PYBIND11INCLUDE) +PARAMETERS_Linker = $(PARAMETERS_Linker_GPU) + +so_file = Py$(name)$(type)$(PYPOSTFIX) +pyi_file = Py$(name)$(type).pyi +all: ../$(so_file) + +$(O_DIRS)$(name)$(type).o: $(name)$(type).h $(name)$(type).cu + mkdir -p $(O_DIRS) + $(NVCC) $(PARAMETERS_O) -c $(name)$(type).cu -o $(O_DIRS)$(name)$(type).o + +$(O_DIRS)Py$(name)$(type).o: $(name)$(type).h Py$(name)$(type).cpp + mkdir -p $(O_DIRS) + $(NVCC) $(PARAMETERS_O) -c Py$(name)$(type).cpp -o $(O_DIRS)Py$(name)$(type).o + +../$(so_file): $(O_DIRS)$(name)$(type).o $(O_DIRS)Py$(name)$(type).o + $(NVCC) $(PARAMETERS_Linker) -o ../$(so_file) $(O_DIRS)$(name)$(type).o $(O_DIRS)Py$(name)$(type).o + + +####################### +clean: + rm -rf $(O_DIRS) + rm -f ../$(so_file) + rm -f ../$(pyi_file) + diff --git a/network/spike_generation_gpu_cpp_v1/PySpikeGenerationGPU.cpp b/network/spike_generation_gpu_cpp_v1/PySpikeGenerationGPU.cpp new file mode 100644 index 0000000..70f4581 --- /dev/null +++ b/network/spike_generation_gpu_cpp_v1/PySpikeGenerationGPU.cpp @@ -0,0 +1,19 @@ + +#include + +#include "SpikeGenerationGPU.h" + +namespace py = pybind11; + +PYBIND11_MODULE(PySpikeGenerationGPU, m) +{ + m.doc() = "SpikeGenerationGPU Module"; + py::class_(m, "SpikeGenerationGPU") + .def(py::init<>()) + .def("gpu_occupancy_export", + &SpikeGenerationGPU::gpu_occupancy_export) + .def("gpu_occupancy_import", + &SpikeGenerationGPU::gpu_occupancy_import) + .def("spike_generation", + &SpikeGenerationGPU::entrypoint); +} diff --git a/network/spike_generation_gpu_cpp_v1/SpikeGenerationGPU.cu b/network/spike_generation_gpu_cpp_v1/SpikeGenerationGPU.cu new file mode 100644 index 0000000..d439207 --- /dev/null +++ b/network/spike_generation_gpu_cpp_v1/SpikeGenerationGPU.cu @@ -0,0 +1,275 @@ +#include "SpikeGenerationGPU.h" + +#include +#include +#include + +#include +#include +#include + + +SpikeGenerationGPU::SpikeGenerationGPU() +{ + +}; + +SpikeGenerationGPU::~SpikeGenerationGPU() +{ + +}; + +void SpikeGenerationGPU::entrypoint( + int64_t input_pointer_addr, + int64_t input_dim_0, + int64_t input_dim_1, + int64_t input_dim_2, + int64_t input_dim_3, + int64_t random_values_pointer_addr, + int64_t random_values_dim_0, + int64_t random_values_dim_1, + int64_t random_values_dim_2, + int64_t random_values_dim_3, + int64_t output_pointer_addr, + int64_t output_dim_0, + int64_t output_dim_1, + int64_t output_dim_2, + int64_t output_dim_3, + int64_t number_of_cpu_processes) +{ + + float* input_pointer = (float*)input_pointer_addr; + float* random_values_pointer = (float*)random_values_pointer_addr; + int64_t* output_pointer = (int64_t*)output_pointer_addr; + + // Input + assert((input_pointer != nullptr)); + assert((input_dim_0 > 0)); + assert((input_dim_1 > 0)); + assert((input_dim_2 > 0)); + assert((input_dim_3 > 0)); + + // Random + assert((random_values_pointer != nullptr)); + assert((random_values_dim_0 > 0)); + assert((random_values_dim_1 > 0)); + assert((random_values_dim_2 > 0)); + assert((random_values_dim_3 > 0)); + + // Output + assert((output_pointer != nullptr)); + assert((output_dim_0 > 0)); + assert((output_dim_1 > 0)); + assert((output_dim_2 > 0)); + assert((output_dim_3 > 0)); + + // Input + size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3; + size_t input_dim_c1 = input_dim_2 * input_dim_3; + size_t input_dim_c2 = input_dim_3; + + // Random + size_t random_values_dim_c0 = + random_values_dim_1 * random_values_dim_2 * random_values_dim_3; + size_t random_values_dim_c1 = + random_values_dim_2 * random_values_dim_3; + size_t random_values_dim_c2 = random_values_dim_3; + + // Output + size_t output_dim_c0 = + output_dim_1 * output_dim_2 * output_dim_3; + size_t output_dim_c1 = output_dim_2 * output_dim_3; + size_t output_dim_c2 = output_dim_3; + + size_t number_of_pattern = input_dim_0; + size_t h_dim = input_dim_1; + size_t spike_dim = output_dim_1; + size_t x_dim = output_dim_2; + size_t y_dim = output_dim_2; + + assert ((number_of_cpu_processes <= 0)); + + gpu_spike_generation( + input_pointer, + input_dim_c0, + input_dim_c1, + input_dim_c2, + random_values_pointer, + random_values_dim_c0, + random_values_dim_c1, + random_values_dim_c2, + output_pointer, + output_dim_c0, + output_dim_c1, + output_dim_c2, + x_dim, + y_dim, + spike_dim, + h_dim, + number_of_pattern); + + return; +}; + +__device__ size_t gpu_lower_bound(float* __restrict__ data_ptr, + size_t data_length, + size_t data_ptr_stride, + float compare_to_value) +{ + + size_t start_of_range = 0; + size_t length_of_range = data_length; + + while (length_of_range != 0) + { + size_t half_length = length_of_range >> 1; + size_t actual_position = start_of_range + half_length; + + if (data_ptr[actual_position * data_ptr_stride] < compare_to_value) + { + start_of_range = ++actual_position; + length_of_range -= half_length + 1; + } + else + length_of_range = half_length; + } + return start_of_range; +}; + +__global__ void kernel_spike_generation( + float* __restrict__ input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* __restrict__ random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* __restrict__ output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t max_threadable_tasks) +{ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + + if (idx < max_threadable_tasks) + { + + size_t pattern_id = idx / spike_dim; + size_t position_spike = idx - (pattern_id * spike_dim); + + size_t position_x = blockIdx.y; + size_t position_y = blockIdx.z; + + float* p_ptr = input_pointer + pattern_id * input_dim_c0 + + position_x * input_dim_c2 + position_y; + + int64_t* out_ptr = output_pointer + pattern_id * output_dim_c0 + + position_x * output_dim_c2 + position_y + + position_spike * output_dim_c1; + + float* rand_ptr = random_values_pointer + + pattern_id * random_values_dim_c0 + + position_x * random_values_dim_c2 + position_y + + position_spike * random_values_dim_c1; + + *out_ptr = gpu_lower_bound(p_ptr, + h_dim, + input_dim_c1, + *rand_ptr); + } +}; + +void SpikeGenerationGPU::gpu_spike_generation( + float* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t number_of_pattern) +{ + cudaError_t status; + assert((x_dim < 65535)); + assert((y_dim < 65535)); + + // ////////////////////////////////////// + // Calculate the distribution on the GPU + // ////////////////////////////////////// + + int min_grid_size; + int block_size; + int grid_size; + + size_t dynamic_s_mem_size = 0; + size_t max_threadable_tasks = number_of_pattern * spike_dim * x_dim * y_dim; + + status = cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, + (void*)kernel_spike_generation, + dynamic_s_mem_size, max_threadable_tasks); + assert((status == cudaSuccess)); + + grid_size = ((number_of_pattern * spike_dim) + block_size - 1) / block_size; + + dim3 grid(grid_size, x_dim, y_dim); + + + kernel_spike_generation<<>>( + input_pointer, + input_dim_c0, + input_dim_c1, + input_dim_c2, + random_values_pointer, + random_values_dim_c0, + random_values_dim_c1, + random_values_dim_c2, + output_pointer, + output_dim_c0, + output_dim_c1, + output_dim_c2, + x_dim, + y_dim, + spike_dim, + h_dim, + (number_of_pattern * spike_dim)); + + cudaDeviceSynchronize(); + + return; +}; + + +void SpikeGenerationGPU::gpu_occupancy_export( + size_t dim_x, + size_t dim_y, + size_t number_of_pattern, + size_t spike_dim, + int64_t setting_memory_addr, + size_t setting_dim_0, + size_t setting_dim_1) +{ + return; +}; + +void SpikeGenerationGPU::gpu_occupancy_import( + int64_t setting_memory_addr, + size_t setting_dim_0, + size_t setting_dim_1) +{ + return; +}; \ No newline at end of file diff --git a/network/spike_generation_gpu_cpp_v1/SpikeGenerationGPU.h b/network/spike_generation_gpu_cpp_v1/SpikeGenerationGPU.h new file mode 100644 index 0000000..7059b4b --- /dev/null +++ b/network/spike_generation_gpu_cpp_v1/SpikeGenerationGPU.h @@ -0,0 +1,70 @@ +#ifndef SPIKEGENERATIONGPU +#define SPIKEGENERATIONGPU + +#include + +#include +#include + +class SpikeGenerationGPU +{ +public: + SpikeGenerationGPU(); + ~SpikeGenerationGPU(); + + void entrypoint( + int64_t input_pointer_addr, + int64_t input_dim_0, + int64_t input_dim_1, + int64_t input_dim_2, + int64_t input_dim_3, + int64_t random_values_pointer_addr, + int64_t random_values_dim_0, + int64_t random_values_dim_1, + int64_t random_values_dim_2, + int64_t random_values_dim_3, + int64_t output_pointer_addr, + int64_t output_dim_0, + int64_t output_dim_1, + int64_t output_dim_2, + int64_t output_dim_3, + int64_t number_of_cpu_processes); + + void gpu_occupancy_export( + size_t dim_x, + size_t dim_y, + size_t number_of_pattern, + size_t spike_dim, + int64_t setting_memory_addr, + size_t setting_dim_0, + size_t setting_dim_1); + + void gpu_occupancy_import( + int64_t setting_memory_addr, + size_t setting_dim_0, + size_t setting_dim_1); + +private: + + void gpu_spike_generation( + float* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t number_of_pattern); + +}; + +#endif /* SPIKEGENERATIONGPU */ diff --git a/network/spike_generation_gpu_cpp_v2/Makefile b/network/spike_generation_gpu_cpp_v2/Makefile new file mode 100644 index 0000000..7412b0c --- /dev/null +++ b/network/spike_generation_gpu_cpp_v2/Makefile @@ -0,0 +1,53 @@ +include ../.env +export + +name = SpikeGeneration +type = GPU + +PYPOSTFIX := $(shell $(PYBIN)python3-config --extension-suffix) +PYBIND11INCLUDE := $(shell $(PYBIN)python3 -m pybind11 --includes) +PARAMETERS_O = $(PARAMETERS_O_GPU) $(PYBIND11INCLUDE) +PARAMETERS_Linker = $(PARAMETERS_Linker_GPU) + +so_file = Py$(name)$(type)$(PYPOSTFIX) +pyi_file = Py$(name)$(type).pyi +all: ../$(so_file) + + +$(O_DIRS)kernel_helper_functions.o: kernel_helper_functions.h kernel_helper_functions.cpp + mkdir -p $(O_DIRS) + $(NVCC) $(PARAMETERS_O) -c kernel_helper_functions.cpp -o $(O_DIRS)kernel_helper_functions.o + +$(O_DIRS)kernel_spike_generation.o: kernel_helper_functions.h \ + kernel_spike_generation.h kernel_spike_generation.cu + mkdir -p $(O_DIRS) + $(NVCC) $(PARAMETERS_O) -c kernel_spike_generation.cu -o $(O_DIRS)kernel_spike_generation.o + + +$(O_DIRS)$(name)$(type).o: $(name)$(type).h $(name)$(type).cu + mkdir -p $(O_DIRS) + $(NVCC) $(PARAMETERS_O) -c $(name)$(type).cu -o $(O_DIRS)$(name)$(type).o + +$(O_DIRS)Py$(name)$(type).o: $(name)$(type).h Py$(name)$(type).cpp + mkdir -p $(O_DIRS) + $(NVCC) $(PARAMETERS_O) -c Py$(name)$(type).cpp -o $(O_DIRS)Py$(name)$(type).o + +../$(so_file): \ + $(O_DIRS)$(name)$(type).o \ + $(O_DIRS)Py$(name)$(type).o \ + $(O_DIRS)kernel_spike_generation.o \ + $(O_DIRS)kernel_helper_functions.o + + $(NVCC) $(PARAMETERS_Linker) -o ../$(so_file) \ + $(O_DIRS)$(name)$(type).o \ + $(O_DIRS)Py$(name)$(type).o \ + $(O_DIRS)kernel_spike_generation.o \ + $(O_DIRS)kernel_helper_functions.o + + +####################### +clean: + rm -rf $(O_DIRS) + rm -f ../$(so_file) + rm -f ../$(pyi_file) + diff --git a/network/spike_generation_gpu_cpp_v2/PySpikeGenerationGPU.cpp b/network/spike_generation_gpu_cpp_v2/PySpikeGenerationGPU.cpp new file mode 100644 index 0000000..70f4581 --- /dev/null +++ b/network/spike_generation_gpu_cpp_v2/PySpikeGenerationGPU.cpp @@ -0,0 +1,19 @@ + +#include + +#include "SpikeGenerationGPU.h" + +namespace py = pybind11; + +PYBIND11_MODULE(PySpikeGenerationGPU, m) +{ + m.doc() = "SpikeGenerationGPU Module"; + py::class_(m, "SpikeGenerationGPU") + .def(py::init<>()) + .def("gpu_occupancy_export", + &SpikeGenerationGPU::gpu_occupancy_export) + .def("gpu_occupancy_import", + &SpikeGenerationGPU::gpu_occupancy_import) + .def("spike_generation", + &SpikeGenerationGPU::entrypoint); +} diff --git a/network/spike_generation_gpu_cpp_v2/SpikeGenerationGPU.cu b/network/spike_generation_gpu_cpp_v2/SpikeGenerationGPU.cu new file mode 100644 index 0000000..042adf1 --- /dev/null +++ b/network/spike_generation_gpu_cpp_v2/SpikeGenerationGPU.cu @@ -0,0 +1,227 @@ +#include +#include +#include + +#include +#include +#include + +#include "SpikeGenerationGPU.h" +#include "kernel_spike_generation.h" + +SpikeGenerationGPU::SpikeGenerationGPU() +{ + +}; + +SpikeGenerationGPU::~SpikeGenerationGPU() +{ + +}; + +void SpikeGenerationGPU::entrypoint( + int64_t input_pointer_addr, + int64_t input_dim_0, + int64_t input_dim_1, + int64_t input_dim_2, + int64_t input_dim_3, + int64_t random_values_pointer_addr, + int64_t random_values_dim_0, + int64_t random_values_dim_1, + int64_t random_values_dim_2, + int64_t random_values_dim_3, + int64_t output_pointer_addr, + int64_t output_dim_0, + int64_t output_dim_1, + int64_t output_dim_2, + int64_t output_dim_3, + int64_t number_of_cpu_processes) +{ + float* input_pointer = (float*)input_pointer_addr; + float* random_values_pointer = (float*)random_values_pointer_addr; + int64_t* output_pointer = (int64_t*)output_pointer_addr; + + // Input + assert((input_pointer != nullptr)); + assert((input_dim_0 > 0)); + assert((input_dim_1 > 0)); + assert((input_dim_2 > 0)); + assert((input_dim_3 > 0)); + + // Random + assert((random_values_pointer != nullptr)); + assert((random_values_dim_0 > 0)); + assert((random_values_dim_1 > 0)); + assert((random_values_dim_2 > 0)); + assert((random_values_dim_3 > 0)); + + // Output + assert((output_pointer != nullptr)); + assert((output_dim_0 > 0)); + assert((output_dim_1 > 0)); + assert((output_dim_2 > 0)); + assert((output_dim_3 > 0)); + + // Input + size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3; + size_t input_dim_c1 = input_dim_2 * input_dim_3; + size_t input_dim_c2 = input_dim_3; + + // Random + size_t random_values_dim_c0 = + random_values_dim_1 * random_values_dim_2 * random_values_dim_3; + size_t random_values_dim_c1 = random_values_dim_2 * random_values_dim_3; + size_t random_values_dim_c2 = random_values_dim_3; + + // Output + size_t output_dim_c0 = output_dim_1 * output_dim_2 * output_dim_3; + size_t output_dim_c1 = output_dim_2 * output_dim_3; + size_t output_dim_c2 = output_dim_3; + + size_t number_of_pattern = input_dim_0; + size_t h_dim = input_dim_1; + size_t spike_dim = output_dim_1; + size_t x_dim = output_dim_2; + size_t y_dim = output_dim_2; + + assert((number_of_cpu_processes <= 0)); + + gpu_spike_generation( + input_pointer, input_dim_c0, input_dim_c1, input_dim_c2, + random_values_pointer, random_values_dim_c0, random_values_dim_c1, + random_values_dim_c2, output_pointer, output_dim_c0, output_dim_c1, + output_dim_c2, x_dim, y_dim, spike_dim, h_dim, number_of_pattern); + + return; +}; + + +void SpikeGenerationGPU::gpu_occupancy_measure( + size_t dim_x, + size_t dim_y, + size_t number_of_pattern, + size_t spike_dim) +{ + grid_and_thread_calculated = false; + assert((dim_x < 65535)); + assert((dim_y < 65535)); + + grid_and_thread_settings.resize(1); + + occupancy_kernel_spike_generation(dim_x, dim_y, number_of_pattern, spike_dim, + grid_and_thread_settings[0], display_debug); + + grid_and_thread_calculated = true; + return; +}; + +void SpikeGenerationGPU::gpu_occupancy_export( + size_t dim_x, + size_t dim_y, + size_t number_of_pattern, + size_t spike_dim, + int64_t setting_memory_addr, + size_t setting_dim_0, + size_t setting_dim_1) +{ + int64_t* setting_memory = (int64_t*)setting_memory_addr; + + assert((setting_memory != nullptr)); + assert((setting_dim_1 == SPIKE_GENERATION_NUMBER_OF_KERNELS_PARAMETERS)); + + gpu_occupancy_measure(dim_x, dim_y, number_of_pattern, spike_dim); + assert((grid_and_thread_calculated == true)); + assert( + (grid_and_thread_settings.size() == SPIKE_GENERATION_NUMBER_OF_KERNELS)); + + assert((setting_dim_0 == grid_and_thread_settings.size())); + + for (size_t counter_0 = 0; counter_0 < setting_dim_0; counter_0++) + { + for (size_t counter_1 = 0; counter_1 < setting_dim_1; counter_1++) + { + setting_memory[counter_0 * setting_dim_1 + counter_1] = + grid_and_thread_settings[counter_0][counter_1]; + } + } +}; + +void SpikeGenerationGPU::gpu_occupancy_import( + int64_t setting_memory_addr, + size_t setting_dim_0, + size_t setting_dim_1) +{ + grid_and_thread_calculated = false; + + int64_t* setting_memory = (int64_t*)setting_memory_addr; + + assert((setting_memory != nullptr)); + assert((setting_dim_1 == SPIKE_GENERATION_NUMBER_OF_KERNELS_PARAMETERS)); + assert((setting_dim_0 == SPIKE_GENERATION_NUMBER_OF_KERNELS)); + + grid_and_thread_settings.resize(SPIKE_GENERATION_NUMBER_OF_KERNELS); + + for (size_t counter_0 = 0; counter_0 < setting_dim_0; counter_0++) + { + grid_and_thread_settings[counter_0].resize( + SPIKE_GENERATION_NUMBER_OF_KERNELS_PARAMETERS); + + for (size_t counter_1 = 0; counter_1 < setting_dim_1; counter_1++) + { + grid_and_thread_settings[counter_0][counter_1] = + setting_memory[counter_0 * setting_dim_1 + counter_1]; + } + } + + grid_and_thread_calculated = true; +}; + +void SpikeGenerationGPU::gpu_spike_generation( + float* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t number_of_pattern) +{ + if (grid_and_thread_calculated == false) + { + gpu_occupancy_measure(x_dim, y_dim, number_of_pattern, spike_dim); + } + assert((grid_and_thread_calculated == true)); + + cudaError_t status; + assert((x_dim < 65535)); + assert((y_dim < 65535)); + + size_t psxy_block_dim_c0 = spike_dim * x_dim * y_dim; + size_t psxy_block_dim_c1 = x_dim * y_dim; + size_t psxy_block_dim_c2 = y_dim; + + kernel_spike_generation<<< + dim3(grid_and_thread_settings[0][0], grid_and_thread_settings[0][1], + grid_and_thread_settings[0][2]), + dim3(grid_and_thread_settings[0][3], grid_and_thread_settings[0][4], + grid_and_thread_settings[0][5])>>>( + input_pointer, input_dim_c0, input_dim_c1, input_dim_c2, + random_values_pointer, random_values_dim_c0, random_values_dim_c1, + random_values_dim_c2, output_pointer, output_dim_c0, output_dim_c1, + output_dim_c2, x_dim, y_dim, spike_dim, h_dim, psxy_block_dim_c0, + psxy_block_dim_c1, psxy_block_dim_c2, grid_and_thread_settings[0][6]); + + status = cudaDeviceSynchronize(); + assert((status == cudaSuccess)); + + return; +}; \ No newline at end of file diff --git a/network/spike_generation_gpu_cpp_v2/SpikeGenerationGPU.h b/network/spike_generation_gpu_cpp_v2/SpikeGenerationGPU.h new file mode 100644 index 0000000..1ed3084 --- /dev/null +++ b/network/spike_generation_gpu_cpp_v2/SpikeGenerationGPU.h @@ -0,0 +1,83 @@ +#ifndef SPIKEGENERATIONGPU +#define SPIKEGENERATIONGPU + +#include + +#include +#include +#include + +#define SPIKE_GENERATION_NUMBER_OF_KERNELS 1 +#define SPIKE_GENERATION_NUMBER_OF_KERNELS_PARAMETERS 7 + +class SpikeGenerationGPU +{ + public: + SpikeGenerationGPU(); + ~SpikeGenerationGPU(); + + void entrypoint( + int64_t input_pointer_addr, + int64_t input_dim_0, + int64_t input_dim_1, + int64_t input_dim_2, + int64_t input_dim_3, + int64_t random_values_pointer_addr, + int64_t random_values_dim_0, + int64_t random_values_dim_1, + int64_t random_values_dim_2, + int64_t random_values_dim_3, + int64_t output_pointer_addr, + int64_t output_dim_0, + int64_t output_dim_1, + int64_t output_dim_2, + int64_t output_dim_3, + int64_t number_of_cpu_processes); + + void gpu_spike_generation( + float* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t number_of_pattern); + + void gpu_occupancy_export( + size_t dim_x, + size_t dim_y, + size_t number_of_pattern, + size_t spike_dim, + int64_t setting_memory_addr, + size_t setting_dim_0, + size_t setting_dim_1); + + void gpu_occupancy_import( + int64_t setting_memory_addr, + size_t setting_dim_0, + size_t setting_dim_1); + + private: + + void gpu_occupancy_measure( + size_t dim_x, + size_t dim_y, + size_t number_of_pattern, + size_t spike_dim); + + bool grid_and_thread_calculated = false; + std::vector> grid_and_thread_settings; + bool display_debug = false; +}; + +#endif /* SPIKEGENERATIONGPU */ diff --git a/network/spike_generation_gpu_cpp_v2/kernel_helper_functions.cpp b/network/spike_generation_gpu_cpp_v2/kernel_helper_functions.cpp new file mode 100644 index 0000000..bd023e9 --- /dev/null +++ b/network/spike_generation_gpu_cpp_v2/kernel_helper_functions.cpp @@ -0,0 +1,17 @@ +#include + +#include "kernel_helper_functions.h" + +void kernel_debug_plot(std::vector output, bool display_debug) { + if (display_debug == true) { + std::cout << "grid x: " << output[0] << std::endl; + std::cout << "grid y: " << output[1] << std::endl; + std::cout << "grid z: " << output[2] << std::endl; + std::cout << "thread block x: " << output[3] << std::endl; + std::cout << "thread block y: " << output[4] << std::endl; + std::cout << "thread block z: " << output[5] << std::endl; + std::cout << "max_idx: " << output[6] << std::endl << std::endl; + } + + return; +}; \ No newline at end of file diff --git a/network/spike_generation_gpu_cpp_v2/kernel_helper_functions.h b/network/spike_generation_gpu_cpp_v2/kernel_helper_functions.h new file mode 100644 index 0000000..819910e --- /dev/null +++ b/network/spike_generation_gpu_cpp_v2/kernel_helper_functions.h @@ -0,0 +1,7 @@ +#ifndef KERNEL_HELPER_FUNCTIONS +#define KERNEL_HELPER_FUNCTIONS +#include + +void kernel_debug_plot(std::vector output, bool display_debug); + +#endif /* KERNEL_HELPER_FUNCTIONS */ diff --git a/network/spike_generation_gpu_cpp_v2/kernel_spike_generation.cu b/network/spike_generation_gpu_cpp_v2/kernel_spike_generation.cu new file mode 100644 index 0000000..d319eb7 --- /dev/null +++ b/network/spike_generation_gpu_cpp_v2/kernel_spike_generation.cu @@ -0,0 +1,120 @@ +#include +#include + +#include "kernel_helper_functions.h" +#include "kernel_spike_generation.h" + +__device__ size_t gpu_lower_bound( + float* __restrict__ data_ptr, + size_t data_length, + size_t data_ptr_stride, + float compare_to_value) +{ + size_t start_of_range = 0; + size_t length_of_range = data_length; + + while (length_of_range != 0) + { + size_t half_length = length_of_range >> 1; + size_t actual_position = start_of_range + half_length; + + if (data_ptr[actual_position * data_ptr_stride] < compare_to_value) + { + start_of_range = ++actual_position; + length_of_range -= half_length + 1; + } + else + length_of_range = half_length; + } + return start_of_range; +}; + +__global__ void kernel_spike_generation( + float* __restrict__ input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* __restrict__ random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* __restrict__ output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t block_dim_c0, + size_t block_dim_c1, + size_t block_dim_c2, + size_t max_threadable_tasks) +{ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + + if (idx < max_threadable_tasks) + { + size_t pattern_id = idx / block_dim_c0; + idx -= pattern_id * block_dim_c0; + size_t position_spike = idx / block_dim_c1; + idx -= position_spike * block_dim_c1; + size_t position_x = idx / block_dim_c2; + idx -= position_x * block_dim_c2; + size_t position_y = idx; + + float* p_ptr = input_pointer + pattern_id * input_dim_c0 + + position_x * input_dim_c2 + position_y; + + int64_t* out_ptr = output_pointer + pattern_id * output_dim_c0 + + position_x * output_dim_c2 + position_y + + position_spike * output_dim_c1; + + float* rand_ptr = random_values_pointer + + pattern_id * random_values_dim_c0 + + position_x * random_values_dim_c2 + position_y + + position_spike * random_values_dim_c1; + + *out_ptr = gpu_lower_bound(p_ptr, h_dim, input_dim_c1, *rand_ptr); + } +}; + +void occupancy_kernel_spike_generation( + size_t dim_x, size_t dim_y, + size_t number_of_pattern, + size_t spike_dim, + std::vector& output, + bool display_debug) +{ + size_t max_threadable_tasks; + cudaError_t status; + + int min_grid_size; + int thread_block_size; + int grid_size; + + max_threadable_tasks = number_of_pattern * spike_dim * dim_x * dim_y; + + status = cudaOccupancyMaxPotentialBlockSize( + &min_grid_size, &thread_block_size, (void*)kernel_spike_generation, 0, + max_threadable_tasks); + assert((status == cudaSuccess)); + + grid_size = + (max_threadable_tasks + thread_block_size - 1) / thread_block_size; + + output.resize(7); + output[0] = grid_size; + output[1] = 1; + output[2] = 1; + output[3] = thread_block_size; + output[4] = 1; + output[5] = 1; + output[6] = max_threadable_tasks; + + if (display_debug == true) + { + std::cout << "kernel_spike_generation:" << std::endl; + kernel_debug_plot(output, display_debug); + } +}; \ No newline at end of file diff --git a/network/spike_generation_gpu_cpp_v2/kernel_spike_generation.h b/network/spike_generation_gpu_cpp_v2/kernel_spike_generation.h new file mode 100644 index 0000000..650f481 --- /dev/null +++ b/network/spike_generation_gpu_cpp_v2/kernel_spike_generation.h @@ -0,0 +1,34 @@ +#ifndef KERNEL_SPIKE_GENERATION +#define KERNEL_SPIKE_GENERATION +#include + +__global__ void kernel_spike_generation( + float* __restrict__ input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* __restrict__ random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* __restrict__ output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t block_dim_c0, + size_t block_dim_c1, + size_t block_dim_c2, + size_t max_threadable_tasks); + +void occupancy_kernel_spike_generation( + size_t dim_x, size_t dim_y, + size_t number_of_pattern, + size_t spike_dim, + std::vector& output, + bool display_debug); + +#endif /* KERNEL_SPIKE_GENERATION */