Add files via upload

This commit is contained in:
David Rotermund 2023-02-02 23:07:12 +01:00 committed by GitHub
parent b5f95bd0db
commit 7bb5df9a45
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 957 additions and 0 deletions

View file

@ -0,0 +1,33 @@
include ../.env
export
name = SpikeGeneration
type = GPU
PYPOSTFIX := $(shell $(PYBIN)python3-config --extension-suffix)
PYBIND11INCLUDE := $(shell $(PYBIN)python3 -m pybind11 --includes)
PARAMETERS_O = $(PARAMETERS_O_GPU) $(PYBIND11INCLUDE)
PARAMETERS_Linker = $(PARAMETERS_Linker_GPU)
so_file = Py$(name)$(type)$(PYPOSTFIX)
pyi_file = Py$(name)$(type).pyi
all: ../$(so_file)
$(O_DIRS)$(name)$(type).o: $(name)$(type).h $(name)$(type).cu
mkdir -p $(O_DIRS)
$(NVCC) $(PARAMETERS_O) -c $(name)$(type).cu -o $(O_DIRS)$(name)$(type).o
$(O_DIRS)Py$(name)$(type).o: $(name)$(type).h Py$(name)$(type).cpp
mkdir -p $(O_DIRS)
$(NVCC) $(PARAMETERS_O) -c Py$(name)$(type).cpp -o $(O_DIRS)Py$(name)$(type).o
../$(so_file): $(O_DIRS)$(name)$(type).o $(O_DIRS)Py$(name)$(type).o
$(NVCC) $(PARAMETERS_Linker) -o ../$(so_file) $(O_DIRS)$(name)$(type).o $(O_DIRS)Py$(name)$(type).o
#######################
clean:
rm -rf $(O_DIRS)
rm -f ../$(so_file)
rm -f ../$(pyi_file)

View file

@ -0,0 +1,19 @@
#include <pybind11/pybind11.h>
#include "SpikeGenerationGPU.h"
namespace py = pybind11;
PYBIND11_MODULE(PySpikeGenerationGPU, m)
{
m.doc() = "SpikeGenerationGPU Module";
py::class_<SpikeGenerationGPU>(m, "SpikeGenerationGPU")
.def(py::init<>())
.def("gpu_occupancy_export",
&SpikeGenerationGPU::gpu_occupancy_export)
.def("gpu_occupancy_import",
&SpikeGenerationGPU::gpu_occupancy_import)
.def("spike_generation",
&SpikeGenerationGPU::entrypoint);
}

View file

@ -0,0 +1,275 @@
#include "SpikeGenerationGPU.h"
#include <omp.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <cassert>
#include <iostream>
SpikeGenerationGPU::SpikeGenerationGPU()
{
};
SpikeGenerationGPU::~SpikeGenerationGPU()
{
};
void SpikeGenerationGPU::entrypoint(
int64_t input_pointer_addr,
int64_t input_dim_0,
int64_t input_dim_1,
int64_t input_dim_2,
int64_t input_dim_3,
int64_t random_values_pointer_addr,
int64_t random_values_dim_0,
int64_t random_values_dim_1,
int64_t random_values_dim_2,
int64_t random_values_dim_3,
int64_t output_pointer_addr,
int64_t output_dim_0,
int64_t output_dim_1,
int64_t output_dim_2,
int64_t output_dim_3,
int64_t number_of_cpu_processes)
{
float* input_pointer = (float*)input_pointer_addr;
float* random_values_pointer = (float*)random_values_pointer_addr;
int64_t* output_pointer = (int64_t*)output_pointer_addr;
// Input
assert((input_pointer != nullptr));
assert((input_dim_0 > 0));
assert((input_dim_1 > 0));
assert((input_dim_2 > 0));
assert((input_dim_3 > 0));
// Random
assert((random_values_pointer != nullptr));
assert((random_values_dim_0 > 0));
assert((random_values_dim_1 > 0));
assert((random_values_dim_2 > 0));
assert((random_values_dim_3 > 0));
// Output
assert((output_pointer != nullptr));
assert((output_dim_0 > 0));
assert((output_dim_1 > 0));
assert((output_dim_2 > 0));
assert((output_dim_3 > 0));
// Input
size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3;
size_t input_dim_c1 = input_dim_2 * input_dim_3;
size_t input_dim_c2 = input_dim_3;
// Random
size_t random_values_dim_c0 =
random_values_dim_1 * random_values_dim_2 * random_values_dim_3;
size_t random_values_dim_c1 =
random_values_dim_2 * random_values_dim_3;
size_t random_values_dim_c2 = random_values_dim_3;
// Output
size_t output_dim_c0 =
output_dim_1 * output_dim_2 * output_dim_3;
size_t output_dim_c1 = output_dim_2 * output_dim_3;
size_t output_dim_c2 = output_dim_3;
size_t number_of_pattern = input_dim_0;
size_t h_dim = input_dim_1;
size_t spike_dim = output_dim_1;
size_t x_dim = output_dim_2;
size_t y_dim = output_dim_2;
assert ((number_of_cpu_processes <= 0));
gpu_spike_generation(
input_pointer,
input_dim_c0,
input_dim_c1,
input_dim_c2,
random_values_pointer,
random_values_dim_c0,
random_values_dim_c1,
random_values_dim_c2,
output_pointer,
output_dim_c0,
output_dim_c1,
output_dim_c2,
x_dim,
y_dim,
spike_dim,
h_dim,
number_of_pattern);
return;
};
__device__ size_t gpu_lower_bound(float* __restrict__ data_ptr,
size_t data_length,
size_t data_ptr_stride,
float compare_to_value)
{
size_t start_of_range = 0;
size_t length_of_range = data_length;
while (length_of_range != 0)
{
size_t half_length = length_of_range >> 1;
size_t actual_position = start_of_range + half_length;
if (data_ptr[actual_position * data_ptr_stride] < compare_to_value)
{
start_of_range = ++actual_position;
length_of_range -= half_length + 1;
}
else
length_of_range = half_length;
}
return start_of_range;
};
__global__ void kernel_spike_generation(
float* __restrict__ input_pointer,
size_t input_dim_c0,
size_t input_dim_c1,
size_t input_dim_c2,
float* __restrict__ random_values_pointer,
size_t random_values_dim_c0,
size_t random_values_dim_c1,
size_t random_values_dim_c2,
int64_t* __restrict__ output_pointer,
size_t output_dim_c0,
size_t output_dim_c1,
size_t output_dim_c2,
size_t x_dim,
size_t y_dim,
size_t spike_dim,
size_t h_dim,
size_t max_threadable_tasks)
{
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < max_threadable_tasks)
{
size_t pattern_id = idx / spike_dim;
size_t position_spike = idx - (pattern_id * spike_dim);
size_t position_x = blockIdx.y;
size_t position_y = blockIdx.z;
float* p_ptr = input_pointer + pattern_id * input_dim_c0 +
position_x * input_dim_c2 + position_y;
int64_t* out_ptr = output_pointer + pattern_id * output_dim_c0 +
position_x * output_dim_c2 + position_y
+ position_spike * output_dim_c1;
float* rand_ptr = random_values_pointer +
pattern_id * random_values_dim_c0 +
position_x * random_values_dim_c2 + position_y
+ position_spike * random_values_dim_c1;
*out_ptr = gpu_lower_bound(p_ptr,
h_dim,
input_dim_c1,
*rand_ptr);
}
};
void SpikeGenerationGPU::gpu_spike_generation(
float* input_pointer,
size_t input_dim_c0,
size_t input_dim_c1,
size_t input_dim_c2,
float* random_values_pointer,
size_t random_values_dim_c0,
size_t random_values_dim_c1,
size_t random_values_dim_c2,
int64_t* output_pointer,
size_t output_dim_c0,
size_t output_dim_c1,
size_t output_dim_c2,
size_t x_dim,
size_t y_dim,
size_t spike_dim,
size_t h_dim,
size_t number_of_pattern)
{
cudaError_t status;
assert((x_dim < 65535));
assert((y_dim < 65535));
// //////////////////////////////////////
// Calculate the distribution on the GPU
// //////////////////////////////////////
int min_grid_size;
int block_size;
int grid_size;
size_t dynamic_s_mem_size = 0;
size_t max_threadable_tasks = number_of_pattern * spike_dim * x_dim * y_dim;
status = cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size,
(void*)kernel_spike_generation,
dynamic_s_mem_size, max_threadable_tasks);
assert((status == cudaSuccess));
grid_size = ((number_of_pattern * spike_dim) + block_size - 1) / block_size;
dim3 grid(grid_size, x_dim, y_dim);
kernel_spike_generation<<<grid, block_size >>>(
input_pointer,
input_dim_c0,
input_dim_c1,
input_dim_c2,
random_values_pointer,
random_values_dim_c0,
random_values_dim_c1,
random_values_dim_c2,
output_pointer,
output_dim_c0,
output_dim_c1,
output_dim_c2,
x_dim,
y_dim,
spike_dim,
h_dim,
(number_of_pattern * spike_dim));
cudaDeviceSynchronize();
return;
};
void SpikeGenerationGPU::gpu_occupancy_export(
size_t dim_x,
size_t dim_y,
size_t number_of_pattern,
size_t spike_dim,
int64_t setting_memory_addr,
size_t setting_dim_0,
size_t setting_dim_1)
{
return;
};
void SpikeGenerationGPU::gpu_occupancy_import(
int64_t setting_memory_addr,
size_t setting_dim_0,
size_t setting_dim_1)
{
return;
};

View file

@ -0,0 +1,70 @@
#ifndef SPIKEGENERATIONGPU
#define SPIKEGENERATIONGPU
#include <unistd.h>
#include <cctype>
#include <iostream>
class SpikeGenerationGPU
{
public:
SpikeGenerationGPU();
~SpikeGenerationGPU();
void entrypoint(
int64_t input_pointer_addr,
int64_t input_dim_0,
int64_t input_dim_1,
int64_t input_dim_2,
int64_t input_dim_3,
int64_t random_values_pointer_addr,
int64_t random_values_dim_0,
int64_t random_values_dim_1,
int64_t random_values_dim_2,
int64_t random_values_dim_3,
int64_t output_pointer_addr,
int64_t output_dim_0,
int64_t output_dim_1,
int64_t output_dim_2,
int64_t output_dim_3,
int64_t number_of_cpu_processes);
void gpu_occupancy_export(
size_t dim_x,
size_t dim_y,
size_t number_of_pattern,
size_t spike_dim,
int64_t setting_memory_addr,
size_t setting_dim_0,
size_t setting_dim_1);
void gpu_occupancy_import(
int64_t setting_memory_addr,
size_t setting_dim_0,
size_t setting_dim_1);
private:
void gpu_spike_generation(
float* input_pointer,
size_t input_dim_c0,
size_t input_dim_c1,
size_t input_dim_c2,
float* random_values_pointer,
size_t random_values_dim_c0,
size_t random_values_dim_c1,
size_t random_values_dim_c2,
int64_t* output_pointer,
size_t output_dim_c0,
size_t output_dim_c1,
size_t output_dim_c2,
size_t x_dim,
size_t y_dim,
size_t spike_dim,
size_t h_dim,
size_t number_of_pattern);
};
#endif /* SPIKEGENERATIONGPU */

View file

@ -0,0 +1,53 @@
include ../.env
export
name = SpikeGeneration
type = GPU
PYPOSTFIX := $(shell $(PYBIN)python3-config --extension-suffix)
PYBIND11INCLUDE := $(shell $(PYBIN)python3 -m pybind11 --includes)
PARAMETERS_O = $(PARAMETERS_O_GPU) $(PYBIND11INCLUDE)
PARAMETERS_Linker = $(PARAMETERS_Linker_GPU)
so_file = Py$(name)$(type)$(PYPOSTFIX)
pyi_file = Py$(name)$(type).pyi
all: ../$(so_file)
$(O_DIRS)kernel_helper_functions.o: kernel_helper_functions.h kernel_helper_functions.cpp
mkdir -p $(O_DIRS)
$(NVCC) $(PARAMETERS_O) -c kernel_helper_functions.cpp -o $(O_DIRS)kernel_helper_functions.o
$(O_DIRS)kernel_spike_generation.o: kernel_helper_functions.h \
kernel_spike_generation.h kernel_spike_generation.cu
mkdir -p $(O_DIRS)
$(NVCC) $(PARAMETERS_O) -c kernel_spike_generation.cu -o $(O_DIRS)kernel_spike_generation.o
$(O_DIRS)$(name)$(type).o: $(name)$(type).h $(name)$(type).cu
mkdir -p $(O_DIRS)
$(NVCC) $(PARAMETERS_O) -c $(name)$(type).cu -o $(O_DIRS)$(name)$(type).o
$(O_DIRS)Py$(name)$(type).o: $(name)$(type).h Py$(name)$(type).cpp
mkdir -p $(O_DIRS)
$(NVCC) $(PARAMETERS_O) -c Py$(name)$(type).cpp -o $(O_DIRS)Py$(name)$(type).o
../$(so_file): \
$(O_DIRS)$(name)$(type).o \
$(O_DIRS)Py$(name)$(type).o \
$(O_DIRS)kernel_spike_generation.o \
$(O_DIRS)kernel_helper_functions.o
$(NVCC) $(PARAMETERS_Linker) -o ../$(so_file) \
$(O_DIRS)$(name)$(type).o \
$(O_DIRS)Py$(name)$(type).o \
$(O_DIRS)kernel_spike_generation.o \
$(O_DIRS)kernel_helper_functions.o
#######################
clean:
rm -rf $(O_DIRS)
rm -f ../$(so_file)
rm -f ../$(pyi_file)

View file

@ -0,0 +1,19 @@
#include <pybind11/pybind11.h>
#include "SpikeGenerationGPU.h"
namespace py = pybind11;
PYBIND11_MODULE(PySpikeGenerationGPU, m)
{
m.doc() = "SpikeGenerationGPU Module";
py::class_<SpikeGenerationGPU>(m, "SpikeGenerationGPU")
.def(py::init<>())
.def("gpu_occupancy_export",
&SpikeGenerationGPU::gpu_occupancy_export)
.def("gpu_occupancy_import",
&SpikeGenerationGPU::gpu_occupancy_import)
.def("spike_generation",
&SpikeGenerationGPU::entrypoint);
}

View file

@ -0,0 +1,227 @@
#include <omp.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <cassert>
#include <iostream>
#include "SpikeGenerationGPU.h"
#include "kernel_spike_generation.h"
SpikeGenerationGPU::SpikeGenerationGPU()
{
};
SpikeGenerationGPU::~SpikeGenerationGPU()
{
};
void SpikeGenerationGPU::entrypoint(
int64_t input_pointer_addr,
int64_t input_dim_0,
int64_t input_dim_1,
int64_t input_dim_2,
int64_t input_dim_3,
int64_t random_values_pointer_addr,
int64_t random_values_dim_0,
int64_t random_values_dim_1,
int64_t random_values_dim_2,
int64_t random_values_dim_3,
int64_t output_pointer_addr,
int64_t output_dim_0,
int64_t output_dim_1,
int64_t output_dim_2,
int64_t output_dim_3,
int64_t number_of_cpu_processes)
{
float* input_pointer = (float*)input_pointer_addr;
float* random_values_pointer = (float*)random_values_pointer_addr;
int64_t* output_pointer = (int64_t*)output_pointer_addr;
// Input
assert((input_pointer != nullptr));
assert((input_dim_0 > 0));
assert((input_dim_1 > 0));
assert((input_dim_2 > 0));
assert((input_dim_3 > 0));
// Random
assert((random_values_pointer != nullptr));
assert((random_values_dim_0 > 0));
assert((random_values_dim_1 > 0));
assert((random_values_dim_2 > 0));
assert((random_values_dim_3 > 0));
// Output
assert((output_pointer != nullptr));
assert((output_dim_0 > 0));
assert((output_dim_1 > 0));
assert((output_dim_2 > 0));
assert((output_dim_3 > 0));
// Input
size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3;
size_t input_dim_c1 = input_dim_2 * input_dim_3;
size_t input_dim_c2 = input_dim_3;
// Random
size_t random_values_dim_c0 =
random_values_dim_1 * random_values_dim_2 * random_values_dim_3;
size_t random_values_dim_c1 = random_values_dim_2 * random_values_dim_3;
size_t random_values_dim_c2 = random_values_dim_3;
// Output
size_t output_dim_c0 = output_dim_1 * output_dim_2 * output_dim_3;
size_t output_dim_c1 = output_dim_2 * output_dim_3;
size_t output_dim_c2 = output_dim_3;
size_t number_of_pattern = input_dim_0;
size_t h_dim = input_dim_1;
size_t spike_dim = output_dim_1;
size_t x_dim = output_dim_2;
size_t y_dim = output_dim_2;
assert((number_of_cpu_processes <= 0));
gpu_spike_generation(
input_pointer, input_dim_c0, input_dim_c1, input_dim_c2,
random_values_pointer, random_values_dim_c0, random_values_dim_c1,
random_values_dim_c2, output_pointer, output_dim_c0, output_dim_c1,
output_dim_c2, x_dim, y_dim, spike_dim, h_dim, number_of_pattern);
return;
};
void SpikeGenerationGPU::gpu_occupancy_measure(
size_t dim_x,
size_t dim_y,
size_t number_of_pattern,
size_t spike_dim)
{
grid_and_thread_calculated = false;
assert((dim_x < 65535));
assert((dim_y < 65535));
grid_and_thread_settings.resize(1);
occupancy_kernel_spike_generation(dim_x, dim_y, number_of_pattern, spike_dim,
grid_and_thread_settings[0], display_debug);
grid_and_thread_calculated = true;
return;
};
void SpikeGenerationGPU::gpu_occupancy_export(
size_t dim_x,
size_t dim_y,
size_t number_of_pattern,
size_t spike_dim,
int64_t setting_memory_addr,
size_t setting_dim_0,
size_t setting_dim_1)
{
int64_t* setting_memory = (int64_t*)setting_memory_addr;
assert((setting_memory != nullptr));
assert((setting_dim_1 == SPIKE_GENERATION_NUMBER_OF_KERNELS_PARAMETERS));
gpu_occupancy_measure(dim_x, dim_y, number_of_pattern, spike_dim);
assert((grid_and_thread_calculated == true));
assert(
(grid_and_thread_settings.size() == SPIKE_GENERATION_NUMBER_OF_KERNELS));
assert((setting_dim_0 == grid_and_thread_settings.size()));
for (size_t counter_0 = 0; counter_0 < setting_dim_0; counter_0++)
{
for (size_t counter_1 = 0; counter_1 < setting_dim_1; counter_1++)
{
setting_memory[counter_0 * setting_dim_1 + counter_1] =
grid_and_thread_settings[counter_0][counter_1];
}
}
};
void SpikeGenerationGPU::gpu_occupancy_import(
int64_t setting_memory_addr,
size_t setting_dim_0,
size_t setting_dim_1)
{
grid_and_thread_calculated = false;
int64_t* setting_memory = (int64_t*)setting_memory_addr;
assert((setting_memory != nullptr));
assert((setting_dim_1 == SPIKE_GENERATION_NUMBER_OF_KERNELS_PARAMETERS));
assert((setting_dim_0 == SPIKE_GENERATION_NUMBER_OF_KERNELS));
grid_and_thread_settings.resize(SPIKE_GENERATION_NUMBER_OF_KERNELS);
for (size_t counter_0 = 0; counter_0 < setting_dim_0; counter_0++)
{
grid_and_thread_settings[counter_0].resize(
SPIKE_GENERATION_NUMBER_OF_KERNELS_PARAMETERS);
for (size_t counter_1 = 0; counter_1 < setting_dim_1; counter_1++)
{
grid_and_thread_settings[counter_0][counter_1] =
setting_memory[counter_0 * setting_dim_1 + counter_1];
}
}
grid_and_thread_calculated = true;
};
void SpikeGenerationGPU::gpu_spike_generation(
float* input_pointer,
size_t input_dim_c0,
size_t input_dim_c1,
size_t input_dim_c2,
float* random_values_pointer,
size_t random_values_dim_c0,
size_t random_values_dim_c1,
size_t random_values_dim_c2,
int64_t* output_pointer,
size_t output_dim_c0,
size_t output_dim_c1,
size_t output_dim_c2,
size_t x_dim,
size_t y_dim,
size_t spike_dim,
size_t h_dim,
size_t number_of_pattern)
{
if (grid_and_thread_calculated == false)
{
gpu_occupancy_measure(x_dim, y_dim, number_of_pattern, spike_dim);
}
assert((grid_and_thread_calculated == true));
cudaError_t status;
assert((x_dim < 65535));
assert((y_dim < 65535));
size_t psxy_block_dim_c0 = spike_dim * x_dim * y_dim;
size_t psxy_block_dim_c1 = x_dim * y_dim;
size_t psxy_block_dim_c2 = y_dim;
kernel_spike_generation<<<
dim3(grid_and_thread_settings[0][0], grid_and_thread_settings[0][1],
grid_and_thread_settings[0][2]),
dim3(grid_and_thread_settings[0][3], grid_and_thread_settings[0][4],
grid_and_thread_settings[0][5])>>>(
input_pointer, input_dim_c0, input_dim_c1, input_dim_c2,
random_values_pointer, random_values_dim_c0, random_values_dim_c1,
random_values_dim_c2, output_pointer, output_dim_c0, output_dim_c1,
output_dim_c2, x_dim, y_dim, spike_dim, h_dim, psxy_block_dim_c0,
psxy_block_dim_c1, psxy_block_dim_c2, grid_and_thread_settings[0][6]);
status = cudaDeviceSynchronize();
assert((status == cudaSuccess));
return;
};

View file

@ -0,0 +1,83 @@
#ifndef SPIKEGENERATIONGPU
#define SPIKEGENERATIONGPU
#include <unistd.h>
#include <cctype>
#include <iostream>
#include <vector>
#define SPIKE_GENERATION_NUMBER_OF_KERNELS 1
#define SPIKE_GENERATION_NUMBER_OF_KERNELS_PARAMETERS 7
class SpikeGenerationGPU
{
public:
SpikeGenerationGPU();
~SpikeGenerationGPU();
void entrypoint(
int64_t input_pointer_addr,
int64_t input_dim_0,
int64_t input_dim_1,
int64_t input_dim_2,
int64_t input_dim_3,
int64_t random_values_pointer_addr,
int64_t random_values_dim_0,
int64_t random_values_dim_1,
int64_t random_values_dim_2,
int64_t random_values_dim_3,
int64_t output_pointer_addr,
int64_t output_dim_0,
int64_t output_dim_1,
int64_t output_dim_2,
int64_t output_dim_3,
int64_t number_of_cpu_processes);
void gpu_spike_generation(
float* input_pointer,
size_t input_dim_c0,
size_t input_dim_c1,
size_t input_dim_c2,
float* random_values_pointer,
size_t random_values_dim_c0,
size_t random_values_dim_c1,
size_t random_values_dim_c2,
int64_t* output_pointer,
size_t output_dim_c0,
size_t output_dim_c1,
size_t output_dim_c2,
size_t x_dim,
size_t y_dim,
size_t spike_dim,
size_t h_dim,
size_t number_of_pattern);
void gpu_occupancy_export(
size_t dim_x,
size_t dim_y,
size_t number_of_pattern,
size_t spike_dim,
int64_t setting_memory_addr,
size_t setting_dim_0,
size_t setting_dim_1);
void gpu_occupancy_import(
int64_t setting_memory_addr,
size_t setting_dim_0,
size_t setting_dim_1);
private:
void gpu_occupancy_measure(
size_t dim_x,
size_t dim_y,
size_t number_of_pattern,
size_t spike_dim);
bool grid_and_thread_calculated = false;
std::vector<std::vector<size_t>> grid_and_thread_settings;
bool display_debug = false;
};
#endif /* SPIKEGENERATIONGPU */

View file

@ -0,0 +1,17 @@
#include <iostream>
#include "kernel_helper_functions.h"
void kernel_debug_plot(std::vector<size_t> output, bool display_debug) {
if (display_debug == true) {
std::cout << "grid x: " << output[0] << std::endl;
std::cout << "grid y: " << output[1] << std::endl;
std::cout << "grid z: " << output[2] << std::endl;
std::cout << "thread block x: " << output[3] << std::endl;
std::cout << "thread block y: " << output[4] << std::endl;
std::cout << "thread block z: " << output[5] << std::endl;
std::cout << "max_idx: " << output[6] << std::endl << std::endl;
}
return;
};

View file

@ -0,0 +1,7 @@
#ifndef KERNEL_HELPER_FUNCTIONS
#define KERNEL_HELPER_FUNCTIONS
#include <vector>
void kernel_debug_plot(std::vector<size_t> output, bool display_debug);
#endif /* KERNEL_HELPER_FUNCTIONS */

View file

@ -0,0 +1,120 @@
#include <cassert>
#include <iostream>
#include "kernel_helper_functions.h"
#include "kernel_spike_generation.h"
__device__ size_t gpu_lower_bound(
float* __restrict__ data_ptr,
size_t data_length,
size_t data_ptr_stride,
float compare_to_value)
{
size_t start_of_range = 0;
size_t length_of_range = data_length;
while (length_of_range != 0)
{
size_t half_length = length_of_range >> 1;
size_t actual_position = start_of_range + half_length;
if (data_ptr[actual_position * data_ptr_stride] < compare_to_value)
{
start_of_range = ++actual_position;
length_of_range -= half_length + 1;
}
else
length_of_range = half_length;
}
return start_of_range;
};
__global__ void kernel_spike_generation(
float* __restrict__ input_pointer,
size_t input_dim_c0,
size_t input_dim_c1,
size_t input_dim_c2,
float* __restrict__ random_values_pointer,
size_t random_values_dim_c0,
size_t random_values_dim_c1,
size_t random_values_dim_c2,
int64_t* __restrict__ output_pointer,
size_t output_dim_c0,
size_t output_dim_c1,
size_t output_dim_c2,
size_t x_dim,
size_t y_dim,
size_t spike_dim,
size_t h_dim,
size_t block_dim_c0,
size_t block_dim_c1,
size_t block_dim_c2,
size_t max_threadable_tasks)
{
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < max_threadable_tasks)
{
size_t pattern_id = idx / block_dim_c0;
idx -= pattern_id * block_dim_c0;
size_t position_spike = idx / block_dim_c1;
idx -= position_spike * block_dim_c1;
size_t position_x = idx / block_dim_c2;
idx -= position_x * block_dim_c2;
size_t position_y = idx;
float* p_ptr = input_pointer + pattern_id * input_dim_c0 +
position_x * input_dim_c2 + position_y;
int64_t* out_ptr = output_pointer + pattern_id * output_dim_c0 +
position_x * output_dim_c2 + position_y +
position_spike * output_dim_c1;
float* rand_ptr = random_values_pointer +
pattern_id * random_values_dim_c0 +
position_x * random_values_dim_c2 + position_y +
position_spike * random_values_dim_c1;
*out_ptr = gpu_lower_bound(p_ptr, h_dim, input_dim_c1, *rand_ptr);
}
};
void occupancy_kernel_spike_generation(
size_t dim_x, size_t dim_y,
size_t number_of_pattern,
size_t spike_dim,
std::vector<size_t>& output,
bool display_debug)
{
size_t max_threadable_tasks;
cudaError_t status;
int min_grid_size;
int thread_block_size;
int grid_size;
max_threadable_tasks = number_of_pattern * spike_dim * dim_x * dim_y;
status = cudaOccupancyMaxPotentialBlockSize(
&min_grid_size, &thread_block_size, (void*)kernel_spike_generation, 0,
max_threadable_tasks);
assert((status == cudaSuccess));
grid_size =
(max_threadable_tasks + thread_block_size - 1) / thread_block_size;
output.resize(7);
output[0] = grid_size;
output[1] = 1;
output[2] = 1;
output[3] = thread_block_size;
output[4] = 1;
output[5] = 1;
output[6] = max_threadable_tasks;
if (display_debug == true)
{
std::cout << "kernel_spike_generation:" << std::endl;
kernel_debug_plot(output, display_debug);
}
};

View file

@ -0,0 +1,34 @@
#ifndef KERNEL_SPIKE_GENERATION
#define KERNEL_SPIKE_GENERATION
#include <vector>
__global__ void kernel_spike_generation(
float* __restrict__ input_pointer,
size_t input_dim_c0,
size_t input_dim_c1,
size_t input_dim_c2,
float* __restrict__ random_values_pointer,
size_t random_values_dim_c0,
size_t random_values_dim_c1,
size_t random_values_dim_c2,
int64_t* __restrict__ output_pointer,
size_t output_dim_c0,
size_t output_dim_c1,
size_t output_dim_c2,
size_t x_dim,
size_t y_dim,
size_t spike_dim,
size_t h_dim,
size_t block_dim_c0,
size_t block_dim_c1,
size_t block_dim_c2,
size_t max_threadable_tasks);
void occupancy_kernel_spike_generation(
size_t dim_x, size_t dim_y,
size_t number_of_pattern,
size_t spike_dim,
std::vector<size_t>& output,
bool display_debug);
#endif /* KERNEL_SPIKE_GENERATION */