Add files via upload

parent 7bea490c5f
commit a537f3e356

5 changed files with 206 additions and 87 deletions
@@ -21,6 +21,8 @@ class HDynamicLayer(torch.nn.Module):
     device: torch.device
     default_dtype: torch.dtype
 
+    _force_forward_h_dynamic_on_cpu: bool
+
     def __init__(
         self,
         output_size: list[int],
@@ -32,6 +34,7 @@ class HDynamicLayer(torch.nn.Module):
         device: torch.device | None = None,
         default_dtype: torch.dtype | None = None,
         gpu_tuning_factor: int = 5,
+        force_forward_h_dynamic_on_cpu: bool = False,
     ) -> None:
         super().__init__()
 
@@ -46,11 +49,14 @@ class HDynamicLayer(torch.nn.Module):
         self._output_size = output_size
         self._output_layer = bool(output_layer)
         self._local_learning = bool(local_learning)
+        self._force_forward_h_dynamic_on_cpu = force_forward_h_dynamic_on_cpu
 
         global_sbs_gpu_setting.append(torch.tensor([0]))
         global_sbs_size.append(torch.tensor([0, 0, 0, 0]))
 
-        if device == torch.device("cpu"):
+        if (device == torch.device("cpu")) or (
+            self._force_forward_h_dynamic_on_cpu is True
+        ):
             global_sbs_hdynamic_cpp.append(HDynamicCNNCPU())
         else:
             global_sbs_hdynamic_cpp.append(HDynamicCNNGPU())
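The constructor now picks the compiled backend from both the device and the new override flag. A minimal, self-contained sketch of that selection rule (the helper name and class parameters are illustrative, not the repo's API):

```python
import torch

def pick_backend(device: torch.device, force_cpu: bool, cpu_cls, gpu_cls):
    # Same rule as the if/else above: CPU backend when the layer lives on the
    # CPU or when the override flag is set, GPU backend otherwise.
    if (device == torch.device("cpu")) or force_cpu:
        return cpu_cls()
    return gpu_cls()
```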
@@ -146,11 +152,6 @@ class FunctionalSbS(torch.autograd.Function):
 
         number_of_spikes: int = int(spikes.shape[1])
 
-        if input.device == torch.device("cpu"):
-            hdyn_number_of_cpu_processes: int = int(parameter_list[0])
-        else:
-            hdyn_number_of_cpu_processes = -1
-
         output_size_0: int = int(parameter_list[1])
         output_size_1: int = int(parameter_list[2])
         gpu_tuning_factor: int = int(parameter_list[3])
@@ -158,6 +159,30 @@ class FunctionalSbS(torch.autograd.Function):
         sbs_gpu_setting_position = int(parameter_list[4])
         sbs_hdynamic_cpp_position = int(parameter_list[5])
 
+        if (
+            isinstance(
+                global_sbs_hdynamic_cpp[sbs_hdynamic_cpp_position], HDynamicCNNCPU
+            )
+            is True
+        ):
+            are_we_on_a_cpu: bool = True
+            work_device: torch.device = torch.device("cpu")
+        else:
+            are_we_on_a_cpu = False
+            work_device = input.device
+
+        target_device: torch.device = input.device
+
+        if target_device == work_device:
+            data_is_on_the_same_device: bool = True
+        else:
+            data_is_on_the_same_device = False
+
+        if are_we_on_a_cpu is True:
+            hdyn_number_of_cpu_processes: int = int(parameter_list[0])
+        else:
+            hdyn_number_of_cpu_processes = -1
+
         # ###########################################################
         # H dynamic
         # ###########################################################
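This added block sets up the convention used for the rest of the forward pass: `work_device` is where the compiled kernel runs, `target_device` is where the caller's tensors live, and copies happen only when the two differ. A self-contained sketch of that pattern with illustrative names (not the repo's API):

```python
import torch

def run_on_work_device(x: torch.Tensor, force_cpu: bool) -> torch.Tensor:
    # Where the kernel runs vs. where the result has to end up.
    work_device = torch.device("cpu") if force_cpu else x.device
    target_device = x.device

    # Copy the input only if it is not already on the work device.
    x_work = x if x.device == work_device else x.to(work_device)

    y_work = x_work * 2.0  # stand-in for the real kernel call

    # Copy the result back only if the devices differ.
    return y_work if work_device == target_device else y_work.to(target_device)
```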
@@ -169,7 +194,7 @@ class FunctionalSbS(torch.autograd.Function):
         # Make space for the results
         # ############################################
 
-        output = torch.empty(
+        output_work: torch.Tensor = torch.empty(
             (
                 int(input.shape[0]),
                 int(weights.shape[1]),
@@ -177,17 +202,43 @@ class FunctionalSbS(torch.autograd.Function):
                 output_size_1,
             ),
             dtype=input.dtype,
-            device=input.device,
+            device=work_device,
         )
 
-        assert output.is_contiguous() is True
+        assert output_work.is_contiguous() is True
         if epsilon_xy is not None:
             assert epsilon_xy.is_contiguous() is True
             assert epsilon_xy.ndim == 3
+            if data_is_on_the_same_device is False:
+                epsilon_xy_work = epsilon_xy.to(work_device)
+            else:
+                epsilon_xy_work = epsilon_xy
+        else:
+            epsilon_xy_work = None
+
         assert epsilon_t_0.is_contiguous() is True
+        if data_is_on_the_same_device is False:
+            epsilon_t_0_work = epsilon_t_0.to(work_device)
+        else:
+            epsilon_t_0_work = epsilon_t_0
+
         assert weights.is_contiguous() is True
+        if data_is_on_the_same_device is False:
+            weights_work = weights.to(work_device)
+        else:
+            weights_work = weights
+
         assert spikes.is_contiguous() is True
+        if data_is_on_the_same_device is False:
+            spikes_work = spikes.to(work_device)
+        else:
+            spikes_work = spikes
+
         assert h_initial.is_contiguous() is True
+        if data_is_on_the_same_device is False:
+            h_initial_work = h_initial.to(work_device)
+        else:
+            h_initial_work = h_initial
 
         assert weights.ndim == 2
         assert h_initial.ndim == 1
@@ -196,32 +247,32 @@ class FunctionalSbS(torch.autograd.Function):
 
         sbs_size = global_sbs_size[sbs_gpu_setting_position].clone()
 
-        if input.device != torch.device("cpu"):
+        if are_we_on_a_cpu is False:
             if (
                 (sbs_profile.numel() == 1)
-                or (sbs_size[0] != int(output.shape[0]))
-                or (sbs_size[1] != int(output.shape[1]))
-                or (sbs_size[2] != int(output.shape[2]))
-                or (sbs_size[3] != int(output.shape[3]))
+                or (sbs_size[0] != int(output_work.shape[0]))
+                or (sbs_size[1] != int(output_work.shape[1]))
+                or (sbs_size[2] != int(output_work.shape[2]))
+                or (sbs_size[3] != int(output_work.shape[3]))
             ):
                 sbs_profile = torch.zeros(
                     (14, 7), dtype=torch.int64, device=torch.device("cpu")
                 )
 
                 global_sbs_hdynamic_cpp[sbs_hdynamic_cpp_position].gpu_occupancy_export(
-                    int(output.shape[2]),
-                    int(output.shape[3]),
-                    int(output.shape[0]),
-                    int(output.shape[1]),
+                    int(output_work.shape[2]),
+                    int(output_work.shape[3]),
+                    int(output_work.shape[0]),
+                    int(output_work.shape[1]),
                     sbs_profile.data_ptr(),
                     int(sbs_profile.shape[0]),
                     int(sbs_profile.shape[1]),
                 )
                 global_sbs_gpu_setting[sbs_gpu_setting_position] = sbs_profile.clone()
-                sbs_size[0] = int(output.shape[0])
-                sbs_size[1] = int(output.shape[1])
-                sbs_size[2] = int(output.shape[2])
-                sbs_size[3] = int(output.shape[3])
+                sbs_size[0] = int(output_work.shape[0])
+                sbs_size[1] = int(output_work.shape[1])
+                sbs_size[2] = int(output_work.shape[2])
+                sbs_size[3] = int(output_work.shape[3])
                 global_sbs_size[sbs_gpu_setting_position] = sbs_size.clone()
 
             else:
@@ -232,32 +283,41 @@ class FunctionalSbS(torch.autograd.Function):
                 )
 
         global_sbs_hdynamic_cpp[sbs_hdynamic_cpp_position].update(
-            output.data_ptr(),
-            int(output.shape[0]),
-            int(output.shape[1]),
-            int(output.shape[2]),
-            int(output.shape[3]),
-            epsilon_xy.data_ptr() if epsilon_xy is not None else int(0),
-            int(epsilon_xy.shape[0]) if epsilon_xy is not None else int(0),
-            int(epsilon_xy.shape[1]) if epsilon_xy is not None else int(0),
-            int(epsilon_xy.shape[2]) if epsilon_xy is not None else int(0),
-            epsilon_t_0.data_ptr(),
-            int(epsilon_t_0.shape[0]),
-            weights.data_ptr(),
-            int(weights.shape[0]),
-            int(weights.shape[1]),
-            spikes.data_ptr(),
-            int(spikes.shape[0]),
-            int(spikes.shape[1]),
-            int(spikes.shape[2]),
-            int(spikes.shape[3]),
-            h_initial.data_ptr(),
-            int(h_initial.shape[0]),
+            output_work.data_ptr(),
+            int(output_work.shape[0]),
+            int(output_work.shape[1]),
+            int(output_work.shape[2]),
+            int(output_work.shape[3]),
+            epsilon_xy_work.data_ptr() if epsilon_xy_work is not None else int(0),
+            int(epsilon_xy_work.shape[0]) if epsilon_xy_work is not None else int(0),
+            int(epsilon_xy_work.shape[1]) if epsilon_xy_work is not None else int(0),
+            int(epsilon_xy_work.shape[2]) if epsilon_xy_work is not None else int(0),
+            epsilon_t_0_work.data_ptr(),
+            int(epsilon_t_0_work.shape[0]),
+            weights_work.data_ptr(),
+            int(weights_work.shape[0]),
+            int(weights_work.shape[1]),
+            spikes_work.data_ptr(),
+            int(spikes_work.shape[0]),
+            int(spikes_work.shape[1]),
+            int(spikes_work.shape[2]),
+            int(spikes_work.shape[3]),
+            h_initial_work.data_ptr(),
+            int(h_initial_work.shape[0]),
             hdyn_number_of_cpu_processes,
             float(forgetting_offset.cpu().item()),
             int(gpu_tuning_factor),
         )
 
+        if data_is_on_the_same_device is False:
+            output = output_work.to(target_device)
+        else:
+            output = output_work
+
+        # print(output)
+        # print(output.sum(dim=1))
+        # print(output.sum(dim=1).shape)
+        # exit()
         # ###########################################################
         # Save the necessary data for the backward pass
         # ###########################################################
@@ -142,6 +142,9 @@ class Config:
     epsilon_0: float = field(default=1.0)
     forgetting_offset: float = field(default=-1.0)
 
+    force_forward_h_dynamic_on_cpu: bool = field(default=True)
+    spike_full_layer_input_distribution: list[bool] = field(default_factory=list)
+
     def __post_init__(self) -> None:
         """Post init determines the number of cores.
         Creates the required directory and gives us an optimized
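Both new options are ordinary dataclass fields, so they are set like the existing ones. A hypothetical usage sketch (only the two added fields are shown; the real Config carries many more):

```python
from dataclasses import dataclass, field

@dataclass
class ConfigSketch:
    # Stand-ins for the two fields added above.
    force_forward_h_dynamic_on_cpu: bool = field(default=True)
    spike_full_layer_input_distribution: list[bool] = field(default_factory=list)

cfg = ConfigSketch(
    force_forward_h_dynamic_on_cpu=True,
    spike_full_layer_input_distribution=[True, False, False],  # one entry per layer
)
```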
@@ -52,7 +52,9 @@ class SbSLayer(torch.nn.Module):
     _reduction_cooldown: float = 1.0
     _layer_id: int = -1
 
-    spike_full_layer_input_distribution: bool = False
+    _spike_full_layer_input_distribution: bool
+
+    _force_forward_h_dynamic_on_cpu: bool
 
     def __init__(
         self,
@@ -81,6 +83,10 @@ class SbSLayer(torch.nn.Module):
         layer_id: int = -1,
         cooldown_after_number_of_spikes: int = -1,
         reduction_cooldown: float = 1.0,
+        force_forward_h_dynamic_on_cpu: bool = True,
+        spike_full_layer_input_distribution: bool = False,
+        force_forward_spike_on_cpu: bool = False,
+        force_forward_spike_output_on_cpu: bool = False,
     ) -> None:
         super().__init__()
 
@@ -109,6 +115,8 @@ class SbSLayer(torch.nn.Module):
         self.reduction_cooldown = float(reduction_cooldown)
         self._layer_id = layer_id
         self._epsilon_xy_use = epsilon_xy_use
+        self._force_forward_h_dynamic_on_cpu = force_forward_h_dynamic_on_cpu
+        self._spike_full_layer_input_distribution = spike_full_layer_input_distribution
 
         assert len(input_size) == 2
         self._input_size = input_size
@@ -140,6 +148,8 @@ class SbSLayer(torch.nn.Module):
             number_of_spikes=self._number_of_spikes,
             number_of_cpu_processes=self._number_of_cpu_processes,
             device=self.device,
+            force_forward_spike_on_cpu=force_forward_spike_on_cpu,
+            force_forward_spike_output_on_cpu=force_forward_spike_output_on_cpu,
         )
 
         self.h_dynamic = HDynamicLayer(
@@ -152,6 +162,7 @@ class SbSLayer(torch.nn.Module):
             device=device,
             default_dtype=self.default_dtype,
             gpu_tuning_factor=gpu_tuning_factor,
+            force_forward_h_dynamic_on_cpu=self._force_forward_h_dynamic_on_cpu,
         )
 
         assert len(input_size) >= 2
@@ -169,10 +180,6 @@ class SbSLayer(torch.nn.Module):
             number_of_cpu_processes=number_of_cpu_processes,
         )
 
-        # TODO: TEST
-        if layer_id == 0:
-            self.spike_full_layer_input_distribution = True
-
         # ###############################################################
         # Initialize the weights
         # ###############################################################
@@ -438,7 +445,7 @@ class SbSLayer(torch.nn.Module):
         else:
             assert self._epsilon_xy is None
 
-        if self.spike_full_layer_input_distribution is False:
+        if self._spike_full_layer_input_distribution is False:
             spike = self.spike_generator(input_convolved, int(self._number_of_spikes))
         else:
             input_shape = input.shape
@@ -457,7 +464,9 @@ class SbSLayer(torch.nn.Module):
                     (input_shape[0], input_shape[1], input_shape[2], input_shape[3])
                 )
             )
-            spike = self.spikes_sorter(spike_unsorted).to(device=input_convolved.device)
+            spike = self.spikes_sorter(spike_unsorted)
+            if self._force_forward_h_dynamic_on_cpu is False:
+                spike = spike.to(device=input_convolved.device)
 
         output = self.h_dynamic(
             input=input_convolved,
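Leaving the sorted spikes on their current device when the H dynamic is forced onto the CPU saves a round trip: the tensor would otherwise be pushed to the GPU here only to be copied back to the CPU inside FunctionalSbS. A rough sketch of the intent (hypothetical helper, not part of the diff):

```python
import torch

def place_spikes(
    spike: torch.Tensor, compute_device: torch.device, force_cpu: bool
) -> torch.Tensor:
    # Skip the transfer when the next stage is forced onto the CPU anyway.
    if force_cpu:
        return spike
    return spike.to(device=compute_device)
```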
@@ -15,12 +15,16 @@ class SpikeLayer(torch.nn.Module):
     _number_of_cpu_processes: int
     _number_of_spikes: int
     device: torch.device
+    _force_forward_spike_on_cpu: bool
+    _force_forward_spike_output_on_cpu: bool
 
     def __init__(
         self,
         number_of_spikes: int = -1,
         number_of_cpu_processes: int = 1,
         device: torch.device | None = None,
+        force_forward_spike_on_cpu: bool = False,
+        force_forward_spike_output_on_cpu: bool = False,
     ) -> None:
         super().__init__()
 
@@ -29,11 +33,15 @@ class SpikeLayer(torch.nn.Module):
 
         self._number_of_cpu_processes = number_of_cpu_processes
         self._number_of_spikes = number_of_spikes
+        self._force_forward_spike_on_cpu = force_forward_spike_on_cpu
+        self._force_forward_spike_output_on_cpu = force_forward_spike_output_on_cpu
 
         global_spike_generation_gpu_setting.append(torch.tensor([0]))
         global_spike_size.append(torch.tensor([0, 0, 0, 0]))
 
-        if device == torch.device("cpu"):
+        if (device == torch.device("cpu")) or (
+            self._force_forward_spike_on_cpu is True
+        ):
             global_spike_generation_cpp.append(SpikeGenerationCPU())
         else:
             global_spike_generation_cpp.append(SpikeGenerationGPU())
@@ -66,6 +74,7 @@ class SpikeLayer(torch.nn.Module):
                 int(self._spike_generation_cpp_position),  # 1
                 int(self._spike_generation_gpu_setting_position),  # 2
                 int(number_of_spikes),  # 3
+                int(self._force_forward_spike_output_on_cpu),  # 4
             ],
             dtype=torch.int64,
         )
@@ -83,14 +92,35 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
 
         assert input.dim() == 4
 
-        if input.device == torch.device("cpu"):
-            spike_number_of_cpu_processes: int = int(parameter_list[0])
-        else:
-            spike_number_of_cpu_processes = -1
-
         spike_generation_cpp_position = int(parameter_list[1])
         spike_generation_gpu_setting_position = int(parameter_list[2])
         number_of_spikes: int = int(parameter_list[3])
+        force_forward_spike_output_on_cpu: bool = bool(parameter_list[4])
+
+        if (
+            isinstance(
+                global_spike_generation_cpp[spike_generation_cpp_position],
+                SpikeGenerationCPU,
+            )
+            is True
+        ):
+            are_we_on_a_cpu: bool = True
+            work_device: torch.device = torch.device("cpu")
+        else:
+            are_we_on_a_cpu = False
+            work_device = input.device
+
+        target_device: torch.device = input.device
+
+        if target_device == work_device:
+            data_is_on_the_same_device: bool = True
+        else:
+            data_is_on_the_same_device = False
+
+        if are_we_on_a_cpu is True:
+            spike_number_of_cpu_processes: int = int(parameter_list[0])
+        else:
+            spike_number_of_cpu_processes = -1
 
         # ###########################################################
         # Spike generation
@@ -100,7 +130,12 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
         # Normalized cumsum
         # (beware of the pytorch bug! Thus .clone()!)
         # ############################################
-        input_cumsum: torch.Tensor = torch.cumsum(input, dim=1, dtype=input.dtype)
+        if data_is_on_the_same_device is False:
+            input_work = input.to(work_device)
+        else:
+            input_work = input
+        # input_work = input
+        input_cumsum: torch.Tensor = torch.cumsum(input_work, dim=1, dtype=input.dtype)
         input_cumsum_last: torch.Tensor = input_cumsum[:, -1, :, :].unsqueeze(1).clone()
         input_cumsum /= input_cumsum_last
 
@@ -115,17 +150,19 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
                 input_cumsum.shape[3],
             ],
             dtype=input.dtype,
-            device=input.device,
+            device=work_device,
         )
 
         # ############################################
         # Make space for the results
         # ############################################
-        spikes = torch.empty_like(random_values, dtype=torch.int64, device=input.device)
+        spikes_work = torch.empty_like(
+            random_values, dtype=torch.int64, device=work_device
+        )
 
         assert input_cumsum.is_contiguous() is True
         assert random_values.is_contiguous() is True
-        assert spikes.is_contiguous() is True
+        assert spikes_work.is_contiguous() is True
 
         # time_start: float = time.perf_counter()
         spike_generation_profile = global_spike_generation_gpu_setting[
@@ -136,19 +173,13 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
             spike_generation_gpu_setting_position
         ].clone()
 
-        if (
-            isinstance(
-                global_spike_generation_cpp[spike_generation_cpp_position],
-                SpikeGenerationGPU,
-            )
-            is True
-        ):
+        if are_we_on_a_cpu is False:
             if (
                 (spike_generation_profile.numel() == 1)
-                or (spike_generation_size[0] != int(spikes.shape[0]))
-                or (spike_generation_size[1] != int(spikes.shape[1]))
-                or (spike_generation_size[2] != int(spikes.shape[2]))
-                or (spike_generation_size[3] != int(spikes.shape[3]))
+                or (spike_generation_size[0] != int(spikes_work.shape[0]))
+                or (spike_generation_size[1] != int(spikes_work.shape[1]))
+                or (spike_generation_size[2] != int(spikes_work.shape[2]))
+                or (spike_generation_size[3] != int(spikes_work.shape[3]))
             ):
 
                 spike_generation_profile = torch.zeros(
@@ -157,10 +188,10 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
                 global_spike_generation_cpp[
                     spike_generation_cpp_position
                 ].gpu_occupancy_export(
-                    int(spikes.shape[2]),
-                    int(spikes.shape[3]),
-                    int(spikes.shape[0]),
-                    int(spikes.shape[1]),
+                    int(spikes_work.shape[2]),
+                    int(spikes_work.shape[3]),
+                    int(spikes_work.shape[0]),
+                    int(spikes_work.shape[1]),
                     spike_generation_profile.data_ptr(),
                     int(spike_generation_profile.shape[0]),
                     int(spike_generation_profile.shape[1]),
@@ -169,10 +200,10 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
                     spike_generation_gpu_setting_position
                 ] = spike_generation_profile.clone()
 
-                spike_generation_size[0] = int(spikes.shape[0])
-                spike_generation_size[1] = int(spikes.shape[1])
-                spike_generation_size[2] = int(spikes.shape[2])
-                spike_generation_size[3] = int(spikes.shape[3])
+                spike_generation_size[0] = int(spikes_work.shape[0])
+                spike_generation_size[1] = int(spikes_work.shape[1])
+                spike_generation_size[2] = int(spikes_work.shape[2])
+                spike_generation_size[3] = int(spikes_work.shape[3])
                 global_spike_size[
                     spike_generation_gpu_setting_position
                 ] = spike_generation_size.clone()
@@ -197,15 +228,20 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
             int(random_values.shape[1]),
             int(random_values.shape[2]),
             int(random_values.shape[3]),
-            spikes.data_ptr(),
-            int(spikes.shape[0]),
-            int(spikes.shape[1]),
-            int(spikes.shape[2]),
-            int(spikes.shape[3]),
+            spikes_work.data_ptr(),
+            int(spikes_work.shape[0]),
+            int(spikes_work.shape[1]),
+            int(spikes_work.shape[2]),
+            int(spikes_work.shape[3]),
             int(spike_number_of_cpu_processes),
         )
 
-        del random_values
-        del input_cumsum
+        if (force_forward_spike_output_on_cpu is True) and (are_we_on_a_cpu is True):
+            spikes = spikes_work
+        elif data_is_on_the_same_device is False:
+            spikes = spikes_work.to(target_device)
+        else:
+            spikes = spikes_work
+
         return spikes
 
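The return path now covers three cases: the result stays on the CPU when force_forward_spike_output_on_cpu is set and the generator actually ran on the CPU, is copied back when work and target device differ, and is returned unchanged otherwise. A compact sketch of that decision (hypothetical helper, not part of the diff):

```python
import torch

def place_output(
    spikes_work: torch.Tensor,
    target_device: torch.device,
    keep_on_cpu: bool,
    ran_on_cpu: bool,
) -> torch.Tensor:
    # Caller explicitly asked for CPU output and the kernel already ran there.
    if keep_on_cpu and ran_on_cpu:
        return spikes_work
    # Otherwise hand the result back on the caller's device.
    if spikes_work.device != target_device:
        return spikes_work.to(target_device)
    return spikes_work
```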
@@ -120,6 +120,12 @@ def build_network(
             cfg.learning_parameters.sbs_skip_gradient_calculation[0]
         )
 
+        spike_full_layer_input_distribution: bool = False
+        if len(cfg.spike_full_layer_input_distribution) > layer_id:
+            spike_full_layer_input_distribution = (
+                cfg.spike_full_layer_input_distribution[layer_id]
+            )
+
         # #############################################################
         # SbS layer:
         # #############################################################
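The lookup above treats cfg.spike_full_layer_input_distribution as a list that may be shorter than the network: layers without an entry fall back to False. A small sketch of that indexing rule (hypothetical helper):

```python
def flag_for_layer(flags: list[bool], layer_id: int) -> bool:
    # Layers beyond the end of the list default to False.
    if len(flags) > layer_id:
        return flags[layer_id]
    return False

assert flag_for_layer([True], 0) is True
assert flag_for_layer([True], 3) is False
```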
@@ -138,7 +144,10 @@ def build_network(
         assert number_of_spikes > 0
 
         logging.info(
+            (
                 f"Layer: {layer_id} -> SbS Layer with {number_of_spikes} spikes "
+                f"-- draw spike from full layer: {spike_full_layer_input_distribution}"
+            )
         )
         is_pooling_layer: bool = False
         if cfg.network_structure.layer_type[layer_id].upper().find("POOLING") != -1:
@@ -169,6 +178,8 @@ def build_network(
                 layer_id=layer_id,
                 cooldown_after_number_of_spikes=cfg.cooldown_after_number_of_spikes,
                 reduction_cooldown=cfg.reduction_cooldown,
+                force_forward_h_dynamic_on_cpu=cfg.force_forward_h_dynamic_on_cpu,
+                spike_full_layer_input_distribution=spike_full_layer_input_distribution,
             )
         )
         # Adding the x,y output dimensions