Add files via upload

This commit is contained in:
David Rotermund 2023-02-06 09:55:30 +01:00 committed by GitHub
parent 7bea490c5f
commit a537f3e356
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 206 additions and 87 deletions

View file

@ -21,6 +21,8 @@ class HDynamicLayer(torch.nn.Module):
device: torch.device
default_dtype: torch.dtype
_force_forward_h_dynamic_on_cpu: bool
def __init__(
self,
output_size: list[int],
@ -32,6 +34,7 @@ class HDynamicLayer(torch.nn.Module):
device: torch.device | None = None,
default_dtype: torch.dtype | None = None,
gpu_tuning_factor: int = 5,
force_forward_h_dynamic_on_cpu: bool = False,
) -> None:
super().__init__()
@ -46,11 +49,14 @@ class HDynamicLayer(torch.nn.Module):
self._output_size = output_size
self._output_layer = bool(output_layer)
self._local_learning = bool(local_learning)
self._force_forward_h_dynamic_on_cpu = force_forward_h_dynamic_on_cpu
global_sbs_gpu_setting.append(torch.tensor([0]))
global_sbs_size.append(torch.tensor([0, 0, 0, 0]))
if device == torch.device("cpu"):
if (device == torch.device("cpu")) or (
self._force_forward_h_dynamic_on_cpu is True
):
global_sbs_hdynamic_cpp.append(HDynamicCNNCPU())
else:
global_sbs_hdynamic_cpp.append(HDynamicCNNGPU())
@ -146,11 +152,6 @@ class FunctionalSbS(torch.autograd.Function):
number_of_spikes: int = int(spikes.shape[1])
if input.device == torch.device("cpu"):
hdyn_number_of_cpu_processes: int = int(parameter_list[0])
else:
hdyn_number_of_cpu_processes = -1
output_size_0: int = int(parameter_list[1])
output_size_1: int = int(parameter_list[2])
gpu_tuning_factor: int = int(parameter_list[3])
@ -158,6 +159,30 @@ class FunctionalSbS(torch.autograd.Function):
sbs_gpu_setting_position = int(parameter_list[4])
sbs_hdynamic_cpp_position = int(parameter_list[5])
if (
isinstance(
global_sbs_hdynamic_cpp[sbs_hdynamic_cpp_position], HDynamicCNNCPU
)
is True
):
are_we_on_a_cpu: bool = True
work_device: torch.device = torch.device("cpu")
else:
are_we_on_a_cpu = False
work_device = input.device
target_device: torch.device = input.device
if target_device == work_device:
data_is_on_the_same_device: bool = True
else:
data_is_on_the_same_device = False
if are_we_on_a_cpu is True:
hdyn_number_of_cpu_processes: int = int(parameter_list[0])
else:
hdyn_number_of_cpu_processes = -1
# ###########################################################
# H dynamic
# ###########################################################
@ -169,7 +194,7 @@ class FunctionalSbS(torch.autograd.Function):
# Make space for the results
# ############################################
output = torch.empty(
output_work: torch.Tensor = torch.empty(
(
int(input.shape[0]),
int(weights.shape[1]),
@ -177,17 +202,43 @@ class FunctionalSbS(torch.autograd.Function):
output_size_1,
),
dtype=input.dtype,
device=input.device,
device=work_device,
)
assert output.is_contiguous() is True
assert output_work.is_contiguous() is True
if epsilon_xy is not None:
assert epsilon_xy.is_contiguous() is True
assert epsilon_xy.ndim == 3
if data_is_on_the_same_device is False:
epsilon_xy_work = epsilon_xy.to(work_device)
else:
epsilon_xy_work = epsilon_xy
else:
epsilon_xy_work = None
assert epsilon_t_0.is_contiguous() is True
if data_is_on_the_same_device is False:
epsilon_t_0_work = epsilon_t_0.to(work_device)
else:
epsilon_t_0_work = epsilon_t_0
assert weights.is_contiguous() is True
if data_is_on_the_same_device is False:
weights_work = weights.to(work_device)
else:
weights_work = weights
assert spikes.is_contiguous() is True
if data_is_on_the_same_device is False:
spikes_work = spikes.to(work_device)
else:
spikes_work = spikes
assert h_initial.is_contiguous() is True
if data_is_on_the_same_device is False:
h_initial_work = h_initial.to(work_device)
else:
h_initial_work = h_initial
assert weights.ndim == 2
assert h_initial.ndim == 1
@ -196,32 +247,32 @@ class FunctionalSbS(torch.autograd.Function):
sbs_size = global_sbs_size[sbs_gpu_setting_position].clone()
if input.device != torch.device("cpu"):
if are_we_on_a_cpu is False:
if (
(sbs_profile.numel() == 1)
or (sbs_size[0] != int(output.shape[0]))
or (sbs_size[1] != int(output.shape[1]))
or (sbs_size[2] != int(output.shape[2]))
or (sbs_size[3] != int(output.shape[3]))
or (sbs_size[0] != int(output_work.shape[0]))
or (sbs_size[1] != int(output_work.shape[1]))
or (sbs_size[2] != int(output_work.shape[2]))
or (sbs_size[3] != int(output_work.shape[3]))
):
sbs_profile = torch.zeros(
(14, 7), dtype=torch.int64, device=torch.device("cpu")
)
global_sbs_hdynamic_cpp[sbs_hdynamic_cpp_position].gpu_occupancy_export(
int(output.shape[2]),
int(output.shape[3]),
int(output.shape[0]),
int(output.shape[1]),
int(output_work.shape[2]),
int(output_work.shape[3]),
int(output_work.shape[0]),
int(output_work.shape[1]),
sbs_profile.data_ptr(),
int(sbs_profile.shape[0]),
int(sbs_profile.shape[1]),
)
global_sbs_gpu_setting[sbs_gpu_setting_position] = sbs_profile.clone()
sbs_size[0] = int(output.shape[0])
sbs_size[1] = int(output.shape[1])
sbs_size[2] = int(output.shape[2])
sbs_size[3] = int(output.shape[3])
sbs_size[0] = int(output_work.shape[0])
sbs_size[1] = int(output_work.shape[1])
sbs_size[2] = int(output_work.shape[2])
sbs_size[3] = int(output_work.shape[3])
global_sbs_size[sbs_gpu_setting_position] = sbs_size.clone()
else:
@ -232,32 +283,41 @@ class FunctionalSbS(torch.autograd.Function):
)
global_sbs_hdynamic_cpp[sbs_hdynamic_cpp_position].update(
output.data_ptr(),
int(output.shape[0]),
int(output.shape[1]),
int(output.shape[2]),
int(output.shape[3]),
epsilon_xy.data_ptr() if epsilon_xy is not None else int(0),
int(epsilon_xy.shape[0]) if epsilon_xy is not None else int(0),
int(epsilon_xy.shape[1]) if epsilon_xy is not None else int(0),
int(epsilon_xy.shape[2]) if epsilon_xy is not None else int(0),
epsilon_t_0.data_ptr(),
int(epsilon_t_0.shape[0]),
weights.data_ptr(),
int(weights.shape[0]),
int(weights.shape[1]),
spikes.data_ptr(),
int(spikes.shape[0]),
int(spikes.shape[1]),
int(spikes.shape[2]),
int(spikes.shape[3]),
h_initial.data_ptr(),
int(h_initial.shape[0]),
output_work.data_ptr(),
int(output_work.shape[0]),
int(output_work.shape[1]),
int(output_work.shape[2]),
int(output_work.shape[3]),
epsilon_xy_work.data_ptr() if epsilon_xy_work is not None else int(0),
int(epsilon_xy_work.shape[0]) if epsilon_xy_work is not None else int(0),
int(epsilon_xy_work.shape[1]) if epsilon_xy_work is not None else int(0),
int(epsilon_xy_work.shape[2]) if epsilon_xy_work is not None else int(0),
epsilon_t_0_work.data_ptr(),
int(epsilon_t_0_work.shape[0]),
weights_work.data_ptr(),
int(weights_work.shape[0]),
int(weights_work.shape[1]),
spikes_work.data_ptr(),
int(spikes_work.shape[0]),
int(spikes_work.shape[1]),
int(spikes_work.shape[2]),
int(spikes_work.shape[3]),
h_initial_work.data_ptr(),
int(h_initial_work.shape[0]),
hdyn_number_of_cpu_processes,
float(forgetting_offset.cpu().item()),
int(gpu_tuning_factor),
)
if data_is_on_the_same_device is False:
output = output_work.to(target_device)
else:
output = output_work
# print(output)
# print(output.sum(dim=1))
# print(output.sum(dim=1).shape)
# exit()
# ###########################################################
# Save the necessary data for the backward pass
# ###########################################################

View file

@ -142,6 +142,9 @@ class Config:
epsilon_0: float = field(default=1.0)
forgetting_offset: float = field(default=-1.0)
force_forward_h_dynamic_on_cpu: bool = field(default=True)
spike_full_layer_input_distribution: list[bool] = field(default_factory=list)
def __post_init__(self) -> None:
"""Post init determines the number of cores.
Creates the required directory and gives us an optimized

View file

@ -52,7 +52,9 @@ class SbSLayer(torch.nn.Module):
_reduction_cooldown: float = 1.0
_layer_id: int = -1
spike_full_layer_input_distribution: bool = False
_spike_full_layer_input_distribution: bool
_force_forward_h_dynamic_on_cpu: bool
def __init__(
self,
@ -81,6 +83,10 @@ class SbSLayer(torch.nn.Module):
layer_id: int = -1,
cooldown_after_number_of_spikes: int = -1,
reduction_cooldown: float = 1.0,
force_forward_h_dynamic_on_cpu: bool = True,
spike_full_layer_input_distribution: bool = False,
force_forward_spike_on_cpu: bool = False,
force_forward_spike_output_on_cpu: bool = False,
) -> None:
super().__init__()
@ -109,6 +115,8 @@ class SbSLayer(torch.nn.Module):
self.reduction_cooldown = float(reduction_cooldown)
self._layer_id = layer_id
self._epsilon_xy_use = epsilon_xy_use
self._force_forward_h_dynamic_on_cpu = force_forward_h_dynamic_on_cpu
self._spike_full_layer_input_distribution = spike_full_layer_input_distribution
assert len(input_size) == 2
self._input_size = input_size
@ -140,6 +148,8 @@ class SbSLayer(torch.nn.Module):
number_of_spikes=self._number_of_spikes,
number_of_cpu_processes=self._number_of_cpu_processes,
device=self.device,
force_forward_spike_on_cpu=force_forward_spike_on_cpu,
force_forward_spike_output_on_cpu=force_forward_spike_output_on_cpu,
)
self.h_dynamic = HDynamicLayer(
@ -152,6 +162,7 @@ class SbSLayer(torch.nn.Module):
device=device,
default_dtype=self.default_dtype,
gpu_tuning_factor=gpu_tuning_factor,
force_forward_h_dynamic_on_cpu=self._force_forward_h_dynamic_on_cpu,
)
assert len(input_size) >= 2
@ -169,10 +180,6 @@ class SbSLayer(torch.nn.Module):
number_of_cpu_processes=number_of_cpu_processes,
)
# TODO: TEST
if layer_id == 0:
self.spike_full_layer_input_distribution = True
# ###############################################################
# Initialize the weights
# ###############################################################
@ -438,7 +445,7 @@ class SbSLayer(torch.nn.Module):
else:
assert self._epsilon_xy is None
if self.spike_full_layer_input_distribution is False:
if self._spike_full_layer_input_distribution is False:
spike = self.spike_generator(input_convolved, int(self._number_of_spikes))
else:
input_shape = input.shape
@ -457,7 +464,9 @@ class SbSLayer(torch.nn.Module):
(input_shape[0], input_shape[1], input_shape[2], input_shape[3])
)
)
spike = self.spikes_sorter(spike_unsorted).to(device=input_convolved.device)
spike = self.spikes_sorter(spike_unsorted)
if self._force_forward_h_dynamic_on_cpu is False:
spike = spike.to(device=input_convolved.device)
output = self.h_dynamic(
input=input_convolved,

View file

@ -15,12 +15,16 @@ class SpikeLayer(torch.nn.Module):
_number_of_cpu_processes: int
_number_of_spikes: int
device: torch.device
_force_forward_spike_on_cpu: bool
_force_forward_spike_output_on_cpu: bool
def __init__(
self,
number_of_spikes: int = -1,
number_of_cpu_processes: int = 1,
device: torch.device | None = None,
force_forward_spike_on_cpu: bool = False,
force_forward_spike_output_on_cpu: bool = False,
) -> None:
super().__init__()
@ -29,11 +33,15 @@ class SpikeLayer(torch.nn.Module):
self._number_of_cpu_processes = number_of_cpu_processes
self._number_of_spikes = number_of_spikes
self._force_forward_spike_on_cpu = force_forward_spike_on_cpu
self._force_forward_spike_output_on_cpu = force_forward_spike_output_on_cpu
global_spike_generation_gpu_setting.append(torch.tensor([0]))
global_spike_size.append(torch.tensor([0, 0, 0, 0]))
if device == torch.device("cpu"):
if (device == torch.device("cpu")) or (
self._force_forward_spike_on_cpu is True
):
global_spike_generation_cpp.append(SpikeGenerationCPU())
else:
global_spike_generation_cpp.append(SpikeGenerationGPU())
@ -66,6 +74,7 @@ class SpikeLayer(torch.nn.Module):
int(self._spike_generation_cpp_position), # 1
int(self._spike_generation_gpu_setting_position), # 2
int(number_of_spikes), # 3
int(self._force_forward_spike_output_on_cpu), # 4
],
dtype=torch.int64,
)
@ -83,14 +92,35 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
assert input.dim() == 4
if input.device == torch.device("cpu"):
spike_number_of_cpu_processes: int = int(parameter_list[0])
else:
spike_number_of_cpu_processes = -1
spike_generation_cpp_position = int(parameter_list[1])
spike_generation_gpu_setting_position = int(parameter_list[2])
number_of_spikes: int = int(parameter_list[3])
force_forward_spike_output_on_cpu: bool = bool(parameter_list[4])
if (
isinstance(
global_spike_generation_cpp[spike_generation_cpp_position],
SpikeGenerationCPU,
)
is True
):
are_we_on_a_cpu: bool = True
work_device: torch.device = torch.device("cpu")
else:
are_we_on_a_cpu = False
work_device = input.device
target_device: torch.device = input.device
if target_device == work_device:
data_is_on_the_same_device: bool = True
else:
data_is_on_the_same_device = False
if are_we_on_a_cpu is True:
spike_number_of_cpu_processes: int = int(parameter_list[0])
else:
spike_number_of_cpu_processes = -1
# ###########################################################
# Spike generation
@ -100,7 +130,12 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
# Normalized cumsum
# (beware of the pytorch bug! Thus .clone()!)
# ############################################
input_cumsum: torch.Tensor = torch.cumsum(input, dim=1, dtype=input.dtype)
if data_is_on_the_same_device is False:
input_work = input.to(work_device)
else:
input_work = input
# input_work = input
input_cumsum: torch.Tensor = torch.cumsum(input_work, dim=1, dtype=input.dtype)
input_cumsum_last: torch.Tensor = input_cumsum[:, -1, :, :].unsqueeze(1).clone()
input_cumsum /= input_cumsum_last
@ -115,17 +150,19 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
input_cumsum.shape[3],
],
dtype=input.dtype,
device=input.device,
device=work_device,
)
# ############################################
# Make space for the results
# ############################################
spikes = torch.empty_like(random_values, dtype=torch.int64, device=input.device)
spikes_work = torch.empty_like(
random_values, dtype=torch.int64, device=work_device
)
assert input_cumsum.is_contiguous() is True
assert random_values.is_contiguous() is True
assert spikes.is_contiguous() is True
assert spikes_work.is_contiguous() is True
# time_start: float = time.perf_counter()
spike_generation_profile = global_spike_generation_gpu_setting[
@ -136,19 +173,13 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
spike_generation_gpu_setting_position
].clone()
if (
isinstance(
global_spike_generation_cpp[spike_generation_cpp_position],
SpikeGenerationGPU,
)
is True
):
if are_we_on_a_cpu is False:
if (
(spike_generation_profile.numel() == 1)
or (spike_generation_size[0] != int(spikes.shape[0]))
or (spike_generation_size[1] != int(spikes.shape[1]))
or (spike_generation_size[2] != int(spikes.shape[2]))
or (spike_generation_size[3] != int(spikes.shape[3]))
or (spike_generation_size[0] != int(spikes_work.shape[0]))
or (spike_generation_size[1] != int(spikes_work.shape[1]))
or (spike_generation_size[2] != int(spikes_work.shape[2]))
or (spike_generation_size[3] != int(spikes_work.shape[3]))
):
spike_generation_profile = torch.zeros(
@ -157,10 +188,10 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
global_spike_generation_cpp[
spike_generation_cpp_position
].gpu_occupancy_export(
int(spikes.shape[2]),
int(spikes.shape[3]),
int(spikes.shape[0]),
int(spikes.shape[1]),
int(spikes_work.shape[2]),
int(spikes_work.shape[3]),
int(spikes_work.shape[0]),
int(spikes_work.shape[1]),
spike_generation_profile.data_ptr(),
int(spike_generation_profile.shape[0]),
int(spike_generation_profile.shape[1]),
@ -169,10 +200,10 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
spike_generation_gpu_setting_position
] = spike_generation_profile.clone()
spike_generation_size[0] = int(spikes.shape[0])
spike_generation_size[1] = int(spikes.shape[1])
spike_generation_size[2] = int(spikes.shape[2])
spike_generation_size[3] = int(spikes.shape[3])
spike_generation_size[0] = int(spikes_work.shape[0])
spike_generation_size[1] = int(spikes_work.shape[1])
spike_generation_size[2] = int(spikes_work.shape[2])
spike_generation_size[3] = int(spikes_work.shape[3])
global_spike_size[
spike_generation_gpu_setting_position
] = spike_generation_size.clone()
@ -197,15 +228,20 @@ class FunctionalSpikeGeneration(torch.autograd.Function):
int(random_values.shape[1]),
int(random_values.shape[2]),
int(random_values.shape[3]),
spikes.data_ptr(),
int(spikes.shape[0]),
int(spikes.shape[1]),
int(spikes.shape[2]),
int(spikes.shape[3]),
spikes_work.data_ptr(),
int(spikes_work.shape[0]),
int(spikes_work.shape[1]),
int(spikes_work.shape[2]),
int(spikes_work.shape[3]),
int(spike_number_of_cpu_processes),
)
del random_values
del input_cumsum
if (force_forward_spike_output_on_cpu is True) and (are_we_on_a_cpu is True):
spikes = spikes_work
elif data_is_on_the_same_device is False:
spikes = spikes_work.to(target_device)
else:
spikes = spikes_work
return spikes

View file

@ -120,6 +120,12 @@ def build_network(
cfg.learning_parameters.sbs_skip_gradient_calculation[0]
)
spike_full_layer_input_distribution: bool = False
if len(cfg.spike_full_layer_input_distribution) > layer_id:
spike_full_layer_input_distribution = (
cfg.spike_full_layer_input_distribution[layer_id]
)
# #############################################################
# SbS layer:
# #############################################################
@ -138,7 +144,10 @@ def build_network(
assert number_of_spikes > 0
logging.info(
(
f"Layer: {layer_id} -> SbS Layer with {number_of_spikes} spikes "
f"-- draw spike from full layer: {spike_full_layer_input_distribution}"
)
)
is_pooling_layer: bool = False
if cfg.network_structure.layer_type[layer_id].upper().find("POOLING") != -1:
@ -169,6 +178,8 @@ def build_network(
layer_id=layer_id,
cooldown_after_number_of_spikes=cfg.cooldown_after_number_of_spikes,
reduction_cooldown=cfg.reduction_cooldown,
force_forward_h_dynamic_on_cpu=cfg.force_forward_h_dynamic_on_cpu,
spike_full_layer_input_distribution=spike_full_layer_input_distribution,
)
)
# Adding the x,y output dimensions