# MIT License # Copyright 2022 University of Bremen # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR # THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # # David Rotermund ( davrot@uni-bremen.de ) # # # Release history: # ================ # 1.0.0 -- 01.05.2022: first release # # # %% import torch import numpy as np try: import PySpikeGeneration2DManyIP cpp_spike: bool = True except Exception: cpp_spike = False try: import PyHDynamicCNNManyIP cpp_sbs: bool = True except Exception: cpp_sbs = False class SbS(torch.nn.Module): _epsilon_xy: torch.nn.parameter.Parameter _epsilon_xy_exists: bool = False _epsilon_0: torch.Tensor | None = None _epsilon_t: torch.Tensor | None = None _weights: torch.nn.parameter.Parameter _weights_exists: bool = False _kernel_size: torch.Tensor | None = None _stride: torch.Tensor | None = None _dilation: torch.Tensor | None = None _padding: torch.Tensor | None = None _output_size: torch.Tensor | None = None _number_of_spikes: torch.Tensor | None = None _number_of_cpu_processes: torch.Tensor | None = None _number_of_neurons: torch.Tensor | None = None _number_of_input_neurons: torch.Tensor | None = None _h_initial: torch.Tensor | None = None _epsilon_xy_backup: torch.Tensor | None = None _weights_backup: torch.Tensor | None = None _alpha_number_of_iterations: torch.Tensor | None = None def __init__( self, number_of_input_neurons: int, number_of_neurons: int, input_size: list[int], forward_kernel_size: list[int], number_of_spikes: int, epsilon_t: torch.Tensor, epsilon_xy_intitial: float = 0.1, epsilon_0: float = 1.0, weight_noise_amplitude: float = 0.01, is_pooling_layer: bool = False, strides: list[int] = [1, 1], dilation: list[int] = [0, 0], padding: list[int] = [0, 0], alpha_number_of_iterations: int = 0, number_of_cpu_processes: int = 1, ) -> None: """Constructor""" super().__init__() self.stride = torch.tensor(strides, dtype=torch.int64) self.dilation = torch.tensor(dilation, dtype=torch.int64) self.padding = torch.tensor(padding, dtype=torch.int64) self.kernel_size = torch.tensor( forward_kernel_size, dtype=torch.int64, ) self.number_of_input_neurons = torch.tensor( number_of_input_neurons, dtype=torch.int64, ) self.number_of_neurons = torch.tensor( number_of_neurons, dtype=torch.int64, ) self.alpha_number_of_iterations = torch.tensor( alpha_number_of_iterations, dtype=torch.int64 ) self.calculate_output_size(torch.tensor(input_size, dtype=torch.int64)) self.set_h_init_to_uniform() self.initialize_epsilon_xy(epsilon_xy_intitial) self.epsilon_0 = torch.tensor(epsilon_0, dtype=torch.float32) self.number_of_cpu_processes = torch.tensor( number_of_cpu_processes, dtype=torch.int64 ) self.number_of_spikes = torch.tensor(number_of_spikes, dtype=torch.int64) self.epsilon_t = epsilon_t.type(dtype=torch.float32) self.initialize_weights( is_pooling_layer=is_pooling_layer, noise_amplitude=weight_noise_amplitude, ) self.functional_sbs = FunctionalSbS.apply #################################################################### # Variables in and out # #################################################################### @property def epsilon_xy(self) -> torch.Tensor | None: if self._epsilon_xy_exists is False: return None else: return self._epsilon_xy @epsilon_xy.setter def epsilon_xy(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert value.dim() == 4 assert value.dtype == torch.float32 if self._epsilon_xy_exists is False: self._epsilon_xy = torch.nn.parameter.Parameter( value.detach().clone(memory_format=torch.contiguous_format), requires_grad=True, ) self._epsilon_xy_exists = True else: self._epsilon_xy.data = value.detach().clone( memory_format=torch.contiguous_format ) @property def epsilon_0(self) -> torch.Tensor | None: return self._epsilon_0 @epsilon_0.setter def epsilon_0(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert torch.numel(value) == 1 assert value.dtype == torch.float32 assert value.item() > 0 self._epsilon_0 = value.detach().clone(memory_format=torch.contiguous_format) self._epsilon_0.requires_grad_(False) @property def epsilon_t(self) -> torch.Tensor | None: return self._epsilon_t @epsilon_t.setter def epsilon_t(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert value.dim() == 1 assert value.dtype == torch.float32 self._epsilon_t = value.detach().clone(memory_format=torch.contiguous_format) self._epsilon_t.requires_grad_(False) @property def weights(self) -> torch.Tensor | None: if self._weights_exists is False: return None else: return self._weights @weights.setter def weights(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert value.dim() == 2 assert value.dtype == torch.float32 temp: torch.Tensor = value.detach().clone(memory_format=torch.contiguous_format) temp /= temp.sum(dim=0, keepdim=True, dtype=torch.float32) if self._weights_exists is False: self._weights = torch.nn.parameter.Parameter( temp, requires_grad=True, ) self._weights_exists = True else: self._weights.data = temp @property def kernel_size(self) -> torch.Tensor | None: return self._kernel_size @kernel_size.setter def kernel_size(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert value.dim() == 1 assert torch.numel(value) == 2 assert value.dtype == torch.int64 assert value[0] > 0 assert value[1] > 0 self._kernel_size = value.detach().clone(memory_format=torch.contiguous_format) self._kernel_size.requires_grad_(False) @property def stride(self) -> torch.Tensor | None: return self._stride @stride.setter def stride(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert value.dim() == 1 assert torch.numel(value) == 2 assert value.dtype == torch.int64 assert value[0] > 0 assert value[1] > 0 self._stride = value.detach().clone(memory_format=torch.contiguous_format) self._stride.requires_grad_(False) @property def dilation(self) -> torch.Tensor | None: return self._dilation @dilation.setter def dilation(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert value.dim() == 1 assert torch.numel(value) == 2 assert value.dtype == torch.int64 assert value[0] > 0 assert value[1] > 0 self._dilation = value.detach().clone(memory_format=torch.contiguous_format) self._dilation.requires_grad_(False) @property def padding(self) -> torch.Tensor | None: return self._padding @padding.setter def padding(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert value.dim() == 1 assert torch.numel(value) == 2 assert value.dtype == torch.int64 assert value[0] >= 0 assert value[1] >= 0 self._padding = value.detach().clone(memory_format=torch.contiguous_format) self._padding.requires_grad_(False) @property def output_size(self) -> torch.Tensor | None: return self._output_size @output_size.setter def output_size(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert value.dim() == 1 assert torch.numel(value) == 2 assert value.dtype == torch.int64 assert value[0] > 0 assert value[1] > 0 self._output_size = value.detach().clone(memory_format=torch.contiguous_format) self._output_size.requires_grad_(False) @property def number_of_spikes(self) -> torch.Tensor | None: return self._number_of_spikes @number_of_spikes.setter def number_of_spikes(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert torch.numel(value) == 1 assert value.dtype == torch.int64 assert value.item() > 0 self._number_of_spikes = value.detach().clone( memory_format=torch.contiguous_format ) self._number_of_spikes.requires_grad_(False) @property def number_of_cpu_processes(self) -> torch.Tensor | None: return self._number_of_cpu_processes @number_of_cpu_processes.setter def number_of_cpu_processes(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert torch.numel(value) == 1 assert value.dtype == torch.int64 assert value.item() > 0 self._number_of_cpu_processes = value.detach().clone( memory_format=torch.contiguous_format ) self._number_of_cpu_processes.requires_grad_(False) @property def number_of_neurons(self) -> torch.Tensor | None: return self._number_of_neurons @number_of_neurons.setter def number_of_neurons(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert torch.numel(value) == 1 assert value.dtype == torch.int64 assert value.item() > 0 self._number_of_neurons = value.detach().clone( memory_format=torch.contiguous_format ) self._number_of_neurons.requires_grad_(False) @property def number_of_input_neurons(self) -> torch.Tensor | None: return self._number_of_input_neurons @number_of_input_neurons.setter def number_of_input_neurons(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert torch.numel(value) == 1 assert value.dtype == torch.int64 assert value.item() > 0 self._number_of_input_neurons = value.detach().clone( memory_format=torch.contiguous_format ) self._number_of_input_neurons.requires_grad_(False) @property def h_initial(self) -> torch.Tensor | None: return self._h_initial @h_initial.setter def h_initial(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert value.dim() == 1 assert value.dtype == torch.float32 self._h_initial = value.detach().clone(memory_format=torch.contiguous_format) self._h_initial.requires_grad_(False) @property def alpha_number_of_iterations(self) -> torch.Tensor | None: return self._alpha_number_of_iterations @alpha_number_of_iterations.setter def alpha_number_of_iterations(self, value: torch.Tensor): assert value is not None assert torch.is_tensor(value) is True assert torch.numel(value) == 1 assert value.dtype == torch.int64 assert value.item() >= 0 self._alpha_number_of_iterations = value.detach().clone( memory_format=torch.contiguous_format ) self._alpha_number_of_iterations.requires_grad_(False) #################################################################### # Forward # #################################################################### def forward(self, input: torch.Tensor) -> torch.Tensor: """PyTorch Forward method. Does the work.""" # Are we happy with the input? assert input is not None assert torch.is_tensor(input) is True assert input.dim() == 4 assert input.dtype == torch.float32 # Are we happy with the rest of the network? assert self._epsilon_xy_exists is True assert self._epsilon_xy is not None assert self._epsilon_0 is not None assert self._epsilon_t is not None assert self._weights_exists is True assert self._weights is not None assert self._kernel_size is not None assert self._stride is not None assert self._dilation is not None assert self._padding is not None assert self._output_size is not None assert self._number_of_spikes is not None assert self._number_of_cpu_processes is not None assert self._h_initial is not None assert self._alpha_number_of_iterations is not None # SbS forward functional return self.functional_sbs( input, self._epsilon_xy, self._epsilon_0, self._epsilon_t, self._weights, self._kernel_size, self._stride, self._dilation, self._padding, self._output_size, self._number_of_spikes, self._number_of_cpu_processes, self._h_initial, self._alpha_number_of_iterations, ) #################################################################### # Helper functions # #################################################################### def calculate_output_size(self, value: torch.Tensor) -> None: coordinates_0, coordinates_1 = self._get_coordinates(value) self._output_size: torch.Tensor = torch.tensor( [ coordinates_0.shape[1], coordinates_1.shape[1], ], dtype=torch.int64, ) self._output_size.requires_grad_(False) def _get_coordinates( self, value: torch.Tensor ) -> tuple[torch.Tensor, torch.Tensor]: """Function converts parameter in coordinates for the convolution window""" assert value is not None assert torch.is_tensor(value) is True assert value.dim() == 1 assert torch.numel(value) == 2 assert value.dtype == torch.int64 assert value[0] > 0 assert value[1] > 0 assert self._kernel_size is not None assert self._stride is not None assert self._dilation is not None assert self._padding is not None assert torch.numel(self._kernel_size) == 2 assert torch.numel(self._stride) == 2 assert torch.numel(self._dilation) == 2 assert torch.numel(self._padding) == 2 unfold_0: torch.nn.Unfold = torch.nn.Unfold( kernel_size=(int(self._kernel_size[0]), 1), dilation=int(self._dilation[0]), padding=int(self._padding[0]), stride=int(self._stride[0]), ) unfold_1: torch.nn.Unfold = torch.nn.Unfold( kernel_size=(1, int(self._kernel_size[1])), dilation=int(self._dilation[1]), padding=int(self._padding[1]), stride=int(self._stride[1]), ) coordinates_0: torch.Tensor = ( unfold_0( torch.unsqueeze( torch.unsqueeze( torch.unsqueeze( torch.arange(0, int(value[0]), dtype=torch.float32), 1, ), 0, ), 0, ) ) .squeeze(0) .type(torch.int64) ) coordinates_1: torch.Tensor = ( unfold_1( torch.unsqueeze( torch.unsqueeze( torch.unsqueeze( torch.arange(0, int(value[1]), dtype=torch.float32), 0, ), 0, ), 0, ) ) .squeeze(0) .type(torch.int64) ) return coordinates_0, coordinates_1 def _initial_random_weights(self, noise_amplitude: torch.Tensor) -> torch.Tensor: """Creates initial weights Uniform plus random noise plus normalization """ assert torch.numel(noise_amplitude) == 1 assert noise_amplitude.item() >= 0 assert noise_amplitude.dtype == torch.float32 assert self._number_of_neurons is not None assert self._number_of_input_neurons is not None assert self._kernel_size is not None weights = torch.empty( ( int(self._kernel_size[0]), int(self._kernel_size[1]), int(self._number_of_input_neurons), int(self._number_of_neurons), ), dtype=torch.float32, ) torch.nn.init.uniform_(weights, a=1.0, b=(1.0 + noise_amplitude.item())) return weights def _make_pooling_weights(self) -> torch.Tensor: """For generating the pooling weights.""" assert self._number_of_neurons is not None assert self._kernel_size is not None norm: float = 1.0 / (self._kernel_size[0] * self._kernel_size[1]) weights: torch.Tensor = torch.zeros( ( int(self._kernel_size[0]), int(self._kernel_size[1]), int(self._number_of_neurons), int(self._number_of_neurons), ), dtype=torch.float32, ) for i in range(0, int(self._number_of_neurons)): weights[:, :, i, i] = norm return weights def initialize_weights( self, is_pooling_layer: bool = False, noise_amplitude: float = 0.01, ) -> None: """For the generation of the initital weights. Switches between normal initial random weights and pooling weights.""" assert self._kernel_size is not None if is_pooling_layer is True: weights = self._make_pooling_weights() else: weights = self._initial_random_weights( torch.tensor(noise_amplitude, dtype=torch.float32) ) weights = weights.moveaxis(-1, 0).moveaxis(-1, 1) weights_t = torch.nn.functional.unfold( input=weights, kernel_size=(int(self._kernel_size[0]), int(self._kernel_size[1])), dilation=(1, 1), padding=(0, 0), stride=(1, 1), ).squeeze() weights_t = torch.moveaxis(weights_t, 0, 1) self.weights = weights_t def initialize_epsilon_xy( self, eps_xy_intitial: float, ) -> None: """Creates initial epsilon xy matrices""" assert self._output_size is not None assert self._kernel_size is not None assert eps_xy_intitial > 0 eps_xy_temp: torch.Tensor = torch.full( ( int(self._output_size[0]), int(self._output_size[1]), int(self._kernel_size[0]), int(self._kernel_size[1]), ), eps_xy_intitial, dtype=torch.float32, ) self.epsilon_xy = eps_xy_temp def set_h_init_to_uniform(self) -> None: assert self._number_of_neurons is not None h_initial: torch.Tensor = torch.full( (int(self._number_of_neurons.item()),), (1.0 / float(self._number_of_neurons.item())), dtype=torch.float32, ) self.h_initial = h_initial # Epsilon XY def backup_epsilon_xy(self) -> None: assert self._epsilon_xy_exists is True self._epsilon_xy_backup = self._epsilon_xy.data.clone() def restore_epsilon_xy(self) -> None: assert self._epsilon_xy_backup is not None assert self._epsilon_xy_exists is True self._epsilon_xy.data = self._epsilon_xy_backup.clone() def mean_epsilon_xy(self) -> None: assert self._epsilon_xy_exists is True fill_value: float = float(self._epsilon_xy.data.mean()) self._epsilon_xy.data = torch.full_like( self._epsilon_xy.data, fill_value, dtype=torch.float32 ) def threshold_epsilon_xy(self, threshold: float) -> None: assert self._epsilon_xy_exists is True assert threshold >= 0 torch.clamp( self._epsilon_xy.data, min=float(threshold), max=None, out=self._epsilon_xy.data, ) # Weights def backup_weights(self) -> None: assert self._weights_exists is True self._weights_backup = self._weights.data.clone() def restore_weights(self) -> None: assert self._weights_backup is not None assert self._weights_exists is True self._weights.data = self._weights_backup.clone() def norm_weights(self) -> None: assert self._weights_exists is True temp: torch.Tensor = ( self._weights.data.detach() .clone(memory_format=torch.contiguous_format) .type(dtype=torch.float32) ) temp /= temp.sum(dim=0, keepdim=True, dtype=torch.float32) self._weights.data = temp def threshold_weights(self, threshold: float) -> None: assert self._weights_exists is True assert threshold >= 0 torch.clamp( self._weights.data, min=float(threshold), max=None, out=self._weights.data, ) class FunctionalSbS(torch.autograd.Function): @staticmethod def forward( # type: ignore ctx, input: torch.Tensor, epsilon_xy: torch.Tensor, epsilon_0: torch.Tensor, epsilon_t: torch.Tensor, weights: torch.Tensor, kernel_size: torch.Tensor, stride: torch.Tensor, dilation: torch.Tensor, padding: torch.Tensor, output_size: torch.Tensor, number_of_spikes: torch.Tensor, number_of_cpu_processes: torch.Tensor, h_initial: torch.Tensor, alpha_number_of_iterations: torch.Tensor, ) -> torch.Tensor: torch.set_default_dtype(torch.float32) assert input.dim() == 4 assert torch.numel(kernel_size) == 2 assert torch.numel(dilation) == 2 assert torch.numel(padding) == 2 assert torch.numel(stride) == 2 assert torch.numel(output_size) == 2 assert torch.numel(epsilon_0) == 1 assert torch.numel(number_of_spikes) == 1 assert torch.numel(number_of_cpu_processes) == 1 assert torch.numel(alpha_number_of_iterations) == 1 input_size = torch.tensor([input.shape[2], input.shape[3]]) ############################################################ # Pre convolving the input # ############################################################ input_convolved_temp = torch.nn.functional.unfold( input, kernel_size=tuple(kernel_size.tolist()), dilation=tuple(dilation.tolist()), padding=tuple(padding.tolist()), stride=tuple(stride.tolist()), ) input_convolved = torch.nn.functional.fold( input_convolved_temp, output_size=tuple(output_size.tolist()), kernel_size=(1, 1), dilation=(1, 1), padding=(0, 0), stride=(1, 1), ).requires_grad_(True) epsilon_xy_convolved: torch.Tensor = ( ( torch.nn.functional.unfold( epsilon_xy.reshape( ( int(epsilon_xy.shape[0]) * int(epsilon_xy.shape[1]), int(epsilon_xy.shape[2]), int(epsilon_xy.shape[3]), ) ) .unsqueeze(1) .tile((1, input.shape[1], 1, 1)), kernel_size=tuple(kernel_size.tolist()), dilation=1, padding=0, stride=1, ) .squeeze(-1) .reshape( ( int(epsilon_xy.shape[0]), int(epsilon_xy.shape[1]), int(input_convolved.shape[1]), ) ) ) .moveaxis(-1, 0) .contiguous(memory_format=torch.contiguous_format) ) ############################################################ # Spike generation # ############################################################ if cpp_spike is False: # Alternative to the C++ module but 5x slower: spikes = ( ( input_convolved.movedim(source=(2, 3), destination=(0, 1)) .reshape( shape=( input_convolved.shape[2] * input_convolved.shape[3] * input_convolved.shape[0], input_convolved.shape[1], ) ) .multinomial( num_samples=int(number_of_spikes.item()), replacement=True ) ) .reshape( shape=( input_convolved.shape[2], input_convolved.shape[3], input_convolved.shape[0], int(number_of_spikes.item()), ) ) .movedim(source=(0, 1), destination=(2, 3)) ).contiguous(memory_format=torch.contiguous_format) else: # Normalized cumsum input_cumsum: torch.Tensor = torch.cumsum( input_convolved, dim=1, dtype=torch.float32 ) input_cumsum_last: torch.Tensor = input_cumsum[:, -1, :, :].unsqueeze(1) input_cumsum /= input_cumsum_last random_values = torch.rand( size=[ input_cumsum.shape[0], int(number_of_spikes.item()), input_cumsum.shape[2], input_cumsum.shape[3], ], dtype=torch.float32, ) spikes = torch.empty_like(random_values, dtype=torch.int64) # Prepare for Export (Pointer and stuff)-> np_input: np.ndarray = input_cumsum.detach().numpy() assert input_cumsum.dtype == torch.float32 assert np_input.flags["C_CONTIGUOUS"] is True assert np_input.ndim == 4 np_random_values: np.ndarray = random_values.detach().numpy() assert random_values.dtype == torch.float32 assert np_random_values.flags["C_CONTIGUOUS"] is True assert np_random_values.ndim == 4 np_spikes: np.ndarray = spikes.detach().numpy() assert spikes.dtype == torch.int64 assert np_spikes.flags["C_CONTIGUOUS"] is True assert np_spikes.ndim == 4 # <- Prepare for Export spike_generation: PySpikeGeneration2DManyIP.SpikeGeneration2DManyIP = ( PySpikeGeneration2DManyIP.SpikeGeneration2DManyIP() ) spike_generation.spike_generation_multi_pattern( np_input.__array_interface__["data"][0], int(np_input.shape[0]), int(np_input.shape[1]), int(np_input.shape[2]), int(np_input.shape[3]), np_random_values.__array_interface__["data"][0], int(np_random_values.shape[0]), int(np_random_values.shape[1]), int(np_random_values.shape[2]), int(np_random_values.shape[3]), np_spikes.__array_interface__["data"][0], int(np_spikes.shape[0]), int(np_spikes.shape[1]), int(np_spikes.shape[2]), int(np_spikes.shape[3]), int(number_of_cpu_processes.item()), ) ############################################################ # H dynamic # ############################################################ assert epsilon_t.ndim == 1 assert epsilon_t.shape[0] >= number_of_spikes if cpp_sbs is False: h = torch.tile( h_initial.unsqueeze(0).unsqueeze(0).unsqueeze(0), dims=[int(input.shape[0]), int(output_size[0]), int(output_size[1]), 1], ) epsilon_scale: torch.Tensor = torch.ones( size=[ int(spikes.shape[0]), int(spikes.shape[2]), int(spikes.shape[3]), 1, ], dtype=torch.float32, ) for t in range(0, spikes.shape[1]): if epsilon_scale.max() > 1e10: h /= epsilon_scale epsilon_scale = torch.ones_like(epsilon_scale) h_temp: torch.Tensor = weights[spikes[:, t, :, :], :] * h wx = 0 wy = 0 if t == 0: epsilon_temp: torch.Tensor = torch.empty( ( int(spikes.shape[0]), int(spikes.shape[2]), int(spikes.shape[3]), ), dtype=torch.float32, ) for wx in range(0, int(spikes.shape[2])): for wy in range(0, int(spikes.shape[3])): epsilon_temp[:, wx, wy] = epsilon_xy_convolved[ spikes[:, t, wx, wy], wx, wy ] epsilon_subsegment: torch.Tensor = ( epsilon_temp.unsqueeze(-1) * epsilon_t[t] * epsilon_0 ) h_temp_sum: torch.Tensor = ( epsilon_scale * epsilon_subsegment / h_temp.sum(dim=3, keepdim=True) ) torch.nan_to_num( h_temp_sum, out=h_temp_sum, nan=0.0, posinf=0.0, neginf=0.0 ) h_temp *= h_temp_sum h += h_temp epsilon_scale *= 1.0 + epsilon_subsegment h /= epsilon_scale output = h.movedim(3, 1) else: epsilon_t_0: torch.Tensor = epsilon_t * epsilon_0 h_shape: tuple[int, int, int, int] = ( int(input.shape[0]), int(weights.shape[1]), int(output_size[0]), int(output_size[1]), ) output = torch.empty(h_shape, dtype=torch.float32) # Prepare the export to C++ -> np_h: np.ndarray = output.detach().numpy() assert output.dtype == torch.float32 assert np_h.flags["C_CONTIGUOUS"] is True assert np_h.ndim == 4 np_epsilon_xy: np.ndarray = epsilon_xy_convolved.detach().numpy() assert epsilon_xy.dtype == torch.float32 assert np_epsilon_xy.flags["C_CONTIGUOUS"] is True assert np_epsilon_xy.ndim == 3 np_epsilon_t: np.ndarray = epsilon_t_0.detach().numpy() assert epsilon_t_0.dtype == torch.float32 assert np_epsilon_t.flags["C_CONTIGUOUS"] is True assert np_epsilon_t.ndim == 1 np_weights: np.ndarray = weights.detach().numpy() assert weights.dtype == torch.float32 assert np_weights.flags["C_CONTIGUOUS"] is True assert np_weights.ndim == 2 np_spikes = spikes.contiguous().detach().numpy() assert spikes.dtype == torch.int64 assert np_spikes.flags["C_CONTIGUOUS"] is True assert np_spikes.ndim == 4 np_h_initial = h_initial.contiguous().detach().numpy() assert h_initial.dtype == torch.float32 assert np_h_initial.flags["C_CONTIGUOUS"] is True assert np_h_initial.ndim == 1 # <- Prepare the export to C++ h_dynamic: PyHDynamicCNNManyIP.HDynamicCNNManyIP = ( PyHDynamicCNNManyIP.HDynamicCNNManyIP() ) h_dynamic.update_with_init_vector_multi_pattern( np_h.__array_interface__["data"][0], int(np_h.shape[0]), int(np_h.shape[1]), int(np_h.shape[2]), int(np_h.shape[3]), np_epsilon_xy.__array_interface__["data"][0], int(np_epsilon_xy.shape[0]), int(np_epsilon_xy.shape[1]), int(np_epsilon_xy.shape[2]), np_epsilon_t.__array_interface__["data"][0], int(np_epsilon_t.shape[0]), np_weights.__array_interface__["data"][0], int(np_weights.shape[0]), int(np_weights.shape[1]), np_spikes.__array_interface__["data"][0], int(np_spikes.shape[0]), int(np_spikes.shape[1]), int(np_spikes.shape[2]), int(np_spikes.shape[3]), np_h_initial.__array_interface__["data"][0], int(np_h_initial.shape[0]), int(number_of_cpu_processes.item()), ) ############################################################ # Alpha # ############################################################ alpha_number_of_iterations_int: int = int(alpha_number_of_iterations.item()) if alpha_number_of_iterations_int > 0: # Initialization virtual_reconstruction_weight: torch.Tensor = torch.einsum( "bixy,ji->bjxy", output, weights ) alpha_fill_value: float = 1.0 / ( virtual_reconstruction_weight.shape[2] * virtual_reconstruction_weight.shape[3] ) alpha_dynamic: torch.Tensor = torch.full( ( int(virtual_reconstruction_weight.shape[0]), 1, int(virtual_reconstruction_weight.shape[2]), int(virtual_reconstruction_weight.shape[3]), ), alpha_fill_value, dtype=torch.float32, ) # Iterations for _ in range(0, alpha_number_of_iterations_int): alpha_temp: torch.Tensor = alpha_dynamic * virtual_reconstruction_weight alpha_temp /= alpha_temp.sum(dim=3, keepdim=True).sum( dim=2, keepdim=True ) torch.nan_to_num( alpha_temp, out=alpha_temp, nan=0.0, posinf=0.0, neginf=0.0 ) alpha_temp = torch.nn.functional.unfold( alpha_temp, kernel_size=(1, 1), dilation=1, padding=0, stride=1, ) alpha_temp = torch.nn.functional.fold( alpha_temp, output_size=tuple(input_size.tolist()), kernel_size=tuple(kernel_size.tolist()), dilation=tuple(dilation.tolist()), padding=tuple(padding.tolist()), stride=tuple(stride.tolist()), ) alpha_temp = (alpha_temp * input).sum(dim=1, keepdim=True) alpha_temp = torch.nn.functional.unfold( alpha_temp, kernel_size=tuple(kernel_size.tolist()), dilation=tuple(dilation.tolist()), padding=tuple(padding.tolist()), stride=tuple(stride.tolist()), ) alpha_temp = torch.nn.functional.fold( alpha_temp, output_size=tuple(output_size.tolist()), kernel_size=(1, 1), dilation=(1, 1), padding=(0, 0), stride=(1, 1), ) alpha_dynamic = alpha_temp.sum(dim=1, keepdim=True) alpha_dynamic += 1e-20 # Alpha normalization alpha_dynamic /= alpha_dynamic.sum(dim=3, keepdim=True).sum( dim=2, keepdim=True ) torch.nan_to_num( alpha_dynamic, out=alpha_dynamic, nan=0.0, posinf=0.0, neginf=0.0 ) # Applied to the output output *= alpha_dynamic ############################################################ # Save the necessary data for the backward pass # ############################################################ ctx.save_for_backward( input_convolved, epsilon_xy_convolved, epsilon_0, weights, output, kernel_size, stride, dilation, padding, input_size, ) return output @staticmethod def backward(ctx, grad_output): # Get the variables back ( input, epsilon_xy, epsilon_0, weights, output, kernel_size, stride, dilation, padding, input_size, ) = ctx.saved_tensors torch.set_default_dtype(torch.float32) input /= input.sum(dim=1, keepdim=True, dtype=torch.float32) # For debugging: # print( # f"S: O: {output.min().item():e} {output.max().item():e} I: {input.min().item():e} {input.max().item():e} G: {grad_output.min().item():e} {grad_output.max().item():e}" # ) epsilon_0_float: float = epsilon_0.item() temp_e: torch.Tensor = 1.0 / ((epsilon_xy * epsilon_0_float) + 1.0) eps_a: torch.Tensor = temp_e.clone() eps_a *= epsilon_xy * epsilon_0_float eps_b: torch.Tensor = temp_e**2 * epsilon_0_float backprop_r: torch.Tensor = weights.unsqueeze(0).unsqueeze(-1).unsqueeze( -1 ) * output.unsqueeze(1) backprop_bigr: torch.Tensor = backprop_r.sum(axis=2) temp: torch.Tensor = input / (backprop_bigr**2 + 1e-20) backprop_f: torch.Tensor = output.unsqueeze(1) * temp.unsqueeze(2) torch.nan_to_num( backprop_f, out=backprop_f, nan=1e30, posinf=1e30, neginf=-1e30 ) torch.clip(backprop_f, out=backprop_f, min=-1e30, max=1e30) tempz: torch.Tensor = 1.0 / (backprop_bigr + 1e-20) backprop_z: torch.Tensor = backprop_r * tempz.unsqueeze(2) torch.nan_to_num( backprop_z, out=backprop_z, nan=1e30, posinf=1e30, neginf=-1e30 ) torch.clip(backprop_z, out=backprop_z, min=-1e30, max=1e30) result_omega: torch.Tensor = backprop_bigr.unsqueeze(2) * grad_output.unsqueeze( 1 ) result_omega -= torch.einsum( "bijxy,bjxy->bixy", backprop_r, grad_output ).unsqueeze(2) result_omega *= backprop_f result_eps_xy: torch.Tensor = ( ( (backprop_z * input.unsqueeze(2) - output.unsqueeze(1)) * grad_output.unsqueeze(1) ) .sum(dim=2) .sum(dim=0) ) * eps_b result_phi: torch.Tensor = torch.einsum( "bijxy,bjxy->bixy", backprop_z, grad_output ) * eps_a.unsqueeze(0) grad_weights = result_omega.sum(0).sum(-1).sum(-1) torch.nan_to_num( grad_weights, out=grad_weights, nan=1e30, posinf=1e30, neginf=-1e30 ) torch.clip(grad_weights, out=grad_weights, min=-1e30, max=1e30) grad_input = torch.nn.functional.fold( torch.nn.functional.unfold( result_phi, kernel_size=(1, 1), dilation=1, padding=0, stride=1, ), output_size=input_size, kernel_size=kernel_size, dilation=dilation, padding=padding, stride=stride, ) torch.nan_to_num( grad_input, out=grad_input, nan=1e30, posinf=1e30, neginf=-1e30 ) torch.clip(grad_input, out=grad_input, min=-1e30, max=1e30) grad_eps_xy_temp = torch.nn.functional.fold( result_eps_xy.moveaxis(0, -1) .reshape( ( int(result_eps_xy.shape[1]) * int(result_eps_xy.shape[2]), int(result_eps_xy.shape[0]), ) ) .unsqueeze(-1), output_size=kernel_size, kernel_size=kernel_size, ) grad_eps_xy = ( grad_eps_xy_temp.sum(dim=1) .reshape( ( int(result_eps_xy.shape[1]), int(result_eps_xy.shape[2]), int(grad_eps_xy_temp.shape[-2]), int(grad_eps_xy_temp.shape[-1]), ) ) .contiguous(memory_format=torch.contiguous_format) ) torch.nan_to_num( grad_eps_xy, out=grad_eps_xy, nan=1e30, posinf=1e30, neginf=-1e30 ) torch.clip(grad_eps_xy, out=grad_eps_xy, min=-1e30, max=1e30) grad_epsilon_0 = None grad_epsilon_t = None grad_kernel_size = None grad_stride = None grad_dilation = None grad_padding = None grad_output_size = None grad_number_of_spikes = None grad_number_of_cpu_processes = None grad_h_initial = None grad_alpha_number_of_iterations = None return ( grad_input, grad_eps_xy, grad_epsilon_0, grad_epsilon_t, grad_weights, grad_kernel_size, grad_stride, grad_dilation, grad_padding, grad_output_size, grad_number_of_spikes, grad_number_of_cpu_processes, grad_h_initial, grad_alpha_number_of_iterations, ) # %%