From 654014b31953b22a12344641647f59f0fd311f2d Mon Sep 17 00:00:00 2001
From: David Rotermund <54365609+davrot@users.noreply.github.com>
Date: Wed, 4 May 2022 14:42:44 +0200
Subject: [PATCH] backprop is single precision now

---
 Parameter.py |  14 +++++---
 SbS.py       | 100 ++++++++++++++++++++++++---------------------------
 learn_it.py  |  48 ++++++++++++++-----------
 test_all.sh  |   6 ++++
 test_it.py   |  12 ++++---
 5 files changed, 98 insertions(+), 82 deletions(-)
 create mode 100644 test_all.sh

diff --git a/Parameter.py b/Parameter.py
index e8d63ea..61a9346 100644
--- a/Parameter.py
+++ b/Parameter.py
@@ -71,6 +71,7 @@ class LearningParameters:
     learning_rate_threshold_eps_xy: float = field(default=0.00001)

     lr_schedule_name: str = field(default="ReduceLROnPlateau")
+    lr_scheduler_use_performance: bool = field(default=True)
     lr_scheduler_factor_w: float = field(default=0.75)
     lr_scheduler_patience_w: int = field(default=-1)
@@ -133,11 +134,12 @@ class Config:
     number_of_cpu_processes: int = field(default=-1)
     number_of_spikes: int = field(default=0)
-    cooldown_after_number_of_spikes: int = field(default=0)
+    cooldown_after_number_of_spikes: int = field(default=-1)

     weight_path: str = field(default="./Weights/")
     eps_xy_path: str = field(default="./EpsXY/")
     data_path: str = field(default="./")
+    results_path: str = field(default="./Results")

     reduction_cooldown: float = field(default=25.0)
     epsilon_0: float = field(default=1.0)
@@ -159,6 +161,7 @@ class Config:
         os.makedirs(self.weight_path, exist_ok=True)
         os.makedirs(self.eps_xy_path, exist_ok=True)
         os.makedirs(self.data_path, exist_ok=True)
+        os.makedirs(self.results_path, exist_ok=True)

         self.batch_size = (
             self.batch_size // self.number_of_cpu_processes
@@ -170,9 +173,12 @@ class Config:
     def get_epsilon_t(self):
         """Generates the time series of the basic epsilon."""
         np_epsilon_t: np.ndarray = np.ones((self.number_of_spikes), dtype=np.float32)
-        np_epsilon_t[
-            self.cooldown_after_number_of_spikes : self.number_of_spikes
-        ] /= self.reduction_cooldown
+        if (self.cooldown_after_number_of_spikes < self.number_of_spikes) and (
+            self.cooldown_after_number_of_spikes >= 0
+        ):
+            np_epsilon_t[
+                self.cooldown_after_number_of_spikes : self.number_of_spikes
+            ] /= self.reduction_cooldown
         return torch.tensor(np_epsilon_t)

     def get_update_after_x_pattern(self):
diff --git a/SbS.py b/SbS.py
index 70c5a1f..df50b28 100644
--- a/SbS.py
+++ b/SbS.py
@@ -122,7 +122,7 @@ class SbS(torch.nn.Module):

         self.initialize_epsilon_xy(epsilon_xy_intitial)

-        self.epsilon_0 = torch.tensor(epsilon_0, dtype=torch.float64)
+        self.epsilon_0 = torch.tensor(epsilon_0, dtype=torch.float32)

         self.number_of_cpu_processes = torch.tensor(
             number_of_cpu_processes, dtype=torch.int64
@@ -130,7 +130,7 @@ class SbS(torch.nn.Module):

         self.number_of_spikes = torch.tensor(number_of_spikes, dtype=torch.int64)

-        self.epsilon_t = epsilon_t.type(dtype=torch.float64)
+        self.epsilon_t = epsilon_t.type(dtype=torch.float32)

         self.initialize_weights(
             is_pooling_layer=is_pooling_layer,
@@ -155,7 +155,7 @@ class SbS(torch.nn.Module):
         assert value is not None
         assert torch.is_tensor(value) is True
         assert value.dim() == 4
-        assert value.dtype == torch.float64
+        assert value.dtype == torch.float32
         if self._epsilon_xy_exists is False:
             self._epsilon_xy = torch.nn.parameter.Parameter(
                 value.detach().clone(memory_format=torch.contiguous_format),
@@ -176,7 +176,7 @@ class SbS(torch.nn.Module):
         assert value is not None
         assert torch.is_tensor(value) is True
         assert torch.numel(value) == 1
-        assert value.dtype == torch.float64
+        assert value.dtype == torch.float32
         assert value.item() > 0
         self._epsilon_0 = value.detach().clone(memory_format=torch.contiguous_format)
         self._epsilon_0.requires_grad_(False)
@@ -190,7 +190,7 @@ class SbS(torch.nn.Module):
         assert value is not None
         assert torch.is_tensor(value) is True
         assert value.dim() == 1
-        assert value.dtype == torch.float64
+        assert value.dtype == torch.float32
         self._epsilon_t = value.detach().clone(memory_format=torch.contiguous_format)
         self._epsilon_t.requires_grad_(False)
@@ -206,9 +206,9 @@ class SbS(torch.nn.Module):
         assert value is not None
         assert torch.is_tensor(value) is True
         assert value.dim() == 2
-        assert value.dtype == torch.float64
+        assert value.dtype == torch.float32
         temp: torch.Tensor = value.detach().clone(memory_format=torch.contiguous_format)
-        temp /= temp.sum(dim=0, keepdim=True, dtype=torch.float64)
+        temp /= temp.sum(dim=0, keepdim=True, dtype=torch.float32)
         if self._weights_exists is False:
             self._weights = torch.nn.parameter.Parameter(
                 temp,
@@ -402,7 +402,7 @@ class SbS(torch.nn.Module):
         assert input is not None
         assert torch.is_tensor(input) is True
         assert input.dim() == 4
-        assert input.dtype == torch.float64
+        assert input.dtype == torch.float32

         # Are we happy with the rest of the network?
         assert self._epsilon_xy_exists is True
@@ -499,7 +499,7 @@ class SbS(torch.nn.Module):
             torch.unsqueeze(
                 torch.unsqueeze(
                     torch.unsqueeze(
-                        torch.arange(0, int(value[0]), dtype=torch.float64),
+                        torch.arange(0, int(value[0]), dtype=torch.float32),
                         1,
                     ),
                     0,
@@ -516,7 +516,7 @@ class SbS(torch.nn.Module):
             torch.unsqueeze(
                 torch.unsqueeze(
                     torch.unsqueeze(
-                        torch.arange(0, int(value[1]), dtype=torch.float64),
+                        torch.arange(0, int(value[1]), dtype=torch.float32),
                         0,
                     ),
                     0,
@@ -537,7 +537,7 @@ class SbS(torch.nn.Module):

         assert torch.numel(noise_amplitude) == 1
         assert noise_amplitude.item() >= 0
-        assert noise_amplitude.dtype == torch.float64
+        assert noise_amplitude.dtype == torch.float32

         assert self._number_of_neurons is not None
         assert self._number_of_input_neurons is not None
@@ -550,7 +550,7 @@ class SbS(torch.nn.Module):
                 int(self._number_of_input_neurons),
                 int(self._number_of_neurons),
             ),
-            dtype=torch.float64,
+            dtype=torch.float32,
         )

         torch.nn.init.uniform_(weights, a=1.0, b=(1.0 + noise_amplitude.item()))
@@ -571,7 +571,7 @@ class SbS(torch.nn.Module):
                 int(self._number_of_neurons),
                 int(self._number_of_neurons),
             ),
-            dtype=torch.float64,
+            dtype=torch.float32,
         )

         for i in range(0, int(self._number_of_neurons)):
@@ -593,7 +593,7 @@ class SbS(torch.nn.Module):
             weights = self._make_pooling_weights()
         else:
             weights = self._initial_random_weights(
-                torch.tensor(noise_amplitude, dtype=torch.float64)
+                torch.tensor(noise_amplitude, dtype=torch.float32)
             )

         weights = weights.moveaxis(-1, 0).moveaxis(-1, 1)
@@ -628,7 +628,7 @@ class SbS(torch.nn.Module):
                 int(self._kernel_size[1]),
             ),
             eps_xy_intitial,
-            dtype=torch.float64,
+            dtype=torch.float32,
         )

         self.epsilon_xy = eps_xy_temp
@@ -660,7 +660,7 @@ class SbS(torch.nn.Module):
         fill_value: float = float(self._epsilon_xy.data.mean())

         self._epsilon_xy.data = torch.full_like(
-            self._epsilon_xy.data, fill_value, dtype=torch.float64
+            self._epsilon_xy.data, fill_value, dtype=torch.float32
         )

     def threshold_epsilon_xy(self, threshold: float) -> None:
@@ -688,9 +688,9 @@ class SbS(torch.nn.Module):
         temp: torch.Tensor = (
             self._weights.data.detach()
             .clone(memory_format=torch.contiguous_format)
-            .type(dtype=torch.float64)
+            .type(dtype=torch.float32)
         )
-        temp /= temp.sum(dim=0, keepdim=True, dtype=torch.float64)
+        temp /= temp.sum(dim=0, keepdim=True, dtype=torch.float32)
         self._weights.data = temp

     def threshold_weights(self, threshold: float) -> None:
@@ -708,11 +708,11 @@ class FunctionalSbS(torch.autograd.Function):
     @staticmethod
     def forward(  # type: ignore
         ctx,
-        input_float64: torch.Tensor,
-        epsilon_xy_float64: torch.Tensor,
-        epsilon_0_float64: torch.Tensor,
-        epsilon_t_float64: torch.Tensor,
-        weights_float64: torch.Tensor,
+        input: torch.Tensor,
+        epsilon_xy: torch.Tensor,
+        epsilon_0: torch.Tensor,
+        epsilon_t: torch.Tensor,
+        weights: torch.Tensor,
         kernel_size: torch.Tensor,
         stride: torch.Tensor,
         dilation: torch.Tensor,
@@ -724,11 +724,7 @@ class FunctionalSbS(torch.autograd.Function):
         padding: torch.Tensor,
         output_size: torch.Tensor,
         noise_amplitude: torch.Tensor,
         alpha_number_of_iterations: torch.Tensor,
     ) -> torch.Tensor:

-        input = input_float64.type(dtype=torch.float32)
-        epsilon_xy = epsilon_xy_float64.type(dtype=torch.float32)
-        weights = weights_float64.type(dtype=torch.float32)
-        epsilon_0 = epsilon_0_float64.type(dtype=torch.float32)
-        epsilon_t = epsilon_t_float64.type(dtype=torch.float32)
+        torch.set_default_dtype(torch.float32)

         assert input.dim() == 4
         assert torch.numel(kernel_size) == 2
@@ -1097,7 +1093,7 @@ class FunctionalSbS(torch.autograd.Function):
                 )

                 alpha_dynamic = alpha_temp.sum(dim=1, keepdim=True)
-                alpha_dynamic += torch.finfo(torch.float32).eps * 1000
+                alpha_dynamic += 1e-20

                 # Alpha normalization
                 alpha_dynamic /= alpha_dynamic.sum(dim=3, keepdim=True).sum(
@@ -1114,13 +1110,11 @@ class FunctionalSbS(torch.autograd.Function):
         # Save the necessary data for the backward pass #
         ############################################################

-        output = output.type(dtype=torch.float64)
-
         ctx.save_for_backward(
             input_convolved,
             epsilon_xy_convolved,
-            epsilon_0_float64,
-            weights_float64,
+            epsilon_0,
+            weights,
             output,
             kernel_size,
             stride,
@@ -1136,8 +1130,8 @@ class FunctionalSbS(torch.autograd.Function):

         # Get the variables back
         (
-            input_float32,
-            epsilon_xy_float32,
+            input,
+            epsilon_xy,
             epsilon_0,
             weights,
             output,
@@ -1148,14 +1142,14 @@ class FunctionalSbS(torch.autograd.Function):
             kernel_size,
             stride,
             padding,
             input_size,
         ) = ctx.saved_tensors

-        input = input_float32.type(dtype=torch.float64)
-        input /= input.sum(dim=1, keepdim=True, dtype=torch.float64)
-        epsilon_xy = epsilon_xy_float32.type(dtype=torch.float64)
+        torch.set_default_dtype(torch.float32)
+
+        input /= input.sum(dim=1, keepdim=True, dtype=torch.float32)

         # For debugging:
-        # print(
-        #     f"S: O: {output.min().item():e} {output.max().item():e} I: {input.min().item():e} {input.max().item():e} G: {grad_output.min().item():e} {grad_output.max().item():e}"
-        # )
+# print(
+#     f"S: O: {output.min().item():e} {output.max().item():e} I: {input.min().item():e} {input.max().item():e} G: {grad_output.min().item():e} {grad_output.max().item():e}"
+# )

         epsilon_0_float: float = epsilon_0.item()
@@ -1172,21 +1166,21 @@ class FunctionalSbS(torch.autograd.Function):

         backprop_bigr: torch.Tensor = backprop_r.sum(axis=2)

-        temp: torch.Tensor = input / backprop_bigr**2
+        temp: torch.Tensor = input / (backprop_bigr**2 + 1e-20)
         backprop_f: torch.Tensor = output.unsqueeze(1) * temp.unsqueeze(2)
         torch.nan_to_num(
-            backprop_f, out=backprop_f, nan=1e300, posinf=1e300, neginf=-1e300
+            backprop_f, out=backprop_f, nan=1e30, posinf=1e30, neginf=-1e30
         )
-        torch.clip(backprop_f, out=backprop_f, min=-1e300, max=1e300)
+        torch.clip(backprop_f, out=backprop_f, min=-1e30, max=1e30)

-        tempz: torch.Tensor = 1.0 / backprop_bigr
+        tempz: torch.Tensor = 1.0 / (backprop_bigr + 1e-20)
         backprop_z: torch.Tensor = backprop_r * tempz.unsqueeze(2)
         torch.nan_to_num(
-            backprop_z, out=backprop_z, nan=1e300, posinf=1e300, neginf=-1e300
+            backprop_z, out=backprop_z, nan=1e30, posinf=1e30, neginf=-1e30
         )
-        torch.clip(backprop_z, out=backprop_z, min=-1e300, max=1e300)
+        torch.clip(backprop_z, out=backprop_z, min=-1e30, max=1e30)

         result_omega: torch.Tensor = backprop_bigr.unsqueeze(2) * grad_output.unsqueeze(
             1
         )
@@ -1211,9 +1205,9 @@ class FunctionalSbS(torch.autograd.Function):

         grad_weights = result_omega.sum(0).sum(-1).sum(-1)
         torch.nan_to_num(
-            grad_weights, out=grad_weights, nan=1e300, posinf=1e300, neginf=-1e300
+            grad_weights, out=grad_weights, nan=1e30, posinf=1e30, neginf=-1e30
         )
-        torch.clip(grad_weights, out=grad_weights, min=-1e300, max=1e300)
+        torch.clip(grad_weights, out=grad_weights, min=-1e30, max=1e30)

         grad_input = torch.nn.functional.fold(
             torch.nn.functional.unfold(
@@ -1230,9 +1224,9 @@ class FunctionalSbS(torch.autograd.Function):
             padding=padding,
             stride=stride,
         )
         torch.nan_to_num(
-            grad_input, out=grad_input, nan=1e300, posinf=1e300, neginf=-1e300
+            grad_input, out=grad_input, nan=1e30, posinf=1e30, neginf=-1e30
         )
-        torch.clip(grad_input, out=grad_input, min=-1e300, max=1e300)
+        torch.clip(grad_input, out=grad_input, min=-1e30, max=1e30)

         grad_eps_xy_temp = torch.nn.functional.fold(
             result_eps_xy.moveaxis(0, -1)
@@ -1260,9 +1254,9 @@ class FunctionalSbS(torch.autograd.Function):
             .contiguous(memory_format=torch.contiguous_format)
         )
         torch.nan_to_num(
-            grad_eps_xy, out=grad_eps_xy, nan=1e300, posinf=1e300, neginf=-1e300
+            grad_eps_xy, out=grad_eps_xy, nan=1e30, posinf=1e30, neginf=-1e30
         )
-        torch.clip(grad_eps_xy, out=grad_eps_xy, min=-1e300, max=1e300)
+        torch.clip(grad_eps_xy, out=grad_eps_xy, min=-1e30, max=1e30)

         grad_epsilon_0 = None
         grad_epsilon_t = None
diff --git a/learn_it.py b/learn_it.py
index 5983b6c..a3cd228 100644
--- a/learn_it.py
+++ b/learn_it.py
@@ -56,6 +56,8 @@ from torch.utils.tensorboard import SummaryWriter

 tb = SummaryWriter()

+torch.set_default_dtype(torch.float32)
+
 #######################################################################
 # We want to log what is going on into a file and screen              #
 #######################################################################
@@ -191,7 +193,7 @@ for id in range(0, len(network)):
     if os.path.exists(filename) is True:
         network[id].weights = torch.tensor(
             np.load(filename),
-            dtype=torch.float64,
+            dtype=torch.float32,
         )
         wf[id] = np.load(filename)
@@ -206,7 +208,7 @@ for id in range(0, len(network)):
     if os.path.exists(filename) is True:
         network[id].epsilon_xy = torch.tensor(
             np.load(filename),
-            dtype=torch.float64,
+            dtype=torch.float32,
         )
         eps_xy[id] = np.load(filename)
@@ -225,7 +227,7 @@ for id in range(0, len(network)):
         if len(file_to_load) == 1:
             network[id].weights = torch.tensor(
                 np.load(file_to_load[0]),
-                dtype=torch.float64,
+                dtype=torch.float32,
             )
             wf[id] = np.load(file_to_load[0])
             logging.info(f"File used: {file_to_load[0]}")
@@ -243,7 +245,7 @@ for id in range(0, len(network)):
         if len(file_to_load) == 1:
             network[id].epsilon_xy = torch.tensor(
                 np.load(file_to_load[0]),
-                dtype=torch.float64,
+                dtype=torch.float32,
             )
             eps_xy[id] = np.load(file_to_load[0])
             logging.info(f"File used: {file_to_load[0]}")
@@ -346,7 +348,7 @@ with torch.no_grad():
                 h_collection = []
                 h_collection.append(
                     the_dataset_train.pattern_filter_train(h_x, cfg).type(
-                        dtype=torch.float64
+                        dtype=torch.float32
                     )
                 )
                 for id in range(0, len(network)):
@@ -365,21 +367,21 @@ with torch.no_grad():
                 target_one_hot = (
                     target_one_hot.unsqueeze(2)
                     .unsqueeze(2)
-                    .type(dtype=torch.float64)
+                    .type(dtype=torch.float32)
                 )

-                # through the loss functions
-                h_y1 = torch.log(h_collection[-1])
-                h_y2 = torch.nan_to_num(h_y1, nan=0.0, posinf=0.0, neginf=0.0)
+                h_y1 = torch.log(h_collection[-1] + 1e-20)

                 my_loss: torch.Tensor = (
                     (
                         torch.nn.functional.mse_loss(
-                            h_collection[-1], target_one_hot, reduction="none"
+                            h_collection[-1],
+                            target_one_hot,
+                            reduction="none",
                         )
                         * cfg.learning_parameters.loss_coeffs_mse
                         + torch.nn.functional.kl_div(
-                            h_y2, target_one_hot, reduction="none"
+                            h_y1, target_one_hot + 1e-20, reduction="none"
                         )
                         * cfg.learning_parameters.loss_coeffs_kldiv
                     )
@@ -392,6 +394,7 @@ with torch.no_grad():
                 time_1: float = time.perf_counter()

                 my_loss.backward()
+                my_loss_float = my_loss.item()

                 time_2: float = time.perf_counter()
@@ -447,7 +450,7 @@ with torch.no_grad():
                             network[id].norm_weights()
                         else:
                             network[id].weights = torch.tensor(
-                                wf[id], dtype=torch.float64
+                                wf[id], dtype=torch.float32
                             )

                         if cfg.network_structure.eps_xy_trainable[id] is True:
@@ -458,7 +461,7 @@ with torch.no_grad():
                             network[id].mean_epsilon_xy()
                         else:
                             network[id].epsilon_xy = torch.tensor(
-                                eps_xy[id], dtype=torch.float64
+                                eps_xy[id], dtype=torch.float32
                             )

                         if cfg.network_structure.w_trainable[id] is True:
@@ -504,13 +507,18 @@ with torch.no_grad():
                 # Let the torch learning rate scheduler update the
                 # learning rates of the optimiers
                 if cfg.learning_parameters.lr_scheduler_patience_w > 0:
-                    lr_scheduler_wf.step(my_loss_for_batch)
-                if cfg.learning_parameters.lr_scheduler_patience_eps_xy > 0:
-                    lr_scheduler_eps.step(my_loss_for_batch)
+                    if cfg.learning_parameters.lr_scheduler_use_performance is True:
+                        lr_scheduler_wf.step(100.0 - performance)
+                    else:
+                        lr_scheduler_wf.step(my_loss_for_batch)

-                tb.add_scalar(
-                    "Train Error", 100.0 - performance, cfg.learning_step
-                )
+                if cfg.learning_parameters.lr_scheduler_patience_eps_xy > 0:
+                    if cfg.learning_parameters.lr_scheduler_use_performance is True:
+                        lr_scheduler_eps.step(100.0 - performance)
+                    else:
+                        lr_scheduler_eps.step(my_loss_for_batch)
+
+                tb.add_scalar("Train Error", 100.0 - performance, cfg.learning_step)
                 tb.add_scalar("Train Loss", my_loss_for_batch, cfg.learning_step)
                 tb.add_scalar(
                     "Learning Rate Scale WF",
@@ -568,7 +576,7 @@ with torch.no_grad():

                 h_h: torch.Tensor = network(
                     the_dataset_test.pattern_filter_test(h_x, cfg).type(
-                        dtype=torch.float64
+                        dtype=torch.float32
                     )
                 )
diff --git a/test_all.sh b/test_all.sh
new file mode 100644
index 0000000..587fbfc
--- /dev/null
+++ b/test_all.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+for i in $(seq 1 1 999)
+do
+    echo $i
+    /home/davrot/P3.10/bin/python3 test_it.py mnist.json $i
+done
\ No newline at end of file
diff --git a/test_it.py b/test_it.py
index 0f939f5..e4c085b 100644
--- a/test_it.py
+++ b/test_it.py
@@ -182,7 +182,7 @@ for id in range(0, len(network)):
         if len(file_to_load) == 1:
             network[id].weights = torch.tensor(
                 np.load(file_to_load[0]),
-                dtype=torch.float64,
+                dtype=torch.float32,
             )
             wf[id] = np.load(file_to_load[0])
             logging.info(f"File used: {file_to_load[0]}")
@@ -200,7 +200,7 @@ for id in range(0, len(network)):
         if len(file_to_load) == 1:
             network[id].epsilon_xy = torch.tensor(
                 np.load(file_to_load[0]),
-                dtype=torch.float64,
+                dtype=torch.float32,
             )
             eps_xy[id] = np.load(file_to_load[0])
             logging.info(f"File used: {file_to_load[0]}")
@@ -219,7 +219,7 @@ for id in range(0, len(network)):
     if os.path.exists(filename) is True:
         network[id].weights = torch.tensor(
             np.load(filename),
-            dtype=torch.float64,
+            dtype=torch.float32,
         )
         wf[id] = np.load(filename)
@@ -234,7 +234,7 @@ for id in range(0, len(network)):
     if os.path.exists(filename) is True:
         network[id].epsilon_xy = torch.tensor(
             np.load(filename),
-            dtype=torch.float64,
+            dtype=torch.float32,
         )
         eps_xy[id] = np.load(filename)
@@ -256,7 +256,7 @@ with torch.no_grad():
         time_0 = time.perf_counter()

         h_h: torch.Tensor = network(
-            the_dataset_test.pattern_filter_test(h_x, cfg).type(dtype=torch.float64)
+            the_dataset_test.pattern_filter_test(h_x, cfg).type(dtype=torch.float32)
         )

         test_correct += (h_h.argmax(dim=1).squeeze() == h_x_labels).sum().numpy()
@@ -271,6 +271,8 @@ with torch.no_grad():
             f" with {performance/100:^6.2%} \t Time used: {time_measure_a:^6.2f}sec"
         )
     )
+    np_performance = np.array(performance)
+    np.save(f"{cfg.results_path}/{cfg.learning_step}.npy", np_performance)

 # %%
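
Usage note (illustration, not part of the patch): test_all.sh sweeps test_it.py over learning steps 1 through 999, and each run now saves its test performance into results_path (default "./Results", see Parameter.py above) as "<learning_step>.npy". The sketch below shows one way those files could be collected into a single test-performance curve; the script name collect_results.py is an assumption, and it relies only on the file layout introduced by this patch.

    # collect_results.py -- illustrative sketch, assumes the default results_path
    # "./Results" and the "<learning_step>.npy" naming used by test_it.py above.
    from pathlib import Path

    import numpy as np

    results_path = Path("./Results")

    # Each file holds a single 0-d array with the percentage of correct test patterns.
    steps = sorted(int(p.stem) for p in results_path.glob("*.npy"))
    curve = np.array([float(np.load(results_path / f"{step}.npy")) for step in steps])

    for step, perf in zip(steps, curve):
        print(f"learning step {step:4d}: {perf:6.2f}% correct")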