pytorch-sbs/learn_it.py

# MIT License
# Copyright 2022 University of Bremen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# David Rotermund ( davrot@uni-bremen.de )
#
#
# Release history:
# ================
# 1.0.0 -- 01.05.2022: first release
#
#

# %%
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import numpy as np
import sys
import torch
import time
import dataconf
import logging
from datetime import datetime
import glob

from Dataset import (
    DatasetMaster,
    DatasetCIFAR,
    DatasetMNIST,
    DatasetFashionMNIST,
)
from Parameter import Config
from SbS import SbS

from torch.utils.tensorboard import SummaryWriter

# TensorBoard summary writer shared by the whole script.
tb = SummaryWriter()

#######################################################################
# We want to log what is going on into a file and screen              #
#######################################################################

# Every run gets its own log file, named after the moment the run started.
now = datetime.now()
dt_string_filename = now.strftime("%Y_%m_%d_%H_%M_%S")
log_file_name = f"log_{dt_string_filename}.txt"
logging.basicConfig(
    format="%(asctime)s %(message)s",
    level=logging.INFO,
    filemode="w",
    filename=log_file_name,
)

# A second handler on the root logger mirrors all records to a stream
# in addition to the log file.
stream_handler = logging.StreamHandler()
logging.getLogger().addHandler(stream_handler)

#######################################################################
# Load the config data from the json file                             #
#######################################################################

# The configuration file name is expected as the first command line argument.
command_line_arguments = sys.argv[1:]
if not command_line_arguments:
    raise Exception("Argument: Config file name is missing")

filename: str = command_line_arguments[0]

if not os.path.exists(filename):
    raise Exception(f"Config file not found! {filename}")

# Parse the file into the Config structure used by the rest of the script.
cfg = dataconf.file(filename, Config)
logging.info(f"Using configuration file: {filename}")


#######################################################################
# Prepare the test and training data                                  #
#######################################################################

# Load the input data
# Every supported cfg.data_mode maps onto the dataset class handling it.
dataset_class_by_mode = {
    "CIFAR10": DatasetCIFAR,
    "MNIST": DatasetMNIST,
    "MNIST_FASHION": DatasetFashionMNIST,
}
if cfg.data_mode not in dataset_class_by_mode:
    raise Exception("data_mode unknown")
dataset_class = dataset_class_by_mode[cfg.data_mode]

# Training and test set are built from the same class and the same path;
# only the train flag differs.
the_dataset_train: DatasetMaster = dataset_class(
    train=True, path_pattern=cfg.data_path, path_label=cfg.data_path
)
the_dataset_test: DatasetMaster = dataset_class(
    train=False, path_pattern=cfg.data_path, path_label=cfg.data_path
)

cfg.image_statistics.mean = the_dataset_train.mean

# The basic size (axes 2 and 3 of the pattern storage), minus the stuff
# we cut away in the pattern filter: crop_width_in_pixel on both sides.
pattern_shape = the_dataset_train.pattern_storage.shape
cropped_border = 2 * cfg.augmentation.crop_width_in_pixel
cfg.image_statistics.the_size = [
    pattern_shape[2] - cropped_border,
    pattern_shape[3] - cropped_border,
]

# Test data keeps its order, training data is shuffled.
my_loader_test: torch.utils.data.DataLoader = torch.utils.data.DataLoader(
    the_dataset_test, shuffle=False, batch_size=cfg.batch_size
)
my_loader_train: torch.utils.data.DataLoader = torch.utils.data.DataLoader(
    the_dataset_train, shuffle=True, batch_size=cfg.batch_size
)

logging.info("*** Data loaded.")

#######################################################################
# Build the network                                                   #
#######################################################################

# wf and eps_xy hold NumPy copies of every layer's weights and epsilon xy,
# taken right after the layer has been constructed.
wf: list[np.ndarray] = []
eps_xy: list[np.ndarray] = []
network = torch.nn.Sequential()

# One SbS layer per entry in cfg.network_structure.is_pooling_layer.
number_of_layers: int = len(cfg.network_structure.is_pooling_layer)
for layer_id in range(number_of_layers):
    # The first layer works on the (cropped) image size, every later layer
    # on the output size reported by the layer before it.
    input_size: list[int] = (
        cfg.image_statistics.the_size
        if layer_id == 0
        else network[layer_id - 1].output_size.tolist()
    )

    new_layer = SbS(
        number_of_input_neurons=cfg.network_structure.forward_neuron_numbers[
            layer_id
        ][0],
        number_of_neurons=cfg.network_structure.forward_neuron_numbers[layer_id][1],
        input_size=input_size,
        forward_kernel_size=cfg.network_structure.forward_kernel_size[layer_id],
        number_of_spikes=cfg.number_of_spikes,
        epsilon_t=cfg.get_epsilon_t(),
        epsilon_xy_intitial=cfg.learning_parameters.eps_xy_intitial,
        epsilon_0=cfg.epsilon_0,
        weight_noise_amplitude=cfg.learning_parameters.weight_noise_amplitude,
        is_pooling_layer=cfg.network_structure.is_pooling_layer[layer_id],
        strides=cfg.network_structure.strides[layer_id],
        dilation=cfg.network_structure.dilation[layer_id],
        padding=cfg.network_structure.padding[layer_id],
        alpha_number_of_iterations=cfg.learning_parameters.alpha_number_of_iterations,
        number_of_cpu_processes=cfg.number_of_cpu_processes,
    )
    network.append(new_layer)

    # Detached clones, so the snapshots are independent of the layer tensors.
    eps_xy.append(new_layer.epsilon_xy.detach().clone().numpy())
    wf.append(new_layer.weights.detach().clone().numpy())

logging.info("*** Network generated.")

# Load previous weights and epsilon xy
#
# When cfg.learning_step > 0 the state of every layer is restored from
#   <cfg.weight_path>/Weight_L<layer>_S<learning_step>.npy
#   <cfg.eps_xy_path>/EpsXY_L<layer>_S<learning_step>.npy
# Both the layer itself and the NumPy snapshots (wf / eps_xy) are updated.
# The check does not depend on the layer, so it is done once up front.
if cfg.learning_step > 0:
    for layer_id in range(len(network)):
        # Each file is read from disk only once. torch.tensor() copies its
        # input, so the array kept in wf / eps_xy does not share memory
        # with the tensor handed to the layer.
        previous_weights = np.load(
            f"{cfg.weight_path}/Weight_L{layer_id}_S{cfg.learning_step}.npy"
        )
        network[layer_id].weights = torch.tensor(
            previous_weights, dtype=torch.float64
        )
        wf[layer_id] = previous_weights

        previous_eps_xy = np.load(
            f"{cfg.eps_xy_path}/EpsXY_L{layer_id}_S{cfg.learning_step}.npy"
        )
        network[layer_id].epsilon_xy = torch.tensor(
            previous_eps_xy, dtype=torch.float64
        )
        eps_xy[layer_id] = previous_eps_xy

# Optional "overload" files in cfg.learning_parameters.overload_path replace
# the current weights / epsilon xy of a layer. For every layer at most one
# matching file per kind is allowed; no matching file means "keep as is".
#
# NOTE(review): the glob patterns match by prefix, so with ten or more layers
# "Weight_L1*.npy" would presumably also match files of layer 10 and up --
# confirm the naming of the overload files before using such networks.
for layer_id in range(len(network)):

    # Are there weights that overwrite the initial weights?
    file_to_load = glob.glob(
        cfg.learning_parameters.overload_path + "/Weight_L" + str(layer_id) + "*.npy"
    )

    if len(file_to_load) > 1:
        raise Exception(
            f"Too many previous weights files {cfg.learning_parameters.overload_path}/Weight_L{layer_id}*.npy"
        )

    if len(file_to_load) == 1:
        # Read the file once. torch.tensor() copies the data, so the array
        # stored in wf is independent of the tensor given to the layer.
        overload_weights = np.load(file_to_load[0])
        network[layer_id].weights = torch.tensor(
            overload_weights,
            dtype=torch.float64,
        )
        wf[layer_id] = overload_weights
        logging.info(f"File used: {file_to_load[0]}")

    # Are there epsilon xy files that overwrite the initial epsilon xy?
    file_to_load = glob.glob(
        cfg.learning_parameters.overload_path + "/EpsXY_L" + str(layer_id) + "*.npy"
    )

    if len(file_to_load) > 1:
        raise Exception(
            f"Too many previous epsilon xy files {cfg.learning_parameters.overload_path}/EpsXY_L{layer_id}*.npy"
        )

    if len(file_to_load) == 1:
        overload_eps_xy = np.load(file_to_load[0])
        network[layer_id].epsilon_xy = torch.tensor(
            overload_eps_xy,
            dtype=torch.float64,
        )
        eps_xy[layer_id] = overload_eps_xy
        logging.info(f"File used: {file_to_load[0]}")

#######################################################################
# Optimizer and LR Scheduler                                          #
#######################################################################

# I keep weights and epsilon xy separate to
# set the initial learning rate independently
parameter_list_weights: list = [layer._weights for layer in network]
parameter_list_epsilon_xy: list = [layer._epsilon_xy for layer in network]

# Only Adam is available as optimizer.
if cfg.learning_parameters.optimizer_name != "Adam":
    raise Exception("Optimizer not implemented")

# A learning rate is handed to Adam only if the configured value is
# positive; otherwise Adam is created without an explicit learning rate.
adam_arguments_w: dict = {}
if cfg.learning_parameters.learning_rate_gamma_w > 0:
    adam_arguments_w["lr"] = cfg.learning_parameters.learning_rate_gamma_w
optimizer_wf: torch.optim.Optimizer = torch.optim.Adam(
    parameter_list_weights, **adam_arguments_w
)

adam_arguments_eps_xy: dict = {}
if cfg.learning_parameters.learning_rate_gamma_eps_xy > 0:
    adam_arguments_eps_xy["lr"] = cfg.learning_parameters.learning_rate_gamma_eps_xy
optimizer_eps: torch.optim.Optimizer = torch.optim.Adam(
    parameter_list_epsilon_xy, **adam_arguments_eps_xy
)

# Only ReduceLROnPlateau is available as learning rate scheduler.
if cfg.learning_parameters.lr_schedule_name != "ReduceLROnPlateau":
    raise Exception("lr_scheduler not implemented")

# A scheduler exists only for the optimizers with a positive patience.
# The training loop checks the same condition before it uses a scheduler.
if cfg.learning_parameters.lr_scheduler_patience_w > 0:
    lr_scheduler_wf = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer_wf,
        patience=cfg.learning_parameters.lr_scheduler_patience_w,
        factor=cfg.learning_parameters.lr_scheduler_factor_w,
    )

if cfg.learning_parameters.lr_scheduler_patience_eps_xy > 0:
    lr_scheduler_eps = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer_eps,
        patience=cfg.learning_parameters.lr_scheduler_patience_eps_xy,
        factor=cfg.learning_parameters.lr_scheduler_factor_eps_xy,
    )

logging.info("*** Optimizer prepared.")


#######################################################################
# Some variable declarations                                          #
#######################################################################

# Counters for the test set: correctly classified pattern, pattern
# processed so far, and the size of the whole test set.
test_correct: int = 0
test_all: int = 0
test_complete: int = len(the_dataset_test)

# The same counters for the training set.
train_correct: int = 0
train_all: int = 0
train_complete: int = len(the_dataset_train)

# Pattern processed since the last weight update.
train_number_of_processed_pattern: int = 0

# Loss summed up since the last weight update.
train_loss: np.ndarray = np.zeros(1, dtype=np.float32)

# A negative value marks "no test performance measured yet".
last_test_performance: float = -1.0


logging.info("")

# Main learning loop.
#
# Gradient tracking is switched off globally and only re-enabled for the
# forward / backward pass of a training batch. Gradients are accumulated
# over mini-batches until at least cfg.get_update_after_x_pattern() pattern
# have been processed; then the optimizers make one step, the layers are
# post-processed (normalization / thresholding, or restored if a layer is
# not trainable), the new state is written to disk and TensorBoard, and
# cfg.learning_step is incremented. Depending on the configuration a test
# run follows. The script exits once cfg.learning_step_max is reached.
with torch.no_grad():
    if cfg.learning_parameters.learning_active is True:
        while True:

            ###############################################
            # Run a training data batch                   #
            ###############################################

            for h_x, h_x_labels in my_loader_train:
                time_0: float = time.perf_counter()

                if train_number_of_processed_pattern == 0:
                    # Reset the gradient of the torch optimizers
                    # (only at the start of an accumulation period)
                    optimizer_wf.zero_grad()
                    optimizer_eps.zero_grad()

                with torch.enable_grad():

                    # h_collection[0] is the filtered input, every further
                    # entry is the output of the corresponding layer.
                    h_collection = []
                    h_collection.append(
                        the_dataset_train.pattern_filter_train(h_x, cfg).type(
                            dtype=torch.float64
                        )
                    )
                    for id in range(0, len(network)):
                        h_collection.append(network[id](h_collection[-1]))

                    # Convert label into one hot
                    target_one_hot: torch.Tensor = torch.zeros(
                        (
                            h_x_labels.shape[0],
                            int(cfg.network_structure.number_of_output_neurons),
                        )
                    )
                    target_one_hot.scatter_(
                        1, h_x_labels.unsqueeze(1), torch.ones((h_x_labels.shape[0], 1))
                    )
                    # Two trailing singleton axes are appended so that the
                    # target has four axes.
                    target_one_hot = (
                        target_one_hot.unsqueeze(2)
                        .unsqueeze(2)
                        .type(dtype=torch.float64)
                    )

                    # through the loss functions
                    # kl_div expects its input in log space; NaN and +/-inf
                    # produced by the log are replaced by 0.
                    h_y1 = torch.log(h_collection[-1])
                    h_y2 = torch.nan_to_num(h_y1, nan=0.0, posinf=0.0, neginf=0.0)

                    # Element-wise MSE and KL divergence, weighted by their
                    # coefficients and normalized by the coefficient sum,
                    # then averaged over all elements.
                    my_loss: torch.Tensor = (
                        (
                            torch.nn.functional.mse_loss(
                                h_collection[-1], target_one_hot, reduction="none"
                            )
                            * cfg.learning_parameters.loss_coeffs_mse
                            + torch.nn.functional.kl_div(
                                h_y2, target_one_hot, reduction="none"
                            )
                            * cfg.learning_parameters.loss_coeffs_kldiv
                        )
                        / (
                            cfg.learning_parameters.loss_coeffs_kldiv
                            + cfg.learning_parameters.loss_coeffs_mse
                        )
                    ).mean()

                    time_1: float = time.perf_counter()

                    my_loss.backward()
                    my_loss_float = my_loss.item()
                    time_2: float = time.perf_counter()

                # The predicted class is the argmax over axis 1 of the
                # output of the last layer.
                train_correct += (
                    (h_collection[-1].argmax(dim=1).squeeze() == h_x_labels)
                    .sum()
                    .numpy()
                )
                train_all += h_collection[-1].shape[0]

                performance: float = 100.0 * train_correct / train_all

                time_measure_a: float = time_1 - time_0

                # FIX: "{train_all^6}" was a bitwise XOR with 6 and logged a
                # wrong pattern count; the centered format spec ":^6" (as
                # used for learning_step) was intended.
                logging.info(
                    (
                        f"{cfg.learning_step:^6} Training \t{train_all:^6} pattern "
                        f"with {performance/100.0:^6.2%} "
                        f"\t\tForward time: \t{time_measure_a:^6.2f}sec"
                    )
                )

                train_loss[0] += my_loss_float
                train_number_of_processed_pattern += h_collection[-1].shape[0]

                time_measure_b: float = time_2 - time_1

                logging.info(
                    (
                        f"\t\t\tLoss: {train_loss[0]/train_number_of_processed_pattern:^15.3e} "
                        f"\t\t\tBackward time: \t{time_measure_b:^6.2f}sec "
                    )
                )

                # Enough pattern accumulated -> apply the gradients
                if (
                    train_number_of_processed_pattern
                    >= cfg.get_update_after_x_pattern()
                ):
                    logging.info("\t\t\t*** Updating the weights ***")
                    my_loss_for_batch: float = (
                        train_loss[0] / train_number_of_processed_pattern
                    )

                    optimizer_wf.step()
                    optimizer_eps.step()

                    for id in range(0, len(network)):
                        # Trainable weights are normalized, thresholded and
                        # normalized again; weights of a non-trainable layer
                        # are reset to the stored snapshot wf[id], which
                        # undoes the optimizer step for this layer.
                        if cfg.network_structure.w_trainable[id] is True:
                            network[id].norm_weights()
                            network[id].threshold_weights(
                                cfg.learning_parameters.learning_rate_threshold_w
                            )
                            network[id].norm_weights()
                        else:
                            network[id].weights = torch.tensor(
                                wf[id], dtype=torch.float64
                            )

                        # The same scheme for epsilon xy.
                        if cfg.network_structure.eps_xy_trainable[id] is True:
                            network[id].threshold_epsilon_xy(
                                cfg.learning_parameters.learning_rate_threshold_eps_xy
                            )
                            if cfg.network_structure.eps_xy_mean[id] is True:
                                network[id].mean_epsilon_xy()
                        else:
                            network[id].epsilon_xy = torch.tensor(
                                eps_xy[id], dtype=torch.float64
                            )

                        if cfg.network_structure.w_trainable[id] is True:
                            # Save the new values
                            np.save(
                                cfg.weight_path
                                + "/Weight_L"
                                + str(id)
                                + "_S"
                                + str(cfg.learning_step)
                                + ".npy",
                                network[id].weights.detach().numpy(),
                            )

                            # The histogram is best effort: a ValueError
                            # raised by TensorBoard is ignored on purpose.
                            try:
                                tb.add_histogram(
                                    "Weights " + str(id),
                                    network[id].weights,
                                    cfg.learning_step,
                                )
                            except ValueError:
                                pass

                        if cfg.network_structure.eps_xy_trainable[id] is True:
                            np.save(
                                cfg.eps_xy_path
                                + "/EpsXY_L"
                                + str(id)
                                + "_S"
                                + str(cfg.learning_step)
                                + ".npy",
                                network[id].epsilon_xy.detach().numpy(),
                            )
                            try:
                                tb.add_histogram(
                                    "Epsilon XY " + str(id),
                                    network[id].epsilon_xy.detach().numpy(),
                                    cfg.learning_step,
                                )
                            except ValueError:
                                pass

                    # Let the torch learning rate scheduler update the
                    # learning rates of the optimizers. A scheduler exists
                    # only if its patience is positive.
                    if cfg.learning_parameters.lr_scheduler_patience_w > 0:
                        lr_scheduler_wf.step(my_loss_for_batch)
                    if cfg.learning_parameters.lr_scheduler_patience_eps_xy > 0:
                        lr_scheduler_eps.step(my_loss_for_batch)

                    # Note: the value logged as "Train Performance" is the
                    # error (100 - performance in percent).
                    tb.add_scalar(
                        "Train Performance", 100.0 - performance, cfg.learning_step
                    )
                    tb.add_scalar("Train Loss", my_loss_for_batch, cfg.learning_step)
                    tb.add_scalar(
                        "Learning Rate Scale WF",
                        optimizer_wf.param_groups[-1]["lr"],
                        cfg.learning_step,
                    )
                    tb.add_scalar(
                        "Learning Rate Scale Eps XY ",
                        optimizer_eps.param_groups[-1]["lr"],
                        cfg.learning_step,
                    )

                    # Start the next accumulation period
                    cfg.learning_step += 1
                    train_loss = np.zeros((1), dtype=np.float32)
                    train_correct = 0
                    train_all = 0
                    performance = 0
                    train_number_of_processed_pattern = 0

                    tb.flush()

                    test_correct = 0
                    test_all = 0

                    # A negative last_test_performance means that no test
                    # has been run so far.
                    if last_test_performance < 0:
                        logging.info("")
                    else:
                        logging.info(
                            f"\t\t\tLast test performance: {last_test_performance/100.0:^6.2%}"
                        )
                    logging.info("")

                    ###############################################
                    # Run a test data performance measurement     #
                    ###############################################
                    # Tested is every test_every_x_learning_steps steps and
                    # at the very last step, if testing is enabled.
                    if (
                        (
                            (
                                (
                                    cfg.learning_step
                                    % cfg.learning_parameters.test_every_x_learning_steps
                                )
                                == 0
                            )
                            or (cfg.learning_step == cfg.learning_step_max)
                        )
                        and (cfg.learning_parameters.test_during_learning is True)
                        and (cfg.learning_step > 0)
                    ):
                        logging.info("")
                        logging.info("Testing:")

                        for h_x, h_x_labels in my_loader_test:
                            time_0 = time.perf_counter()

                            # The test filter is taken from the training
                            # data set object.
                            h_h: torch.Tensor = network(
                                the_dataset_train.pattern_filter_test(h_x, cfg).type(
                                    dtype=torch.float64
                                )
                            )

                            test_correct += (
                                (h_h.argmax(dim=1).squeeze() == h_x_labels)
                                .sum()
                                .numpy()
                            )
                            test_all += h_h.shape[0]
                            performance = 100.0 * test_correct / test_all
                            time_1 = time.perf_counter()
                            time_measure_a = time_1 - time_0

                            logging.info(
                                (
                                    f"\t\t{test_all} of {test_complete}"
                                    f" with {performance/100:^6.2%} \t Time used: {time_measure_a:^6.2f}sec"
                                )
                            )

                        logging.info("")

                        last_test_performance = performance

                        tb.add_scalar(
                            "Test Error", 100.0 - performance, cfg.learning_step
                        )
                        tb.flush()

                    if cfg.learning_step == cfg.learning_step_max:
                        tb.close()
                        # NOTE(review): the regular end of the training
                        # leaves with exit status 1, which usually signals
                        # a failure -- confirm whether 0 is wanted here.
                        exit(1)

# %%
First version 2022-04-30 02:07:09 +02:00			`# MIT License`
			`# Copyright 2022 University of Bremen`
			`#`
			`# Permission is hereby granted, free of charge, to any person obtaining`
			`# a copy of this software and associated documentation files (the "Software"),`
			`# to deal in the Software without restriction, including without limitation`
			`# the rights to use, copy, modify, merge, publish, distribute, sublicense,`
			`# and/or sell copies of the Software, and to permit persons to whom the`
			`# Software is furnished to do so, subject to the following conditions:`
			`#`
			`# The above copyright notice and this permission notice shall be included`
			`# in all copies or substantial portions of the Software.`
			`#`
			`# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,`
			`# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF`
			`# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.`
			`# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,`
			`# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR`
			`# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR`
			`# THE USE OR OTHER DEALINGS IN THE SOFTWARE.`
			`#`
			`#`
			`# David Rotermund ( davrot@uni-bremen.de )`
			`#`
			`#`
			`# Release history:`
			`# ================`
			`# 1.0.0 -- 01.05.2022: first release`
			`#`
			`#`

			`# %%`
			`import os`

			`os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"`

			`import numpy as np`
			`import sys`
			`import torch`
			`import time`
			`import dataconf`
			`import logging`
			`from datetime import datetime`
Better detection of previous files 2022-04-30 17:46:17 +02:00			`import glob`
First version 2022-04-30 02:07:09 +02:00
			`from Dataset import (`
			`DatasetMaster,`
			`DatasetCIFAR,`
			`DatasetMNIST,`
			`DatasetFashionMNIST,`
			`)`
			`from Parameter import Config`
			`from SbS import SbS`

			`from torch.utils.tensorboard import SummaryWriter`

			`tb = SummaryWriter()`

			`#######################################################################`
			`# We want to log what is going on into a file and screen #`
			`#######################################################################`

			`now = datetime.now()`
			`dt_string_filename = now.strftime("%Y_%m_%d_%H_%M_%S")`
			`logging.basicConfig(`
			`filename="log_" + dt_string_filename + ".txt",`
			`filemode="w",`
			`level=logging.INFO,`
			`format="%(asctime)s %(message)s",`
			`)`
			`logging.getLogger().addHandler(logging.StreamHandler())`

			`#######################################################################`
			`# Load the config data from the json file #`
			`#######################################################################`

			`if len(sys.argv) < 2:`
			`raise Exception("Argument: Config file name is missing")`

			`filename: str = sys.argv[1]`

			`if os.path.exists(filename) is False:`
			`raise Exception(f"Config file not found! {filename}")`

			`cfg = dataconf.file(filename, Config)`
			`logging.info(f"Using configuration file: {filename}")`


			`#######################################################################`
			`# Prepare the test and training data #`
			`#######################################################################`

			`# Load the input data`
			`the_dataset_train: DatasetMaster`
			`the_dataset_test: DatasetMaster`
			`if cfg.data_mode == "CIFAR10":`
			`the_dataset_train = DatasetCIFAR(`
			`train=True, path_pattern=cfg.data_path, path_label=cfg.data_path`
			`)`
			`the_dataset_test = DatasetCIFAR(`
			`train=False, path_pattern=cfg.data_path, path_label=cfg.data_path`
			`)`
			`elif cfg.data_mode == "MNIST":`
			`the_dataset_train = DatasetMNIST(`
			`train=True, path_pattern=cfg.data_path, path_label=cfg.data_path`
			`)`
			`the_dataset_test = DatasetMNIST(`
			`train=False, path_pattern=cfg.data_path, path_label=cfg.data_path`
			`)`
			`elif cfg.data_mode == "MNIST_FASHION":`
			`the_dataset_train = DatasetFashionMNIST(`
			`train=True, path_pattern=cfg.data_path, path_label=cfg.data_path`
			`)`
			`the_dataset_test = DatasetFashionMNIST(`
			`train=False, path_pattern=cfg.data_path, path_label=cfg.data_path`
			`)`
			`else:`
			`raise Exception("data_mode unknown")`

			`cfg.image_statistics.mean = the_dataset_train.mean`

			`# The basic size`
			`cfg.image_statistics.the_size = [`
			`the_dataset_train.pattern_storage.shape[2],`
			`the_dataset_train.pattern_storage.shape[3],`
			`]`

			`# Minus the stuff we cut away in the pattern filter`
			`cfg.image_statistics.the_size[0] -= 2 * cfg.augmentation.crop_width_in_pixel`
			`cfg.image_statistics.the_size[1] -= 2 * cfg.augmentation.crop_width_in_pixel`

			`my_loader_test: torch.utils.data.DataLoader = torch.utils.data.DataLoader(`
			`the_dataset_test, batch_size=cfg.batch_size, shuffle=False`
			`)`
			`my_loader_train: torch.utils.data.DataLoader = torch.utils.data.DataLoader(`
			`the_dataset_train, batch_size=cfg.batch_size, shuffle=True`
			`)`

			`logging.info("*** Data loaded.")`

			`#######################################################################`
			`# Build the network #`
			`#######################################################################`

			`wf: list[np.ndarray] = []`
			`eps_xy: list[np.ndarray] = []`
			`network = torch.nn.Sequential()`
			`for id in range(0, len(cfg.network_structure.is_pooling_layer)):`
			`if id == 0:`
			`input_size: list[int] = cfg.image_statistics.the_size`
			`else:`
			`input_size = network[id - 1].output_size.tolist()`

			`network.append(`
			`SbS(`
			`number_of_input_neurons=cfg.network_structure.forward_neuron_numbers[id][0],`
			`number_of_neurons=cfg.network_structure.forward_neuron_numbers[id][1],`
			`input_size=input_size,`
			`forward_kernel_size=cfg.network_structure.forward_kernel_size[id],`
			`number_of_spikes=cfg.number_of_spikes,`
			`epsilon_t=cfg.get_epsilon_t(),`
			`epsilon_xy_intitial=cfg.learning_parameters.eps_xy_intitial,`
			`epsilon_0=cfg.epsilon_0,`
			`weight_noise_amplitude=cfg.learning_parameters.weight_noise_amplitude,`
			`is_pooling_layer=cfg.network_structure.is_pooling_layer[id],`
			`strides=cfg.network_structure.strides[id],`
			`dilation=cfg.network_structure.dilation[id],`
			`padding=cfg.network_structure.padding[id],`
			`alpha_number_of_iterations=cfg.learning_parameters.alpha_number_of_iterations,`
			`number_of_cpu_processes=cfg.number_of_cpu_processes,`
			`)`
			`)`

			`eps_xy.append(network[id].epsilon_xy.detach().clone().numpy())`
			`wf.append(network[id].weights.detach().clone().numpy())`

			`logging.info("*** Network generated.")`

			`for id in range(0, len(network)):`
			`# Load previous weights and epsilon xy`
			`if cfg.learning_step > 0:`
			`network[id].weights = torch.tensor(`
			`np.load(`
			`cfg.weight_path`
			`+ "/Weight_L"`
			`+ str(id)`
			`+ "_S"`
			`+ str(cfg.learning_step)`
			`+ ".npy"`
			`),`
			`dtype=torch.float64,`
			`)`

			`wf[id] = np.load(`
			`cfg.weight_path`
			`+ "/Weight_L"`
			`+ str(id)`
			`+ "_S"`
			`+ str(cfg.learning_step)`
			`+ ".npy"`
			`)`

			`network[id].epsilon_xy = torch.tensor(`
			`np.load(`
			`cfg.eps_xy_path`
			`+ "/EpsXY_L"`
			`+ str(id)`
			`+ "_S"`
			`+ str(cfg.learning_step)`
			`+ ".npy"`
			`),`
			`dtype=torch.float64,`
			`)`

			`eps_xy[id] = np.load(`
			`cfg.eps_xy_path`
			`+ "/EpsXY_L"`
			`+ str(id)`
			`+ "_S"`
			`+ str(cfg.learning_step)`
			`+ ".npy"`
			`)`

# Files found in cfg.learning_parameters.overload_path replace the weights
# and the epsilon xy of the layer they belong to. At most one file per
# layer and kind is accepted; more than one raises an Exception.
for id in range(0, len(network)):

    # Are there weights that overwrite the initial weights?
    # The pattern "Weight_L1*.npy" also matches the files of the layers
    # 10, 11, ... Hits whose layer number continues with another digit
    # belong to a different layer and are discarded.
    weight_prefix = "Weight_L" + str(id)
    file_to_load = [
        candidate
        for candidate in glob.glob(
            cfg.learning_parameters.overload_path + "/" + weight_prefix + "*.npy"
        )
        if not os.path.basename(candidate)[len(weight_prefix)].isdigit()
    ]

    if len(file_to_load) > 1:
        raise Exception(
            f"Too many previous weights files {cfg.learning_parameters.overload_path}/Weight_L{id}*.npy"
        )

    if len(file_to_load) == 1:
        # Read the file only once. torch.tensor() copies its input, hence
        # the numpy array kept in wf does not share memory with the tensor.
        wf[id] = np.load(file_to_load[0])
        network[id].weights = torch.tensor(
            wf[id],
            dtype=torch.float64,
        )
        logging.info(f"File used: {file_to_load[0]}")

    # Are there epsilon xy files that overwrite the initial epsilon xy?
    # Same filtering as above: "EpsXY_L1*.npy" must not pick up layer 10+.
    eps_xy_prefix = "EpsXY_L" + str(id)
    file_to_load = [
        candidate
        for candidate in glob.glob(
            cfg.learning_parameters.overload_path + "/" + eps_xy_prefix + "*.npy"
        )
        if not os.path.basename(candidate)[len(eps_xy_prefix)].isdigit()
    ]

    if len(file_to_load) > 1:
        raise Exception(
            f"Too many previous epsilon xy files {cfg.learning_parameters.overload_path}/EpsXY_L{id}*.npy"
        )

    if len(file_to_load) == 1:
        eps_xy[id] = np.load(file_to_load[0])
        network[id].epsilon_xy = torch.tensor(
            eps_xy[id],
            dtype=torch.float64,
        )
        logging.info(f"File used: {file_to_load[0]}")
First version 2022-04-30 02:07:09 +02:00
			`#######################################################################`
			`# Optimizer and LR Scheduler #`
			`#######################################################################`

			`# I keep weights and epsilon xy seperate to`
			`# set the initial learning rate independently`
			`parameter_list_weights: list = []`
			`parameter_list_epsilon_xy: list = []`

			`for id in range(0, len(network)):`
			`parameter_list_weights.append(network[id]._weights)`
			`parameter_list_epsilon_xy.append(network[id]._epsilon_xy)`

			`if cfg.learning_parameters.optimizer_name == "Adam":`
			`if cfg.learning_parameters.learning_rate_gamma_w > 0:`
			`optimizer_wf: torch.optim.Optimizer = torch.optim.Adam(`
			`parameter_list_weights,`
			`lr=cfg.learning_parameters.learning_rate_gamma_w,`
			`)`
			`else:`
			`optimizer_wf = torch.optim.Adam(`
			`parameter_list_weights,`
			`)`

			`if cfg.learning_parameters.learning_rate_gamma_eps_xy > 0:`
			`optimizer_eps: torch.optim.Optimizer = torch.optim.Adam(`
			`parameter_list_epsilon_xy,`
			`lr=cfg.learning_parameters.learning_rate_gamma_eps_xy,`
			`)`
			`else:`
			`optimizer_eps = torch.optim.Adam(`
			`parameter_list_epsilon_xy,`
			`)`
			`else:`
			`raise Exception("Optimizer not implemented")`

			`if cfg.learning_parameters.lr_schedule_name == "ReduceLROnPlateau":`
LR Scheduler: Separated the eps and w parameters 2022-04-30 13:40:51 +02:00			`if cfg.learning_parameters.lr_scheduler_patience_w > 0:`
			`lr_scheduler_wf = torch.optim.lr_scheduler.ReduceLROnPlateau(`
			`optimizer_wf,`
			`factor=cfg.learning_parameters.lr_scheduler_factor_w,`
			`patience=cfg.learning_parameters.lr_scheduler_patience_w,`
			`)`
First version 2022-04-30 02:07:09 +02:00
LR Scheduler: Separated the eps and w parameters 2022-04-30 13:40:51 +02:00			`if cfg.learning_parameters.lr_scheduler_patience_eps_xy > 0:`
			`lr_scheduler_eps = torch.optim.lr_scheduler.ReduceLROnPlateau(`
			`optimizer_eps,`
			`factor=cfg.learning_parameters.lr_scheduler_factor_eps_xy,`
			`patience=cfg.learning_parameters.lr_scheduler_patience_eps_xy,`
			`)`
First version 2022-04-30 02:07:09 +02:00			`else:`
			`raise Exception("lr_scheduler not implemented")`

			`logging.info("*** Optimizer prepared.")`


			`#######################################################################`
			`# Some variable declarations #`
			`#######################################################################`

			`test_correct: int = 0`
			`test_all: int = 0`
			`test_complete: int = the_dataset_test.__len__()`

			`train_correct: int = 0`
			`train_all: int = 0`
			`train_complete: int = the_dataset_train.__len__()`

			`train_number_of_processed_pattern: int = 0`

			`train_loss: np.ndarray = np.zeros((1), dtype=np.float32)`

			`last_test_performance: float = -1.0`


			`logging.info("")`

			`with torch.no_grad():`
			`if cfg.learning_parameters.learning_active is True:`
			`while True:`

			`###############################################`
			`# Run a training data batch #`
			`###############################################`

			`for h_x, h_x_labels in my_loader_train:`
			`time_0: float = time.perf_counter()`

			`if train_number_of_processed_pattern == 0:`
			`# Reset the gradient of the torch optimizers`
			`optimizer_wf.zero_grad()`
			`optimizer_eps.zero_grad()`

			`with torch.enable_grad():`

			`h_collection = []`
			`h_collection.append(`
			`the_dataset_train.pattern_filter_train(h_x, cfg).type(`
			`dtype=torch.float64`
			`)`
			`)`
			`for id in range(0, len(network)):`
			`h_collection.append(network[id](h_collection[-1]))`

			`# Convert label into one hot`
			`target_one_hot: torch.Tensor = torch.zeros(`
			`(`
			`h_x_labels.shape[0],`
			`int(cfg.network_structure.number_of_output_neurons),`
			`)`
			`)`
			`target_one_hot.scatter_(`
			`1, h_x_labels.unsqueeze(1), torch.ones((h_x_labels.shape[0], 1))`
			`)`
			`target_one_hot = (`
			`target_one_hot.unsqueeze(2)`
			`.unsqueeze(2)`
			`.type(dtype=torch.float64)`
			`)`

			`# through the loss functions`
			`h_y1 = torch.log(h_collection[-1])`
			`h_y2 = torch.nan_to_num(h_y1, nan=0.0, posinf=0.0, neginf=0.0)`

			`my_loss: torch.Tensor = (`
			`(`
			`torch.nn.functional.mse_loss(`
			`h_collection[-1], target_one_hot, reduction="none"`
			`)`
			`* cfg.learning_parameters.loss_coeffs_mse`
			`+ torch.nn.functional.kl_div(`
			`h_y2, target_one_hot, reduction="none"`
			`)`
			`* cfg.learning_parameters.loss_coeffs_kldiv`
			`)`
			`/ (`
			`cfg.learning_parameters.loss_coeffs_kldiv`
			`+ cfg.learning_parameters.loss_coeffs_mse`
			`)`
			`).mean()`

			`time_1: float = time.perf_counter()`

			`my_loss.backward()`
			`my_loss_float = my_loss.item()`
			`time_2: float = time.perf_counter()`

			`train_correct += (`
			`(h_collection[-1].argmax(dim=1).squeeze() == h_x_labels)`
			`.sum()`
			`.numpy()`
			`)`
			`train_all += h_collection[-1].shape[0]`

			`performance: float = 100.0 * train_correct / train_all`

			`time_measure_a: float = time_1 - time_0`

			`logging.info(`
			`(`
			`f"{cfg.learning_step:^6} Training \t{train_all^6} pattern "`
			`f"with {performance/100.0:^6.2%} "`
			`f"\t\tForward time: \t{time_measure_a:^6.2f}sec"`
			`)`
			`)`

			`train_loss[0] += my_loss_float`
			`train_number_of_processed_pattern += h_collection[-1].shape[0]`

			`time_measure_b: float = time_2 - time_1`

			`logging.info(`
			`(`
			`f"\t\t\tLoss: {train_loss[0]/train_number_of_processed_pattern:^15.3e} "`
			`f"\t\t\tBackward time: \t{time_measure_b:^6.2f}sec "`
			`)`
			`)`

			`if (`
			`train_number_of_processed_pattern`
			`>= cfg.get_update_after_x_pattern()`
			`):`
			`logging.info("\t\t\t* Updating the weights *")`
			`my_loss_for_batch: float = (`
			`train_loss[0] / train_number_of_processed_pattern`
			`)`

			`optimizer_wf.step()`
			`optimizer_eps.step()`

			`for id in range(0, len(network)):`
			`if cfg.network_structure.w_trainable[id] is True:`
			`network[id].norm_weights()`
			`network[id].threshold_weights(`
			`cfg.learning_parameters.learning_rate_threshold_w`
			`)`
			`network[id].norm_weights()`
			`else:`
			`network[id].weights = torch.tensor(`
			`wf[id], dtype=torch.float64`
			`)`

			`if cfg.network_structure.eps_xy_trainable[id] is True:`
			`network[id].threshold_epsilon_xy(`
			`cfg.learning_parameters.learning_rate_threshold_eps_xy`
			`)`
Added epsilon xy mean back in 2022-05-01 02:11:59 +02:00			`if cfg.network_structure.eps_xy_mean[id] is True:`
			`network[id].mean_epsilon_xy()`
First version 2022-04-30 02:07:09 +02:00			`else:`
			`network[id].epsilon_xy = torch.tensor(`
			`eps_xy[id], dtype=torch.float64`
			`)`

Track and save only changed w and epsilon 2022-04-30 16:43:13 +02:00			`if cfg.network_structure.w_trainable[id] is True:`
			`# Save the new values`
			`np.save(`
			`cfg.weight_path`
			`+ "/Weight_L"`
			`+ str(id)`
			`+ "_S"`
			`+ str(cfg.learning_step)`
			`+ ".npy",`
			`network[id].weights.detach().numpy(),`
First version 2022-04-30 02:07:09 +02:00			`)`
Track and save only changed w and epsilon 2022-04-30 16:43:13 +02:00
			`try:`
			`tb.add_histogram(`
			`"Weights " + str(id),`
			`network[id].weights,`
			`cfg.learning_step,`
			`)`
			`except ValueError:`
			`pass`

			`if cfg.network_structure.eps_xy_trainable[id] is True:`
			`np.save(`
			`cfg.eps_xy_path`
			`+ "/EpsXY_L"`
			`+ str(id)`
			`+ "_S"`
			`+ str(cfg.learning_step)`
			`+ ".npy",`
First version 2022-04-30 02:07:09 +02:00			`network[id].epsilon_xy.detach().numpy(),`
			`)`
Track and save only changed w and epsilon 2022-04-30 16:43:13 +02:00			`try:`
			`tb.add_histogram(`
			`"Epsilon XY " + str(id),`
			`network[id].epsilon_xy.detach().numpy(),`
			`cfg.learning_step,`
			`)`
			`except ValueError:`
			`pass`
First version 2022-04-30 02:07:09 +02:00
			`# Let the torch learning rate scheduler update the`
			`# learning rates of the optimiers`
LR Scheduler: Separated the eps and w parameters 2022-04-30 13:40:51 +02:00			`if cfg.learning_parameters.lr_scheduler_patience_w > 0:`
			`lr_scheduler_wf.step(my_loss_for_batch)`
			`if cfg.learning_parameters.lr_scheduler_patience_eps_xy > 0:`
			`lr_scheduler_eps.step(my_loss_for_batch)`
First version 2022-04-30 02:07:09 +02:00
			`tb.add_scalar(`
			`"Train Performance", 100.0 - performance, cfg.learning_step`
			`)`
			`tb.add_scalar("Train Loss", my_loss_for_batch, cfg.learning_step)`
			`tb.add_scalar(`
			`"Learning Rate Scale WF",`
			`optimizer_wf.param_groups[-1]["lr"],`
			`cfg.learning_step,`
			`)`
			`tb.add_scalar(`
			`"Learning Rate Scale Eps XY ",`
			`optimizer_eps.param_groups[-1]["lr"],`
			`cfg.learning_step,`
			`)`

			`cfg.learning_step += 1`
			`train_loss = np.zeros((1), dtype=np.float32)`
			`train_correct = 0`
			`train_all = 0`
			`performance = 0`
			`train_number_of_processed_pattern = 0`

			`tb.flush()`

			`test_correct = 0`
			`test_all = 0`

			`if last_test_performance < 0:`
			`logging.info("")`
			`else:`
			`logging.info(`
			`f"\t\t\tLast test performance: {last_test_performance/100.0:^6.2%}"`
			`)`
			`logging.info("")`

			`###############################################`
			`# Run a test data performance measurement #`
			`###############################################`
			`if (`
			`(`
			`(`
			`(`
			`cfg.learning_step`
			`% cfg.learning_parameters.test_every_x_learning_steps`
			`)`
			`== 0`
			`)`
			`or (cfg.learning_step == cfg.learning_step_max)`
			`)`
			`and (cfg.learning_parameters.test_during_learning is True)`
			`and (cfg.learning_step > 0)`
			`):`
			`logging.info("")`
			`logging.info("Testing:")`

			`for h_x, h_x_labels in my_loader_test:`
			`time_0 = time.perf_counter()`

			`h_h: torch.Tensor = network(`
			`the_dataset_train.pattern_filter_test(h_x, cfg).type(`
			`dtype=torch.float64`
			`)`
			`)`

			`test_correct += (`
			`(h_h.argmax(dim=1).squeeze() == h_x_labels)`
			`.sum()`
			`.numpy()`
			`)`
			`test_all += h_h.shape[0]`
			`performance = 100.0 * test_correct / test_all`
			`time_1 = time.perf_counter()`
			`time_measure_a = time_1 - time_0`

			`logging.info(`
			`(`
			`f"\t\t{test_all} of {test_complete}"`
			`f" with {performance/100:^6.2%} \t Time used: {time_measure_a:^6.2f}sec"`
			`)`
			`)`

			`logging.info("")`

			`last_test_performance = performance`

			`tb.add_scalar(`
			`"Test Error", 100.0 - performance, cfg.learning_step`
			`)`
			`tb.flush()`

			`if cfg.learning_step == cfg.learning_step_max:`
			`tb.close()`
			`exit(1)`

			`# %%`