commit a540a3f27108ea76b010fab693eb1fef75602ece Author: David Rotermund Date: Mon Oct 21 16:43:42 2024 +0200 Initial diff --git a/avg_pooling_conv2d/L1NormLayer.py b/avg_pooling_conv2d/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/avg_pooling_conv2d/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/avg_pooling_conv2d/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt b/avg_pooling_conv2d/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..2143b92 Binary files /dev/null and b/avg_pooling_conv2d/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt differ diff --git a/avg_pooling_conv2d/NNMF2d.py b/avg_pooling_conv2d/NNMF2d.py new file mode 100644 index 0000000..b84d083 --- /dev/null +++ b/avg_pooling_conv2d/NNMF2d.py @@ -0,0 +1,252 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2d(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + + def __init__( + self, + in_channels: int, + out_channels: int, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.in_channels = in_channels + self.out_channels = out_channels + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty((out_channels, in_channels), **factory_kwargs) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d = FunctionalNNMF2d.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=1, keepdim=True) + 10e-20 + ) + + h_dyn = self.functional_nnmf2d( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + return h_dyn + + +class FunctionalNNMF2d(torch.autograd.Function): + 
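# The forward pass below unrolls the classic multiplicative update for
# KL-divergence NMF (in the spirit of Lee & Seung), applied per spatial
# position with the channel dimension as the factorization dimension.
# With the weight matrix W (out_channels x in_channels) row-normalized and
# a non-negative input x, each iteration computes, roughly:
#
#     r = h @ W                          # reconstruct the input channels
#     h = h * ((x / r) @ W.T)            # multiplicative update
#     h = h / h.sum(-1, keepdim=True)    # renormalize h onto the simplex
#
# When `epsilon` is given, the update is damped to
# h = h * (1 + epsilon * ((x / r) @ W.T)), which takes smaller steps.
# This summary is inferred from the code below; note the implementation
# first moves the channel axis to the last dimension via movedim.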
@staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + (input.shape[0], out_channels, input.shape[-2], input.shape[-1]), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + h = h.movedim(1, -1) + input = input.movedim(1, -1) + for _ in range(0, iterations): + reconstruction = torch.nn.functional.linear(h, weight.T) + reconstruction += 1e-20 + if epsilon is None: + h *= torch.nn.functional.linear((input / reconstruction), weight) + else: + h *= 1 + epsilon * torch.nn.functional.linear( + (input / reconstruction), weight + ) + h /= h.sum(-1, keepdim=True) + 10e-20 + h = h.movedim(-1, 1) + input = input.movedim(-1, 1) + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + h = h.movedim(1, -1) + grad_output = grad_output.movedim(1, -1) + input = input.movedim(1, -1) + big_r = torch.nn.functional.linear(h, weight.T) + big_r_div = 1.0 / (big_r + 1e-20) + + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + torch.nn.functional.linear(h * grad_output, weight.T) * big_r_div + ) + + del big_r_div + + # The weight gradient + if ctx.local_learning is False: + del big_r + + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (factor_x_div_r * grad_input) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + + grad_weight += torch.nn.functional.linear( + (h * grad_output) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ) + .T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + + else: + if ctx.local_learning_kl: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + else: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (2 * (input - big_r)) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + grad_input = grad_input.movedim(-1, 1) + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + 
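# torch.autograd.Function.backward must return one entry per argument of
# forward(); only `input` and `weight` are tensors here, so the remaining
# five arguments (out_channels, iterations, epsilon, local_learning,
# local_learning_kl) receive None in the tuple below.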
return ( + grad_input, + grad_weight, + None, + None, + None, + None, + None, + ) diff --git a/avg_pooling_conv2d/NOTE_THIS_ONE_DIES_AFTER_SOME_ITERATIONS b/avg_pooling_conv2d/NOTE_THIS_ONE_DIES_AFTER_SOME_ITERATIONS new file mode 100644 index 0000000..e69de29 diff --git a/avg_pooling_conv2d/append_block.py b/avg_pooling_conv2d/append_block.py new file mode 100644 index 0000000..80142d9 --- /dev/null +++ b/avg_pooling_conv2d/append_block.py @@ -0,0 +1,111 @@ +import torch +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, + last_layer: bool = False, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + if last_layer is False: + + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + stride=(1, 1), + padding=(0, 0), + bias=True, + device=torch_device, + ) + ) + # Init the cnn top layers 1x1 conv2d layers + for name, param in network[-1].named_parameters(): + with torch.no_grad(): + if name == "bias": + param.data *= 0 + if name == "weight": + assert param.shape[-2] == 1 + assert param.shape[-1] == 1 + param[: param.shape[0], : param.shape[0], 0, 0] = torch.eye( + param.shape[0], dtype=param.dtype, device=param.device + ) + param[param.shape[0] :, :, 0, 0] = 0 + param[:, param.shape[0] :, 0, 0] = 0 + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_cnn_top) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=momentum, + track_running_stats=track_running_stats, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git 
a/avg_pooling_conv2d/append_parameter.py b/avg_pooling_conv2d/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/avg_pooling_conv2d/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/avg_pooling_conv2d/convert_log_to_numpy.py b/avg_pooling_conv2d/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/avg_pooling_conv2d/convert_log_to_numpy.py @@ -0,0 +1,31 @@ +import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/avg_pooling_conv2d/data_loader.py b/avg_pooling_conv2d/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/avg_pooling_conv2d/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/avg_pooling_conv2d/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy b/avg_pooling_conv2d/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 0000000..a1ac572 Binary files /dev/null and b/avg_pooling_conv2d/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy differ diff --git a/avg_pooling_conv2d/get_the_data.py b/avg_pooling_conv2d/get_the_data.py new file mode 100644 index 0000000..fc61064 --- /dev/null +++ b/avg_pooling_conv2d/get_the_data.py @@ -0,0 +1,147 @@ +import torch +import torchvision # type: ignore +from data_loader import data_loader + +from torchvision.transforms import v2 # type: ignore +import numpy as np + + +def get_the_data( + dataset: str, + batch_size_train: int, + batch_size_test: int, + torch_device: torch.device, + input_dim_x: int, + input_dim_y: int, + flip_p: float = 0.5, + jitter_brightness: float = 0.5, + jitter_contrast: float = 0.1, + jitter_saturation: float = 0.1, + jitter_hue: float = 0.15, + da_auto_mode: bool = False, +) -> tuple[ + torch.utils.data.dataloader.DataLoader, + torch.utils.data.dataloader.DataLoader, + torchvision.transforms.Compose, + torchvision.transforms.Compose, +]: + if dataset == "MNIST": + tv_dataset_train = 
torchvision.datasets.MNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.MNIST( + root="data", train=False, download=True + ) + elif dataset == "FashionMNIST": + tv_dataset_train = torchvision.datasets.FashionMNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.FashionMNIST( + root="data", train=False, download=True + ) + elif dataset == "CIFAR10": + tv_dataset_train = torchvision.datasets.CIFAR10( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.CIFAR10( + root="data", train=False, download=True + ) + else: + raise NotImplementedError("This dataset is not implemented.") + + def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + torch.random.seed(worker_seed) + + g = torch.Generator() + g.manual_seed(0) + + if dataset == "MNIST" or dataset == "FashionMNIST": + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=tv_dataset_train.data, + labels=tv_dataset_train.targets, + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=tv_dataset_test.data, + labels=tv_dataset_test.targets, + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git a/avg_pooling_conv2d/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724150849.gp4u5.926684.0 b/avg_pooling_conv2d/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724150849.gp4u5.926684.0 new file mode 100644 index 0000000..ecc5206 Binary files /dev/null and 
b/avg_pooling_conv2d/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724150849.gp4u5.926684.0 differ diff --git a/avg_pooling_conv2d/loss_function.py b/avg_pooling_conv2d/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/avg_pooling_conv2d/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + .mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/avg_pooling_conv2d/make_network.py b/avg_pooling_conv2d/make_network.py new file mode 100644 index 0000000..464d9b9 --- /dev/null +++ b/avg_pooling_conv2d/make_network.py @@ -0,0 +1,212 @@ +import torch +from append_block import append_block +from L1NormLayer import L1NormLayer +from append_parameter import append_parameter + + +def make_network( + input_dim_x: int, + input_dim_y: int, + input_number_of_channel: int, + iterations: int, + torch_device: torch.device, + epsilon: bool | None = None, + positive_function_type: int = 0, + beta: float | None = None, + # Conv: + number_of_output_channels: list[int] = [32 * 1, 64 * 1, 96 * 1, 10], + kernel_size_conv: list[tuple[int, int]] = [ + (5, 5), + (5, 5), + (-1, -1), # Take the whole input image x and y size + (1, 1), + ], + stride_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + padding_conv: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert 
len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + last_layer=block_id == len(number_of_output_channels) - 1, + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + + network.append(torch.nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))) + test_image = network[-1](test_image) + + # network.append(torch.nn.ReLU()) + # test_image = network[-1](test_image) + + # mock_output = ( + # torch.nn.functional.conv2d( + # torch.zeros( + # 1, + # 1, + # test_image.shape[2], + # test_image.shape[3], + # ), + # torch.zeros((1, 1, 2, 2)), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # .squeeze(0) + # .squeeze(0) + # ) + + # network.append( + # torch.nn.Unfold( + # kernel_size=(2, 2), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # ) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Fold( + # output_size=mock_output.shape, + # kernel_size=(1, 1), + # dilation=1, + # padding=0, + # stride=1, + # ) + # ) + # test_image = network[-1](test_image) + + # network.append(L1NormLayer()) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Conv2d( + # in_channels=test_image.shape[1], + # out_channels=test_image.shape[1] // 4, + # kernel_size=(1, 1), + # bias=False, + # ).to(torch_device) + # ) + + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + # network.append( + # torch.nn.BatchNorm2d( + # num_features=test_image.shape[1], + # device=torch_device, + # momentum=0.1, + # track_running_stats=False, + # ) + # ) + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", + "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/avg_pooling_conv2d/make_optimize.py b/avg_pooling_conv2d/make_optimize.py new file mode 100644 index 0000000..ab1a4e0 --- /dev/null +++ b/avg_pooling_conv2d/make_optimize.py @@ -0,0 +1,32 @@ +import torch + + +def make_optimize( + parameters: list[list[torch.nn.parameter.Parameter]], + lr_initial: list[float], + eps=1e-10, +) -> tuple[ + list[torch.optim.Adam | 
None],
+    list[torch.optim.lr_scheduler.ReduceLROnPlateau | None],
+]:
+    list_optimizer: list[torch.optim.Adam | None] = []
+    list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = []
+
+    assert len(parameters) == len(lr_initial)
+
+    for i in range(0, len(parameters)):
+        if len(parameters[i]) > 0:
+            list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i]))
+        else:
+            list_optimizer.append(None)
+
+    for i in range(0, len(list_optimizer)):
+        if list_optimizer[i] is not None:
+            list_lr_scheduler.append(
+                torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps)  # type: ignore
+            )
+        else:
+            list_lr_scheduler.append(None)
+
+    return (list_optimizer, list_lr_scheduler)
diff --git a/avg_pooling_conv2d/non_linear_weigth_function.py b/avg_pooling_conv2d/non_linear_weigth_function.py
new file mode 100644
index 0000000..053a9b6
--- /dev/null
+++ b/avg_pooling_conv2d/non_linear_weigth_function.py
@@ -0,0 +1,26 @@
+import torch
+
+
+def non_linear_weigth_function(
+    weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int
+) -> torch.Tensor:
+
+    if positive_function_type == 0:
+        positive_weights = torch.abs(weight)
+
+    elif positive_function_type == 1:
+        assert beta is not None
+        positive_weights = weight
+        max_value = torch.abs(positive_weights).max()
+        if max_value > 80:
+            positive_weights = 80.0 * positive_weights / max_value
+        positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights)
+
+    elif positive_function_type == 2:
+        assert beta is not None
+        positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5
+
+    else:
+        positive_weights = weight
+
+    return positive_weights
diff --git a/avg_pooling_conv2d/plot.py b/avg_pooling_conv2d/plot.py
new file mode 100644
index 0000000..ad22d33
--- /dev/null
+++ b/avg_pooling_conv2d/plot.py
@@ -0,0 +1,15 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+data = np.load("data_log.npy")
+plt.loglog(
+    data[:, 0],
+    100.0 * (1.0 - data[:, 1] / 10000.0),
+    "k",
+)
+
+plt.legend()
+plt.xlabel("Epoch")
+plt.ylabel("Error [%]")
+plt.title("CIFAR10")
+plt.show()
diff --git a/avg_pooling_conv2d/run_network.py b/avg_pooling_conv2d/run_network.py
new file mode 100644
index 0000000..8d0a43f
--- /dev/null
+++ b/avg_pooling_conv2d/run_network.py
@@ -0,0 +1,251 @@
+import os
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+import argh
+
+import time
+import numpy as np
+import torch
+
+rand_seed: int = 21
+torch.manual_seed(rand_seed)
+torch.cuda.manual_seed(rand_seed)
+np.random.seed(rand_seed)
+
+from torch.utils.tensorboard import SummaryWriter
+
+from make_network import make_network
+from get_the_data import get_the_data
+from loss_function import loss_function
+from make_optimize import make_optimize
+
+
+def main(
+    lr_initial_nnmf: float = 0.01,
+    lr_initial_cnn_top: float = 0.001,
+    lr_initial_norm: float = 0.001,
+    iterations: int = 20,
+    dataset: str = "CIFAR10",  # "CIFAR10", "FashionMNIST", "MNIST"
+    only_print_network: bool = False,
+) -> None:
+
+    da_auto_mode: bool = False  # Automatic Data Augmentation from TorchVision
+    lr_limit: float = 1e-9
+
+    torch_device: torch.device = (
+        torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
+    )
+    torch.set_default_dtype(torch.float32)
+
+    # Some parameters
+    batch_size_train: int = 50  # 0
+    batch_size_test: int = 50  # 0
+    number_of_epoch: int = 500
+
+    loss_mode: int = 0
+    loss_coeffs_mse: float = 0.5
+    loss_coeffs_kldiv: float = 1.0
+    print(
+        "loss_mode: ",
+        loss_mode,
+        "loss_coeffs_mse: ",
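# loss_mode == 0 mixes both terms (see loss_function.py):
#     loss = (mse * loss_coeffs_mse + kldiv * loss_coeffs_kldiv)
#            / (|loss_coeffs_kldiv| + |loss_coeffs_mse|)
# so with the defaults above (0.5 and 1.0) the KL term dominates;
# loss_mode == 1 would use plain cross-entropy instead.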
+ loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + 
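# network.eval() switches train/eval-dependent modules to inference
# behaviour; since the BatchNorm2d layers here run with
# track_running_stats=False they keep using batch statistics, so the main
# effect comes from the torch.no_grad() block below, which disables
# gradient tracking for the test pass.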
network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/avg_pooling_conv2d_noinbetween1x1/L1NormLayer.py b/avg_pooling_conv2d_noinbetween1x1/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/avg_pooling_conv2d_noinbetween1x1/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/avg_pooling_conv2d_noinbetween1x1/Model_iter20_lr_-_1.0000e-02_1.0000e-03_.pt b/avg_pooling_conv2d_noinbetween1x1/Model_iter20_lr_-_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..189a11d Binary files /dev/null and b/avg_pooling_conv2d_noinbetween1x1/Model_iter20_lr_-_1.0000e-02_1.0000e-03_.pt differ diff --git a/avg_pooling_conv2d_noinbetween1x1/NNMF2d.py b/avg_pooling_conv2d_noinbetween1x1/NNMF2d.py new file mode 100644 index 0000000..b84d083 --- /dev/null +++ b/avg_pooling_conv2d_noinbetween1x1/NNMF2d.py @@ -0,0 +1,252 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2d(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + + def __init__( + self, + in_channels: int, + out_channels: int, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.in_channels = in_channels + self.out_channels = out_channels + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl 
= local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty((out_channels, in_channels), **factory_kwargs) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d = FunctionalNNMF2d.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=1, keepdim=True) + 10e-20 + ) + + h_dyn = self.functional_nnmf2d( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + return h_dyn + + +class FunctionalNNMF2d(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + (input.shape[0], out_channels, input.shape[-2], input.shape[-1]), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + h = h.movedim(1, -1) + input = input.movedim(1, -1) + for _ in range(0, iterations): + reconstruction = torch.nn.functional.linear(h, weight.T) + reconstruction += 1e-20 + if epsilon is None: + h *= torch.nn.functional.linear((input / reconstruction), weight) + else: + h *= 1 + epsilon * torch.nn.functional.linear( + (input / reconstruction), weight + ) + h /= h.sum(-1, keepdim=True) + 10e-20 + h = h.movedim(-1, 1) + input = input.movedim(-1, 1) + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + h = h.movedim(1, -1) + grad_output = grad_output.movedim(1, -1) + input = input.movedim(1, -1) + big_r = torch.nn.functional.linear(h, weight.T) + big_r_div = 1.0 / (big_r + 1e-20) + + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + torch.nn.functional.linear(h * grad_output, weight.T) * big_r_div + ) + + del big_r_div + + # The weight gradient + 
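# Two modes follow (summary inferred from the code): with
# local_learning=False, grad_weight is the backprop gradient of the NNMF
# fixed point, assembled from h, grad_output and x/R. With
# local_learning=True a backprop-free local update is used instead:
# roughly -h^T (x/R) in the KL variant (local_learning_kl=True), and
# -h^T (2 * (x - R)) in the MSE-like variant.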
if ctx.local_learning is False: + del big_r + + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (factor_x_div_r * grad_input) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + + grad_weight += torch.nn.functional.linear( + (h * grad_output) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ) + .T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + + else: + if ctx.local_learning_kl: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + else: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (2 * (input - big_r)) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + grad_input = grad_input.movedim(-1, 1) + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + return ( + grad_input, + grad_weight, + None, + None, + None, + None, + None, + ) diff --git a/avg_pooling_conv2d_noinbetween1x1/append_block.py b/avg_pooling_conv2d_noinbetween1x1/append_block.py new file mode 100644 index 0000000..5dd5dac --- /dev/null +++ b/avg_pooling_conv2d_noinbetween1x1/append_block.py @@ -0,0 +1,66 @@ +import torch +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, + last_layer: bool = False, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return 
test_image diff --git a/avg_pooling_conv2d_noinbetween1x1/append_parameter.py b/avg_pooling_conv2d_noinbetween1x1/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/avg_pooling_conv2d_noinbetween1x1/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/avg_pooling_conv2d_noinbetween1x1/convert_log_to_numpy.py b/avg_pooling_conv2d_noinbetween1x1/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/avg_pooling_conv2d_noinbetween1x1/convert_log_to_numpy.py @@ -0,0 +1,31 @@ +import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/avg_pooling_conv2d_noinbetween1x1/data_loader.py b/avg_pooling_conv2d_noinbetween1x1/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/avg_pooling_conv2d_noinbetween1x1/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/avg_pooling_conv2d_noinbetween1x1/data_log_iter20_lr_-_1.0000e-02_1.0000e-03_.npy b/avg_pooling_conv2d_noinbetween1x1/data_log_iter20_lr_-_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 0000000..d7b3664 Binary files /dev/null and b/avg_pooling_conv2d_noinbetween1x1/data_log_iter20_lr_-_1.0000e-02_1.0000e-03_.npy differ diff --git a/avg_pooling_conv2d_noinbetween1x1/get_the_data.py b/avg_pooling_conv2d_noinbetween1x1/get_the_data.py new file mode 100644 index 0000000..fc61064 --- /dev/null +++ b/avg_pooling_conv2d_noinbetween1x1/get_the_data.py @@ -0,0 +1,147 @@ +import torch +import torchvision # type: ignore +from data_loader import data_loader + +from torchvision.transforms import v2 # type: ignore +import numpy as np + + +def get_the_data( + dataset: str, + batch_size_train: int, + batch_size_test: int, + torch_device: torch.device, + input_dim_x: int, + input_dim_y: int, + flip_p: float = 0.5, + jitter_brightness: float = 0.5, + jitter_contrast: float = 0.1, + jitter_saturation: float = 0.1, + jitter_hue: float = 0.15, + da_auto_mode: bool = False, 
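# da_auto_mode=True swaps the manual RandomCrop/flip/ColorJitter training
# chain below for torchvision's AutoAugment with the CIFAR10 policy;
# the MNIST-style datasets always use plain crops.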
+) -> tuple[ + torch.utils.data.dataloader.DataLoader, + torch.utils.data.dataloader.DataLoader, + torchvision.transforms.Compose, + torchvision.transforms.Compose, +]: + if dataset == "MNIST": + tv_dataset_train = torchvision.datasets.MNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.MNIST( + root="data", train=False, download=True + ) + elif dataset == "FashionMNIST": + tv_dataset_train = torchvision.datasets.FashionMNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.FashionMNIST( + root="data", train=False, download=True + ) + elif dataset == "CIFAR10": + tv_dataset_train = torchvision.datasets.CIFAR10( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.CIFAR10( + root="data", train=False, download=True + ) + else: + raise NotImplementedError("This dataset is not implemented.") + + def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + torch.random.seed(worker_seed) + + g = torch.Generator() + g.manual_seed(0) + + if dataset == "MNIST" or dataset == "FashionMNIST": + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=tv_dataset_train.data, + labels=tv_dataset_train.targets, + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=tv_dataset_test.data, + labels=tv_dataset_test.targets, + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git a/avg_pooling_conv2d_noinbetween1x1/log_iter20_lr_-_1.0000e-02_1.0000e-03_/events.out.tfevents.1724150768.gp4u7.878668.0 
b/avg_pooling_conv2d_noinbetween1x1/log_iter20_lr_-_1.0000e-02_1.0000e-03_/events.out.tfevents.1724150768.gp4u7.878668.0 new file mode 100644 index 0000000..8432b6e Binary files /dev/null and b/avg_pooling_conv2d_noinbetween1x1/log_iter20_lr_-_1.0000e-02_1.0000e-03_/events.out.tfevents.1724150768.gp4u7.878668.0 differ diff --git a/avg_pooling_conv2d_noinbetween1x1/loss_function.py b/avg_pooling_conv2d_noinbetween1x1/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/avg_pooling_conv2d_noinbetween1x1/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + .mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/avg_pooling_conv2d_noinbetween1x1/make_network.py b/avg_pooling_conv2d_noinbetween1x1/make_network.py new file mode 100644 index 0000000..464d9b9 --- /dev/null +++ b/avg_pooling_conv2d_noinbetween1x1/make_network.py @@ -0,0 +1,212 @@ +import torch +from append_block import append_block +from L1NormLayer import L1NormLayer +from append_parameter import append_parameter + + +def make_network( + input_dim_x: int, + input_dim_y: int, + input_number_of_channel: int, + iterations: int, + torch_device: torch.device, + epsilon: bool | None = None, + positive_function_type: int = 0, + beta: float | None = None, + # Conv: + number_of_output_channels: list[int] = [32 * 1, 64 * 1, 96 * 1, 10], + kernel_size_conv: list[tuple[int, int]] = [ + (5, 5), + (5, 5), + (-1, -1), # Take the whole input image x and y size + (1, 1), + ], + stride_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + padding_conv: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == 
len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + last_layer=block_id == len(number_of_output_channels) - 1, + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + + network.append(torch.nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))) + test_image = network[-1](test_image) + + # network.append(torch.nn.ReLU()) + # test_image = network[-1](test_image) + + # mock_output = ( + # torch.nn.functional.conv2d( + # torch.zeros( + # 1, + # 1, + # test_image.shape[2], + # test_image.shape[3], + # ), + # torch.zeros((1, 1, 2, 2)), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # .squeeze(0) + # .squeeze(0) + # ) + + # network.append( + # torch.nn.Unfold( + # kernel_size=(2, 2), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # ) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Fold( + # output_size=mock_output.shape, + # kernel_size=(1, 1), + # dilation=1, + # padding=0, + # stride=1, + # ) + # ) + # test_image = network[-1](test_image) + + # network.append(L1NormLayer()) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Conv2d( + # in_channels=test_image.shape[1], + # out_channels=test_image.shape[1] // 4, + # kernel_size=(1, 1), + # bias=False, + # ).to(torch_device) + # ) + + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + # network.append( + # torch.nn.BatchNorm2d( + # num_features=test_image.shape[1], + # device=torch_device, + # momentum=0.1, + # track_running_stats=False, + # ) + # ) + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", + "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/avg_pooling_conv2d_noinbetween1x1/make_optimize.py 
b/avg_pooling_conv2d_noinbetween1x1/make_optimize.py
new file mode 100644
index 0000000..ab1a4e0
--- /dev/null
+++ b/avg_pooling_conv2d_noinbetween1x1/make_optimize.py
@@ -0,0 +1,32 @@
+import torch
+
+
+def make_optimize(
+    parameters: list[list[torch.nn.parameter.Parameter]],
+    lr_initial: list[float],
+    eps=1e-10,
+) -> tuple[
+    list[torch.optim.Adam | None],
+    list[torch.optim.lr_scheduler.ReduceLROnPlateau | None],
+]:
+    list_optimizer: list[torch.optim.Adam | None] = []
+    list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = []
+
+    assert len(parameters) == len(lr_initial)
+
+    for i in range(0, len(parameters)):
+        if len(parameters[i]) > 0:
+            list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i]))
+        else:
+            list_optimizer.append(None)
+
+    for i in range(0, len(list_optimizer)):
+        if list_optimizer[i] is not None:
+            list_lr_scheduler.append(
+                torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps)  # type: ignore
+            )
+        else:
+            list_lr_scheduler.append(None)
+
+    return (list_optimizer, list_lr_scheduler)
diff --git a/avg_pooling_conv2d_noinbetween1x1/non_linear_weigth_function.py b/avg_pooling_conv2d_noinbetween1x1/non_linear_weigth_function.py
new file mode 100644
index 0000000..053a9b6
--- /dev/null
+++ b/avg_pooling_conv2d_noinbetween1x1/non_linear_weigth_function.py
@@ -0,0 +1,26 @@
+import torch
+
+
+def non_linear_weigth_function(
+    weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int
+) -> torch.Tensor:
+
+    if positive_function_type == 0:
+        positive_weights = torch.abs(weight)
+
+    elif positive_function_type == 1:
+        assert beta is not None
+        positive_weights = weight
+        max_value = torch.abs(positive_weights).max()
+        if max_value > 80:
+            positive_weights = 80.0 * positive_weights / max_value
+        positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights)
+
+    elif positive_function_type == 2:
+        assert beta is not None
+        positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5
+
+    else:
+        positive_weights = weight
+
+    return positive_weights
diff --git a/avg_pooling_conv2d_noinbetween1x1/plot.py b/avg_pooling_conv2d_noinbetween1x1/plot.py
new file mode 100644
index 0000000..ad22d33
--- /dev/null
+++ b/avg_pooling_conv2d_noinbetween1x1/plot.py
@@ -0,0 +1,15 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+data = np.load("data_log.npy")
+plt.loglog(
+    data[:, 0],
+    100.0 * (1.0 - data[:, 1] / 10000.0),
+    "k",
+)
+
+plt.legend()
+plt.xlabel("Epoch")
+plt.ylabel("Error [%]")
+plt.title("CIFAR10")
+plt.show()
diff --git a/avg_pooling_conv2d_noinbetween1x1/run_network.py b/avg_pooling_conv2d_noinbetween1x1/run_network.py
new file mode 100644
index 0000000..8d0a43f
--- /dev/null
+++ b/avg_pooling_conv2d_noinbetween1x1/run_network.py
@@ -0,0 +1,251 @@
+import os
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+import argh
+
+import time
+import numpy as np
+import torch
+
+rand_seed: int = 21
+torch.manual_seed(rand_seed)
+torch.cuda.manual_seed(rand_seed)
+np.random.seed(rand_seed)
+
+from torch.utils.tensorboard import SummaryWriter
+
+from make_network import make_network
+from get_the_data import get_the_data
+from loss_function import loss_function
+from make_optimize import make_optimize
+
+
+def main(
+    lr_initial_nnmf: float = 0.01,
+    lr_initial_cnn_top: float = 0.001,
+    lr_initial_norm: float = 0.001,
+    iterations: int = 20,
+    dataset: str = "CIFAR10",  # "CIFAR10", "FashionMNIST", "MNIST"
+    only_print_network: bool = False,
+) -> None:
+
+    da_auto_mode: bool = False  #
Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 # 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / 
train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/avg_pooling_mlp/L1NormLayer.py b/avg_pooling_mlp/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/avg_pooling_mlp/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/avg_pooling_mlp/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt b/avg_pooling_mlp/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..906968d Binary files /dev/null and b/avg_pooling_mlp/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt differ diff --git a/avg_pooling_mlp/NNMF2d.py b/avg_pooling_mlp/NNMF2d.py new file mode 100644 index 0000000..b84d083 --- /dev/null +++ b/avg_pooling_mlp/NNMF2d.py @@ -0,0 +1,252 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2d(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + + def __init__( + self, + in_channels: int, + out_channels: int, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> 
None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.in_channels = in_channels + self.out_channels = out_channels + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty((out_channels, in_channels), **factory_kwargs) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d = FunctionalNNMF2d.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=1, keepdim=True) + 10e-20 + ) + + h_dyn = self.functional_nnmf2d( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + return h_dyn + + +class FunctionalNNMF2d(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + (input.shape[0], out_channels, input.shape[-2], input.shape[-1]), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + h = h.movedim(1, -1) + input = input.movedim(1, -1) + for _ in range(0, iterations): + reconstruction = torch.nn.functional.linear(h, weight.T) + reconstruction += 1e-20 + if epsilon is None: + h *= torch.nn.functional.linear((input / reconstruction), weight) + else: + h *= 1 + epsilon * torch.nn.functional.linear( + (input / reconstruction), weight + ) + h /= h.sum(-1, keepdim=True) + 10e-20 + h = h.movedim(-1, 1) + input = input.movedim(-1, 1) + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + h = 
h.movedim(1, -1) + grad_output = grad_output.movedim(1, -1) + input = input.movedim(1, -1) + big_r = torch.nn.functional.linear(h, weight.T) + big_r_div = 1.0 / (big_r + 1e-20) + + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + torch.nn.functional.linear(h * grad_output, weight.T) * big_r_div + ) + + del big_r_div + + # The weight gradient + if ctx.local_learning is False: + del big_r + + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (factor_x_div_r * grad_input) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + + grad_weight += torch.nn.functional.linear( + (h * grad_output) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ) + .T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + + else: + if ctx.local_learning_kl: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + else: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (2 * (input - big_r)) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + grad_input = grad_input.movedim(-1, 1) + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + return ( + grad_input, + grad_weight, + None, + None, + None, + None, + None, + ) diff --git a/avg_pooling_mlp/append_block.py b/avg_pooling_mlp/append_block.py new file mode 100644 index 0000000..b1f7d97 --- /dev/null +++ b/avg_pooling_mlp/append_block.py @@ -0,0 +1,150 @@ +import torch +from L1NormLayer import L1NormLayer +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, + last_layer: bool = False, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + # I need the output size + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, kernel_size_internal[0], kernel_size_internal[1])), + stride=stride, + padding=padding, + dilation=dilation, + ) + .squeeze(0) + .squeeze(0) + ) + network.append( + torch.nn.Unfold( + 
kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + bias=False, + ).to(torch_device) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + if last_layer is False: + + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + stride=(1, 1), + padding=(0, 0), + bias=True, + device=torch_device, + ) + ) + # Init the cnn top layers 1x1 conv2d layers + for name, param in network[-1].named_parameters(): + with torch.no_grad(): + if name == "bias": + param.data *= 0 + if name == "weight": + assert param.shape[-2] == 1 + assert param.shape[-1] == 1 + param[: param.shape[0], : param.shape[0], 0, 0] = torch.eye( + param.shape[0], dtype=param.dtype, device=param.device + ) + param[param.shape[0] :, :, 0, 0] = 0 + param[:, param.shape[0] :, 0, 0] = 0 + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_cnn_top) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=momentum, + track_running_stats=track_running_stats, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git a/avg_pooling_mlp/append_parameter.py b/avg_pooling_mlp/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/avg_pooling_mlp/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/avg_pooling_mlp/convert_log_to_numpy.py b/avg_pooling_mlp/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/avg_pooling_mlp/convert_log_to_numpy.py @@ -0,0 +1,31 @@ +import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path 
+ ".npy", data) diff --git a/avg_pooling_mlp/data_loader.py b/avg_pooling_mlp/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/avg_pooling_mlp/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/avg_pooling_mlp/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy b/avg_pooling_mlp/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 0000000..cc4bf47 Binary files /dev/null and b/avg_pooling_mlp/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy differ diff --git a/avg_pooling_mlp/get_the_data.py b/avg_pooling_mlp/get_the_data.py new file mode 100644 index 0000000..fc61064 --- /dev/null +++ b/avg_pooling_mlp/get_the_data.py @@ -0,0 +1,147 @@ +import torch +import torchvision # type: ignore +from data_loader import data_loader + +from torchvision.transforms import v2 # type: ignore +import numpy as np + + +def get_the_data( + dataset: str, + batch_size_train: int, + batch_size_test: int, + torch_device: torch.device, + input_dim_x: int, + input_dim_y: int, + flip_p: float = 0.5, + jitter_brightness: float = 0.5, + jitter_contrast: float = 0.1, + jitter_saturation: float = 0.1, + jitter_hue: float = 0.15, + da_auto_mode: bool = False, +) -> tuple[ + torch.utils.data.dataloader.DataLoader, + torch.utils.data.dataloader.DataLoader, + torchvision.transforms.Compose, + torchvision.transforms.Compose, +]: + if dataset == "MNIST": + tv_dataset_train = torchvision.datasets.MNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.MNIST( + root="data", train=False, download=True + ) + elif dataset == "FashionMNIST": + tv_dataset_train = torchvision.datasets.FashionMNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.FashionMNIST( + root="data", train=False, download=True + ) + elif dataset == "CIFAR10": + tv_dataset_train = torchvision.datasets.CIFAR10( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.CIFAR10( + root="data", train=False, download=True + ) + else: + raise NotImplementedError("This dataset is not implemented.") + + def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + torch.random.seed(worker_seed) + + g = torch.Generator() + g.manual_seed(0) + + if dataset == "MNIST" or dataset == "FashionMNIST": + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=tv_dataset_train.data, + labels=tv_dataset_train.targets, + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=tv_dataset_test.data, + 
labels=tv_dataset_test.targets, + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git a/avg_pooling_mlp/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724147595.gp4u5.917415.0 b/avg_pooling_mlp/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724147595.gp4u5.917415.0 new file mode 100644 index 0000000..9444f01 Binary files /dev/null and b/avg_pooling_mlp/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724147595.gp4u5.917415.0 differ diff --git a/avg_pooling_mlp/loss_function.py b/avg_pooling_mlp/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/avg_pooling_mlp/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + .mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / 
(abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/avg_pooling_mlp/make_network.py b/avg_pooling_mlp/make_network.py new file mode 100644 index 0000000..464d9b9 --- /dev/null +++ b/avg_pooling_mlp/make_network.py @@ -0,0 +1,212 @@ +import torch +from append_block import append_block +from L1NormLayer import L1NormLayer +from append_parameter import append_parameter + + +def make_network( + input_dim_x: int, + input_dim_y: int, + input_number_of_channel: int, + iterations: int, + torch_device: torch.device, + epsilon: bool | None = None, + positive_function_type: int = 0, + beta: float | None = None, + # Conv: + number_of_output_channels: list[int] = [32 * 1, 64 * 1, 96 * 1, 10], + kernel_size_conv: list[tuple[int, int]] = [ + (5, 5), + (5, 5), + (-1, -1), # Take the whole input image x and y size + (1, 1), + ], + stride_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + padding_conv: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + last_layer=block_id == len(number_of_output_channels) - 1, + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + + network.append(torch.nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))) + test_image = network[-1](test_image) + + # 
network.append(torch.nn.ReLU()) + # test_image = network[-1](test_image) + + # mock_output = ( + # torch.nn.functional.conv2d( + # torch.zeros( + # 1, + # 1, + # test_image.shape[2], + # test_image.shape[3], + # ), + # torch.zeros((1, 1, 2, 2)), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # .squeeze(0) + # .squeeze(0) + # ) + + # network.append( + # torch.nn.Unfold( + # kernel_size=(2, 2), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # ) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Fold( + # output_size=mock_output.shape, + # kernel_size=(1, 1), + # dilation=1, + # padding=0, + # stride=1, + # ) + # ) + # test_image = network[-1](test_image) + + # network.append(L1NormLayer()) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Conv2d( + # in_channels=test_image.shape[1], + # out_channels=test_image.shape[1] // 4, + # kernel_size=(1, 1), + # bias=False, + # ).to(torch_device) + # ) + + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + # network.append( + # torch.nn.BatchNorm2d( + # num_features=test_image.shape[1], + # device=torch_device, + # momentum=0.1, + # track_running_stats=False, + # ) + # ) + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", + "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/avg_pooling_mlp/make_optimize.py b/avg_pooling_mlp/make_optimize.py new file mode 100644 index 0000000..ab1a4e0 --- /dev/null +++ b/avg_pooling_mlp/make_optimize.py @@ -0,0 +1,32 @@ +import torch + + +def make_optimize( + parameters: list[list[torch.nn.parameter.Parameter]], + lr_initial: list[float], + eps=1e-10, +) -> tuple[ + list[torch.optim.Adam | None], + list[torch.optim.lr_scheduler.ReduceLROnPlateau | None], +]: + list_optimizer: list[torch.optim.Adam | None] = [] + list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = [] + + assert len(parameters) == len(lr_initial) + + for i in range(0, len(parameters)): + if len(parameters[i]) > 0: + list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i])) + else: + list_optimizer.append(None) + + for i in range(0, len(list_optimizer)): + if list_optimizer[i] is not None: + pass + list_lr_scheduler.append( + torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps) # type: ignore + ) + else: + list_lr_scheduler.append(None) + + return (list_optimizer, list_lr_scheduler) diff --git a/avg_pooling_mlp/non_linear_weigth_function.py b/avg_pooling_mlp/non_linear_weigth_function.py new file mode 100644 index 0000000..053a9b6 --- /dev/null +++ b/avg_pooling_mlp/non_linear_weigth_function.py @@ -0,0 +1,26 @@ +import torch + + +def non_linear_weigth_function( + weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int +) -> torch.Tensor: + + if positive_function_type == 0: + positive_weights = torch.abs(weight) + + elif positive_function_type == 1: + assert beta is not None + positive_weights = weight + max_value = torch.abs(positive_weights).max() + if max_value > 80: + positive_weights = 80.0 * 
positive_weights / max_value + positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights) + + elif positive_function_type == 2: + assert beta is not None + positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5 + + else: + positive_weights = weight + + return positive_weights diff --git a/avg_pooling_mlp/plot.py b/avg_pooling_mlp/plot.py new file mode 100644 index 0000000..ad22d33 --- /dev/null +++ b/avg_pooling_mlp/plot.py @@ -0,0 +1,15 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.load("data_log.npy") +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + "k", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") +plt.title("CIFAR10") +plt.show() diff --git a/avg_pooling_mlp/run_network.py b/avg_pooling_mlp/run_network.py new file mode 100644 index 0000000..8d0a43f --- /dev/null +++ b/avg_pooling_mlp/run_network.py @@ -0,0 +1,251 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +import argh + +import time +import numpy as np +import torch + +rand_seed: int = 21 +torch.manual_seed(rand_seed) +torch.cuda.manual_seed(rand_seed) +np.random.seed(rand_seed) + +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.01, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 # 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + 
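+        # These learning rates have to line up with the parameter groups
+        # that make_network returns: cnn_top, nnmf, norm. Empty groups end
+        # up with a None optimizer and None scheduler in make_optimize.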
lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git 
a/avg_pooling_mlp_noinbetween1x1/L1NormLayer.py b/avg_pooling_mlp_noinbetween1x1/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/avg_pooling_mlp_noinbetween1x1/Model_iter20_lr_-_1.0000e-02_1.0000e-03_.pt b/avg_pooling_mlp_noinbetween1x1/Model_iter20_lr_-_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..76148b9 Binary files /dev/null and b/avg_pooling_mlp_noinbetween1x1/Model_iter20_lr_-_1.0000e-02_1.0000e-03_.pt differ diff --git a/avg_pooling_mlp_noinbetween1x1/NNMF2d.py b/avg_pooling_mlp_noinbetween1x1/NNMF2d.py new file mode 100644 index 0000000..b84d083 --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/NNMF2d.py @@ -0,0 +1,252 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2d(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + + def __init__( + self, + in_channels: int, + out_channels: int, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.in_channels = in_channels + self.out_channels = out_channels + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty((out_channels, in_channels), **factory_kwargs) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d = FunctionalNNMF2d.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=1, keepdim=True) + 10e-20 + ) + + h_dyn = self.functional_nnmf2d( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + return h_dyn + + +class FunctionalNNMF2d(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + 
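+        # The loop below performs multiplicative NNMF inference: with the
+        # rows of weight summing to one and the input L1-normalised, h is
+        # repeatedly updated as  h <- h * ((input / (h @ weight)) @ weight.T)
+        # and then renormalised, i.e. the classic KL-divergence NNMF update.
+        # With epsilon set, the update is damped: h <- h * (1 + epsilon * ...).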
ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + (input.shape[0], out_channels, input.shape[-2], input.shape[-1]), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + h = h.movedim(1, -1) + input = input.movedim(1, -1) + for _ in range(0, iterations): + reconstruction = torch.nn.functional.linear(h, weight.T) + reconstruction += 1e-20 + if epsilon is None: + h *= torch.nn.functional.linear((input / reconstruction), weight) + else: + h *= 1 + epsilon * torch.nn.functional.linear( + (input / reconstruction), weight + ) + h /= h.sum(-1, keepdim=True) + 10e-20 + h = h.movedim(-1, 1) + input = input.movedim(-1, 1) + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + h = h.movedim(1, -1) + grad_output = grad_output.movedim(1, -1) + input = input.movedim(1, -1) + big_r = torch.nn.functional.linear(h, weight.T) + big_r_div = 1.0 / (big_r + 1e-20) + + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + torch.nn.functional.linear(h * grad_output, weight.T) * big_r_div + ) + + del big_r_div + + # The weight gradient + if ctx.local_learning is False: + del big_r + + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (factor_x_div_r * grad_input) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + + grad_weight += torch.nn.functional.linear( + (h * grad_output) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ) + .T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + + else: + if ctx.local_learning_kl: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + else: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (2 * (input - big_r)) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + grad_input = grad_input.movedim(-1, 1) + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + return ( + grad_input, + grad_weight, + None, + 
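+            # One slot per argument of forward(): only input and weight are
+            # differentiable; out_channels, iterations, epsilon and the two
+            # local-learning flags are plain Python values, hence None.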
None, + None, + None, + None, + ) diff --git a/avg_pooling_mlp_noinbetween1x1/append_block.py b/avg_pooling_mlp_noinbetween1x1/append_block.py new file mode 100644 index 0000000..ebcd39e --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/append_block.py @@ -0,0 +1,105 @@ +import torch +from L1NormLayer import L1NormLayer +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, + last_layer: bool = False, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + # I need the output size + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, kernel_size_internal[0], kernel_size_internal[1])), + stride=stride, + padding=padding, + dilation=dilation, + ) + .squeeze(0) + .squeeze(0) + ) + network.append( + torch.nn.Unfold( + kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + bias=False, + ).to(torch_device) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git a/avg_pooling_mlp_noinbetween1x1/append_parameter.py b/avg_pooling_mlp_noinbetween1x1/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/avg_pooling_mlp_noinbetween1x1/convert_log_to_numpy.py b/avg_pooling_mlp_noinbetween1x1/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/convert_log_to_numpy.py @@ -0,0 +1,31 @@ 
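+# Scans every log_* TensorBoard directory, extracts the scalar
+# "Test Number Correct" and stores it as an [epoch, value] array in
+# data_<log dir>.npy, which plot.py then turns into an error curve.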
+import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/avg_pooling_mlp_noinbetween1x1/data_loader.py b/avg_pooling_mlp_noinbetween1x1/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/avg_pooling_mlp_noinbetween1x1/data_log_iter20_lr_-_1.0000e-02_1.0000e-03_.npy b/avg_pooling_mlp_noinbetween1x1/data_log_iter20_lr_-_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 0000000..89f4213 Binary files /dev/null and b/avg_pooling_mlp_noinbetween1x1/data_log_iter20_lr_-_1.0000e-02_1.0000e-03_.npy differ diff --git a/avg_pooling_mlp_noinbetween1x1/get_the_data.py b/avg_pooling_mlp_noinbetween1x1/get_the_data.py new file mode 100644 index 0000000..fc61064 --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/get_the_data.py @@ -0,0 +1,147 @@ +import torch +import torchvision # type: ignore +from data_loader import data_loader + +from torchvision.transforms import v2 # type: ignore +import numpy as np + + +def get_the_data( + dataset: str, + batch_size_train: int, + batch_size_test: int, + torch_device: torch.device, + input_dim_x: int, + input_dim_y: int, + flip_p: float = 0.5, + jitter_brightness: float = 0.5, + jitter_contrast: float = 0.1, + jitter_saturation: float = 0.1, + jitter_hue: float = 0.15, + da_auto_mode: bool = False, +) -> tuple[ + torch.utils.data.dataloader.DataLoader, + torch.utils.data.dataloader.DataLoader, + torchvision.transforms.Compose, + torchvision.transforms.Compose, +]: + if dataset == "MNIST": + tv_dataset_train = torchvision.datasets.MNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.MNIST( + root="data", train=False, download=True + ) + elif dataset == "FashionMNIST": + tv_dataset_train = torchvision.datasets.FashionMNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.FashionMNIST( + root="data", train=False, download=True + ) + elif dataset == "CIFAR10": + tv_dataset_train = torchvision.datasets.CIFAR10( + root="data", train=True, download=True + ) + 
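+        # CIFAR10 arrives as a uint8 numpy array of shape [N, 32, 32, 3];
+        # further down it is wrapped in a tensor and moved to channels-first
+        # layout ([N, 3, 32, 32]) via movedim(-1, 1) before data_loader.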
tv_dataset_test = torchvision.datasets.CIFAR10( + root="data", train=False, download=True + ) + else: + raise NotImplementedError("This dataset is not implemented.") + + def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + torch.random.seed(worker_seed) + + g = torch.Generator() + g.manual_seed(0) + + if dataset == "MNIST" or dataset == "FashionMNIST": + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=tv_dataset_train.data, + labels=tv_dataset_train.targets, + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=tv_dataset_test.data, + labels=tv_dataset_test.targets, + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git a/avg_pooling_mlp_noinbetween1x1/log_iter20_lr_-_1.0000e-02_1.0000e-03_/events.out.tfevents.1724149524.gp4u5.923398.0 b/avg_pooling_mlp_noinbetween1x1/log_iter20_lr_-_1.0000e-02_1.0000e-03_/events.out.tfevents.1724149524.gp4u5.923398.0 new file mode 100644 index 0000000..8c5d7aa Binary files /dev/null and b/avg_pooling_mlp_noinbetween1x1/log_iter20_lr_-_1.0000e-02_1.0000e-03_/events.out.tfevents.1724149524.gp4u5.923398.0 differ diff --git a/avg_pooling_mlp_noinbetween1x1/loss_function.py b/avg_pooling_mlp_noinbetween1x1/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 
10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + .mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/avg_pooling_mlp_noinbetween1x1/make_network.py b/avg_pooling_mlp_noinbetween1x1/make_network.py new file mode 100644 index 0000000..464d9b9 --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/make_network.py @@ -0,0 +1,212 @@ +import torch +from append_block import append_block +from L1NormLayer import L1NormLayer +from append_parameter import append_parameter + + +def make_network( + input_dim_x: int, + input_dim_y: int, + input_number_of_channel: int, + iterations: int, + torch_device: torch.device, + epsilon: bool | None = None, + positive_function_type: int = 0, + beta: float | None = None, + # Conv: + number_of_output_channels: list[int] = [32 * 1, 64 * 1, 96 * 1, 10], + kernel_size_conv: list[tuple[int, int]] = [ + (5, 5), + (5, 5), + (-1, -1), # Take the whole input image x and y size + (1, 1), + ], + stride_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + padding_conv: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + 
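+    # Each pass through this loop appends one block (see append_block.py):
+    # ReLU -> Unfold/Fold (im2col, each patch becomes one long channel
+    # vector) -> L1 normalisation over channels -> bias-free 1x1 Conv2d
+    # (its weights land in the nnmf parameter group) -> BatchNorm2d while
+    # any spatial extent remains. test_image is pushed through every new
+    # module so the next layer is built with the correct input shape.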
for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + last_layer=block_id == len(number_of_output_channels) - 1, + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + + network.append(torch.nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))) + test_image = network[-1](test_image) + + # network.append(torch.nn.ReLU()) + # test_image = network[-1](test_image) + + # mock_output = ( + # torch.nn.functional.conv2d( + # torch.zeros( + # 1, + # 1, + # test_image.shape[2], + # test_image.shape[3], + # ), + # torch.zeros((1, 1, 2, 2)), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # .squeeze(0) + # .squeeze(0) + # ) + + # network.append( + # torch.nn.Unfold( + # kernel_size=(2, 2), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # ) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Fold( + # output_size=mock_output.shape, + # kernel_size=(1, 1), + # dilation=1, + # padding=0, + # stride=1, + # ) + # ) + # test_image = network[-1](test_image) + + # network.append(L1NormLayer()) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Conv2d( + # in_channels=test_image.shape[1], + # out_channels=test_image.shape[1] // 4, + # kernel_size=(1, 1), + # bias=False, + # ).to(torch_device) + # ) + + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + # network.append( + # torch.nn.BatchNorm2d( + # num_features=test_image.shape[1], + # device=torch_device, + # momentum=0.1, + # track_running_stats=False, + # ) + # ) + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", + "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/avg_pooling_mlp_noinbetween1x1/make_optimize.py b/avg_pooling_mlp_noinbetween1x1/make_optimize.py new file mode 100644 index 0000000..ab1a4e0 --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/make_optimize.py @@ -0,0 +1,32 @@ +import torch + + +def make_optimize( + parameters: list[list[torch.nn.parameter.Parameter]], + lr_initial: list[float], + eps=1e-10, +) -> tuple[ + list[torch.optim.Adam | None], + list[torch.optim.lr_scheduler.ReduceLROnPlateau | None], +]: + list_optimizer: list[torch.optim.Adam | None] = [] + list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = [] + + assert len(parameters) == len(lr_initial) + + for i in range(0, len(parameters)): + if len(parameters[i]) > 0: + list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i])) + else: + list_optimizer.append(None) + + for i in range(0, len(list_optimizer)): + if 
list_optimizer[i] is not None: + pass + list_lr_scheduler.append( + torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps) # type: ignore + ) + else: + list_lr_scheduler.append(None) + + return (list_optimizer, list_lr_scheduler) diff --git a/avg_pooling_mlp_noinbetween1x1/non_linear_weigth_function.py b/avg_pooling_mlp_noinbetween1x1/non_linear_weigth_function.py new file mode 100644 index 0000000..053a9b6 --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/non_linear_weigth_function.py @@ -0,0 +1,26 @@ +import torch + + +def non_linear_weigth_function( + weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int +) -> torch.Tensor: + + if positive_function_type == 0: + positive_weights = torch.abs(weight) + + elif positive_function_type == 1: + assert beta is not None + positive_weights = weight + max_value = torch.abs(positive_weights).max() + if max_value > 80: + positive_weights = 80.0 * positive_weights / max_value + positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights) + + elif positive_function_type == 2: + assert beta is not None + positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5 + + else: + positive_weights = weight + + return positive_weights diff --git a/avg_pooling_mlp_noinbetween1x1/plot.py b/avg_pooling_mlp_noinbetween1x1/plot.py new file mode 100644 index 0000000..ad22d33 --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/plot.py @@ -0,0 +1,15 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.load("data_log.npy") +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + "k", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") +plt.title("CIFAR10") +plt.show() diff --git a/avg_pooling_mlp_noinbetween1x1/run_network.py b/avg_pooling_mlp_noinbetween1x1/run_network.py new file mode 100644 index 0000000..8d0a43f --- /dev/null +++ b/avg_pooling_mlp_noinbetween1x1/run_network.py @@ -0,0 +1,251 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +import argh + +import time +import numpy as np +import torch + +rand_seed: int = 21 +torch.manual_seed(rand_seed) +torch.cuda.manual_seed(rand_seed) +np.random.seed(rand_seed) + +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.01, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 # 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( 
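+        # For CIFAR10 the raw 32x32 images are cropped to the 28x28
+        # input_dim by the returned processing chains (RandomCrop while
+        # training in the default augmentation mode, CenterCrop for
+        # testing); the MNIST variants use 24x24.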
+ get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", 
train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/avg_pooling_nnmf/L1NormLayer.py b/avg_pooling_nnmf/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/avg_pooling_nnmf/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/avg_pooling_nnmf/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt b/avg_pooling_nnmf/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..fa4e10d Binary files /dev/null and b/avg_pooling_nnmf/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt differ diff --git a/avg_pooling_nnmf/NNMF2d.py b/avg_pooling_nnmf/NNMF2d.py new file mode 100644 index 0000000..b84d083 --- /dev/null +++ b/avg_pooling_nnmf/NNMF2d.py @@ -0,0 +1,252 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2d(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + + def __init__( + self, + in_channels: int, + out_channels: int, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.in_channels = in_channels + self.out_channels = out_channels + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty((out_channels, in_channels), **factory_kwargs) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d = FunctionalNNMF2d.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon 
is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=1, keepdim=True) + 10e-20 + ) + + h_dyn = self.functional_nnmf2d( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + return h_dyn + + +class FunctionalNNMF2d(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + (input.shape[0], out_channels, input.shape[-2], input.shape[-1]), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + h = h.movedim(1, -1) + input = input.movedim(1, -1) + for _ in range(0, iterations): + reconstruction = torch.nn.functional.linear(h, weight.T) + reconstruction += 1e-20 + if epsilon is None: + h *= torch.nn.functional.linear((input / reconstruction), weight) + else: + h *= 1 + epsilon * torch.nn.functional.linear( + (input / reconstruction), weight + ) + h /= h.sum(-1, keepdim=True) + 10e-20 + h = h.movedim(-1, 1) + input = input.movedim(-1, 1) + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + h = h.movedim(1, -1) + grad_output = grad_output.movedim(1, -1) + input = input.movedim(1, -1) + big_r = torch.nn.functional.linear(h, weight.T) + big_r_div = 1.0 / (big_r + 1e-20) + + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + torch.nn.functional.linear(h * grad_output, weight.T) * big_r_div + ) + + del big_r_div + + # The weight gradient + if ctx.local_learning is False: + del big_r + + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (factor_x_div_r * grad_input) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + + grad_weight += torch.nn.functional.linear( + (h * grad_output) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ) 
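+                # Flattening the batch and both spatial axes lets this
+                # weight-gradient term be computed as a single matrix
+                # product over (N*H*W, channels)-shaped views.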
+ .T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + + else: + if ctx.local_learning_kl: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + else: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (2 * (input - big_r)) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + grad_input = grad_input.movedim(-1, 1) + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + return ( + grad_input, + grad_weight, + None, + None, + None, + None, + None, + ) diff --git a/avg_pooling_nnmf/append_block.py b/avg_pooling_nnmf/append_block.py new file mode 100644 index 0000000..a8de5d2 --- /dev/null +++ b/avg_pooling_nnmf/append_block.py @@ -0,0 +1,149 @@ +import torch +from L1NormLayer import L1NormLayer +from NNMF2d import NNMF2d +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + # I need the output size + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, kernel_size_internal[0], kernel_size_internal[1])), + stride=stride, + padding=padding, + dilation=dilation, + ) + .squeeze(0) + .squeeze(0) + ) + network.append( + torch.nn.Unfold( + kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + NNMF2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + local_learning=local_learning, + local_learning_kl=local_learning_kl, + ).to(torch_device) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + 
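+        # BatchNorm2d is inserted only while the feature map still has
+        # spatial extent; after a full-field block the map is 1x1 and the
+        # normalization is skipped.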
network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + stride=(1, 1), + padding=(0, 0), + bias=True, + device=torch_device, + ) + ) + # Init the cnn top layers 1x1 conv2d layers + for name, param in network[-1].named_parameters(): + with torch.no_grad(): + if name == "bias": + param.data *= 0 + if name == "weight": + assert param.shape[-2] == 1 + assert param.shape[-1] == 1 + param[: param.shape[0], : param.shape[0], 0, 0] = torch.eye( + param.shape[0], dtype=param.dtype, device=param.device + ) + param[param.shape[0] :, :, 0, 0] = 0 + param[:, param.shape[0] :, 0, 0] = 0 + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_cnn_top) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=momentum, + track_running_stats=track_running_stats, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git a/avg_pooling_nnmf/append_parameter.py b/avg_pooling_nnmf/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/avg_pooling_nnmf/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/avg_pooling_nnmf/convert_log_to_numpy.py b/avg_pooling_nnmf/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/avg_pooling_nnmf/convert_log_to_numpy.py @@ -0,0 +1,31 @@ +import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/avg_pooling_nnmf/data_loader.py b/avg_pooling_nnmf/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/avg_pooling_nnmf/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + 
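+        # The whole dataset already sits on torch_device as dense tensors
+        # (scaled to [0, 1] above), so each batch is a cheap tensor slice
+        # and no worker processes are needed.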
torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/avg_pooling_nnmf/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy b/avg_pooling_nnmf/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 0000000..2ac820d Binary files /dev/null and b/avg_pooling_nnmf/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy differ diff --git a/avg_pooling_nnmf/get_the_data.py b/avg_pooling_nnmf/get_the_data.py new file mode 100644 index 0000000..fc61064 --- /dev/null +++ b/avg_pooling_nnmf/get_the_data.py @@ -0,0 +1,147 @@ +import torch +import torchvision # type: ignore +from data_loader import data_loader + +from torchvision.transforms import v2 # type: ignore +import numpy as np + + +def get_the_data( + dataset: str, + batch_size_train: int, + batch_size_test: int, + torch_device: torch.device, + input_dim_x: int, + input_dim_y: int, + flip_p: float = 0.5, + jitter_brightness: float = 0.5, + jitter_contrast: float = 0.1, + jitter_saturation: float = 0.1, + jitter_hue: float = 0.15, + da_auto_mode: bool = False, +) -> tuple[ + torch.utils.data.dataloader.DataLoader, + torch.utils.data.dataloader.DataLoader, + torchvision.transforms.Compose, + torchvision.transforms.Compose, +]: + if dataset == "MNIST": + tv_dataset_train = torchvision.datasets.MNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.MNIST( + root="data", train=False, download=True + ) + elif dataset == "FashionMNIST": + tv_dataset_train = torchvision.datasets.FashionMNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.FashionMNIST( + root="data", train=False, download=True + ) + elif dataset == "CIFAR10": + tv_dataset_train = torchvision.datasets.CIFAR10( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.CIFAR10( + root="data", train=False, download=True + ) + else: + raise NotImplementedError("This dataset is not implemented.") + + def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + torch.random.seed(worker_seed) + + g = torch.Generator() + g.manual_seed(0) + + if dataset == "MNIST" or dataset == "FashionMNIST": + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=tv_dataset_train.data, + labels=tv_dataset_train.targets, + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=tv_dataset_test.data, + labels=tv_dataset_test.targets, + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + 
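+            # CIFAR arrays are stored NHWC; movedim(-1, 1) below rearranges
+            # them into the NCHW layout the network expects.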
pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git a/avg_pooling_nnmf/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724147676.gp4u6.736716.0 b/avg_pooling_nnmf/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724147676.gp4u6.736716.0 new file mode 100644 index 0000000..3138209 Binary files /dev/null and b/avg_pooling_nnmf/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724147676.gp4u6.736716.0 differ diff --git a/avg_pooling_nnmf/loss_function.py b/avg_pooling_nnmf/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/avg_pooling_nnmf/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + .mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/avg_pooling_nnmf/make_network.py b/avg_pooling_nnmf/make_network.py new file mode 100644 index 0000000..0a2c532 --- /dev/null +++ b/avg_pooling_nnmf/make_network.py @@ -0,0 +1,215 @@ +import torch +from append_block import append_block +from L1NormLayer import L1NormLayer +from NNMF2d import NNMF2d +from append_parameter import append_parameter + + +def make_network( + input_dim_x: int, + input_dim_y: int, + input_number_of_channel: int, + iterations: int, + torch_device: torch.device, + epsilon: bool | None = None, + positive_function_type: int = 0, + beta: float | None = None, 
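+    # (epsilon is annotated bool | None but is forwarded to NNMF2d, which
+    # declares it float | None; float | None is presumably the intent.)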
+ # Conv: + number_of_output_channels: list[int] = [32, 64, 96, 10], + kernel_size_conv: list[tuple[int, int]] = [ + (5, 5), + (5, 5), + (-1, -1), # Take the whole input image x and y size + (1, 1), + ], + stride_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + padding_conv: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + network.append(torch.nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))) + test_image = network[-1](test_image) + + # network.append(torch.nn.ReLU()) + # test_image = network[-1](test_image) + + # mock_output = ( + # torch.nn.functional.conv2d( + # torch.zeros( + # 1, + # 1, + # test_image.shape[2], + # test_image.shape[3], + # ), + # torch.zeros((1, 1, 2, 2)), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # .squeeze(0) + # .squeeze(0) + # ) + + # network.append( + # torch.nn.Unfold( + # kernel_size=(2, 2), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # ) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Fold( + # output_size=mock_output.shape, + # kernel_size=(1, 1), + # dilation=1, + # padding=0, + # stride=1, + # ) + # ) + # test_image = network[-1](test_image) + + # network.append(L1NormLayer()) + # test_image = network[-1](test_image) + + # network.append( + # NNMF2d( + # 
in_channels=test_image.shape[1], + # out_channels=test_image.shape[1] // 4, + # epsilon=epsilon, + # positive_function_type=positive_function_type, + # beta=beta, + # iterations=iterations, + # local_learning=False, + # local_learning_kl=False, + # ).to(torch_device) + # ) + + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + # network.append( + # torch.nn.BatchNorm2d( + # num_features=test_image.shape[1], + # device=torch_device, + # momentum=0.1, + # track_running_stats=False, + # ) + # ) + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", + "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/avg_pooling_nnmf/make_optimize.py b/avg_pooling_nnmf/make_optimize.py new file mode 100644 index 0000000..ab1a4e0 --- /dev/null +++ b/avg_pooling_nnmf/make_optimize.py @@ -0,0 +1,32 @@ +import torch + + +def make_optimize( + parameters: list[list[torch.nn.parameter.Parameter]], + lr_initial: list[float], + eps=1e-10, +) -> tuple[ + list[torch.optim.Adam | None], + list[torch.optim.lr_scheduler.ReduceLROnPlateau | None], +]: + list_optimizer: list[torch.optim.Adam | None] = [] + list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = [] + + assert len(parameters) == len(lr_initial) + + for i in range(0, len(parameters)): + if len(parameters[i]) > 0: + list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i])) + else: + list_optimizer.append(None) + + for i in range(0, len(list_optimizer)): + if list_optimizer[i] is not None: + pass + list_lr_scheduler.append( + torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps) # type: ignore + ) + else: + list_lr_scheduler.append(None) + + return (list_optimizer, list_lr_scheduler) diff --git a/avg_pooling_nnmf/non_linear_weigth_function.py b/avg_pooling_nnmf/non_linear_weigth_function.py new file mode 100644 index 0000000..053a9b6 --- /dev/null +++ b/avg_pooling_nnmf/non_linear_weigth_function.py @@ -0,0 +1,26 @@ +import torch + + +def non_linear_weigth_function( + weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int +) -> torch.Tensor: + + if positive_function_type == 0: + positive_weights = torch.abs(weight) + + elif positive_function_type == 1: + assert beta is not None + positive_weights = weight + max_value = torch.abs(positive_weights).max() + if max_value > 80: + positive_weights = 80.0 * positive_weights / max_value + positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights) + + elif positive_function_type == 2: + assert beta is not None + positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5 + + else: + positive_weights = weight + + return positive_weights diff --git a/avg_pooling_nnmf/plot.py b/avg_pooling_nnmf/plot.py new file mode 100644 index 0000000..ad22d33 --- /dev/null +++ b/avg_pooling_nnmf/plot.py @@ -0,0 +1,15 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.load("data_log.npy") +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + "k", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") 
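+# Assumes "Test Number Correct" was logged against a 10000-sample test set,
+# so the error rate is 100 * (1 - correct / 10000).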
+plt.title("CIFAR10") +plt.show() diff --git a/avg_pooling_nnmf/run_network.py b/avg_pooling_nnmf/run_network.py new file mode 100644 index 0000000..8d0a43f --- /dev/null +++ b/avg_pooling_nnmf/run_network.py @@ -0,0 +1,251 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +import argh + +import time +import numpy as np +import torch + +rand_seed: int = 21 +torch.manual_seed(rand_seed) +torch.cuda.manual_seed(rand_seed) +np.random.seed(rand_seed) + +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.01, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 # 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network 
into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/avg_pooling_nnmf_noinbetween1x1/L1NormLayer.py b/avg_pooling_nnmf_noinbetween1x1/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/avg_pooling_nnmf_noinbetween1x1/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/avg_pooling_nnmf_noinbetween1x1/Model_iter20_lr_-_1.0000e-02_1.0000e-03_.pt b/avg_pooling_nnmf_noinbetween1x1/Model_iter20_lr_-_1.0000e-02_1.0000e-03_.pt new file mode 
100644 index 0000000..00e3867 Binary files /dev/null and b/avg_pooling_nnmf_noinbetween1x1/Model_iter20_lr_-_1.0000e-02_1.0000e-03_.pt differ diff --git a/avg_pooling_nnmf_noinbetween1x1/NNMF2d.py b/avg_pooling_nnmf_noinbetween1x1/NNMF2d.py new file mode 100644 index 0000000..b84d083 --- /dev/null +++ b/avg_pooling_nnmf_noinbetween1x1/NNMF2d.py @@ -0,0 +1,252 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2d(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + + def __init__( + self, + in_channels: int, + out_channels: int, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.in_channels = in_channels + self.out_channels = out_channels + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty((out_channels, in_channels), **factory_kwargs) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d = FunctionalNNMF2d.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=1, keepdim=True) + 10e-20 + ) + + h_dyn = self.functional_nnmf2d( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + return h_dyn + + +class FunctionalNNMF2d(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + (input.shape[0], out_channels, input.shape[-2], input.shape[-1]), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + h = h.movedim(1, -1) + input = input.movedim(1, -1) + for _ in range(0, iterations): + reconstruction = torch.nn.functional.linear(h, weight.T) + reconstruction += 1e-20 + if epsilon is None: + h *= torch.nn.functional.linear((input / reconstruction), weight) + else: + h *= 1 + epsilon * torch.nn.functional.linear( + (input / 
reconstruction), weight + ) + h /= h.sum(-1, keepdim=True) + 10e-20 + h = h.movedim(-1, 1) + input = input.movedim(-1, 1) + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + h = h.movedim(1, -1) + grad_output = grad_output.movedim(1, -1) + input = input.movedim(1, -1) + big_r = torch.nn.functional.linear(h, weight.T) + big_r_div = 1.0 / (big_r + 1e-20) + + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + torch.nn.functional.linear(h * grad_output, weight.T) * big_r_div + ) + + del big_r_div + + # The weight gradient + if ctx.local_learning is False: + del big_r + + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (factor_x_div_r * grad_input) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + + grad_weight += torch.nn.functional.linear( + (h * grad_output) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ) + .T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + + else: + if ctx.local_learning_kl: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + else: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (2 * (input - big_r)) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + grad_input = grad_input.movedim(-1, 1) + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + return ( + grad_input, + grad_weight, + None, + None, + None, + None, + None, + ) diff --git a/avg_pooling_nnmf_noinbetween1x1/append_block.py b/avg_pooling_nnmf_noinbetween1x1/append_block.py new file mode 100644 index 0000000..e0e9c9b --- /dev/null +++ b/avg_pooling_nnmf_noinbetween1x1/append_block.py @@ -0,0 +1,109 @@ +import torch +from L1NormLayer import L1NormLayer +from NNMF2d import NNMF2d +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + 
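+    # The geometry arguments mirror torch.nn.Unfold; kernel_size entries
+    # below 1 are replaced further down by the full height/width of the
+    # incoming test_image.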
dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + # I need the output size + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, kernel_size_internal[0], kernel_size_internal[1])), + stride=stride, + padding=padding, + dilation=dilation, + ) + .squeeze(0) + .squeeze(0) + ) + network.append( + torch.nn.Unfold( + kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + NNMF2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + local_learning=local_learning, + local_learning_kl=local_learning_kl, + ).to(torch_device) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git a/avg_pooling_nnmf_noinbetween1x1/append_parameter.py b/avg_pooling_nnmf_noinbetween1x1/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/avg_pooling_nnmf_noinbetween1x1/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/avg_pooling_nnmf_noinbetween1x1/convert_log_to_numpy.py b/avg_pooling_nnmf_noinbetween1x1/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/avg_pooling_nnmf_noinbetween1x1/convert_log_to_numpy.py @@ -0,0 +1,31 @@ +import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return 
np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/avg_pooling_nnmf_noinbetween1x1/data_loader.py b/avg_pooling_nnmf_noinbetween1x1/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/avg_pooling_nnmf_noinbetween1x1/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/avg_pooling_nnmf_noinbetween1x1/data_log_iter20_lr_-_1.0000e-02_1.0000e-03_.npy b/avg_pooling_nnmf_noinbetween1x1/data_log_iter20_lr_-_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 0000000..f74b924 Binary files /dev/null and b/avg_pooling_nnmf_noinbetween1x1/data_log_iter20_lr_-_1.0000e-02_1.0000e-03_.npy differ diff --git a/avg_pooling_nnmf_noinbetween1x1/get_the_data.py b/avg_pooling_nnmf_noinbetween1x1/get_the_data.py new file mode 100644 index 0000000..fc61064 --- /dev/null +++ b/avg_pooling_nnmf_noinbetween1x1/get_the_data.py @@ -0,0 +1,147 @@ +import torch +import torchvision # type: ignore +from data_loader import data_loader + +from torchvision.transforms import v2 # type: ignore +import numpy as np + + +def get_the_data( + dataset: str, + batch_size_train: int, + batch_size_test: int, + torch_device: torch.device, + input_dim_x: int, + input_dim_y: int, + flip_p: float = 0.5, + jitter_brightness: float = 0.5, + jitter_contrast: float = 0.1, + jitter_saturation: float = 0.1, + jitter_hue: float = 0.15, + da_auto_mode: bool = False, +) -> tuple[ + torch.utils.data.dataloader.DataLoader, + torch.utils.data.dataloader.DataLoader, + torchvision.transforms.Compose, + torchvision.transforms.Compose, +]: + if dataset == "MNIST": + tv_dataset_train = torchvision.datasets.MNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.MNIST( + root="data", train=False, download=True + ) + elif dataset == "FashionMNIST": + tv_dataset_train = torchvision.datasets.FashionMNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.FashionMNIST( + root="data", train=False, download=True + ) + elif dataset == "CIFAR10": + tv_dataset_train = torchvision.datasets.CIFAR10( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.CIFAR10( + root="data", train=False, download=True + ) + else: + raise NotImplementedError("This dataset is not implemented.") + + def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + torch.random.seed(worker_seed) + + g = torch.Generator() + g.manual_seed(0) + + if dataset == "MNIST" or dataset == "FashionMNIST": + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=tv_dataset_train.data, + 
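+            # MNIST/FashionMNIST .data tensors are uint8 with shape (N, H, W);
+            # data_loader adds the channel dimension and rescales to [0, 1].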
labels=tv_dataset_train.targets, + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=tv_dataset_test.data, + labels=tv_dataset_test.targets, + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git a/avg_pooling_nnmf_noinbetween1x1/log_iter20_lr_-_1.0000e-02_1.0000e-03_/events.out.tfevents.1724149649.gp4u3.1010554.0 b/avg_pooling_nnmf_noinbetween1x1/log_iter20_lr_-_1.0000e-02_1.0000e-03_/events.out.tfevents.1724149649.gp4u3.1010554.0 new file mode 100644 index 0000000..87f793b Binary files /dev/null and b/avg_pooling_nnmf_noinbetween1x1/log_iter20_lr_-_1.0000e-02_1.0000e-03_/events.out.tfevents.1724149649.gp4u3.1010554.0 differ diff --git a/avg_pooling_nnmf_noinbetween1x1/loss_function.py b/avg_pooling_nnmf_noinbetween1x1/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/avg_pooling_nnmf_noinbetween1x1/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + 
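# Worked example for the scatter_ above: with labels = [2, 0] and + # number_of_output_neurons = 4 it yields + # target_one_hot = [[0, 0, 1, 0], + # [1, 0, 0, 0]] +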
my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + .mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/avg_pooling_nnmf_noinbetween1x1/make_network.py b/avg_pooling_nnmf_noinbetween1x1/make_network.py new file mode 100644 index 0000000..0a2c532 --- /dev/null +++ b/avg_pooling_nnmf_noinbetween1x1/make_network.py @@ -0,0 +1,215 @@ +import torch +from append_block import append_block +from L1NormLayer import L1NormLayer +from NNMF2d import NNMF2d +from append_parameter import append_parameter + + +def make_network( + input_dim_x: int, + input_dim_y: int, + input_number_of_channel: int, + iterations: int, + torch_device: torch.device, + epsilon: bool | None = None, + positive_function_type: int = 0, + beta: float | None = None, + # Conv: + number_of_output_channels: list[int] = [32, 64, 96, 10], + kernel_size_conv: list[tuple[int, int]] = [ + (5, 5), + (5, 5), + (-1, -1), # Take the whole input image x and y size + (1, 1), + ], + stride_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + padding_conv: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + 
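# The parameter lists below collect each block's weights so that + # make_optimize can give every non-empty group its own Adam optimizer + # and ReduceLROnPlateau scheduler; empty groups get None instead. +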
parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + network.append(torch.nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))) + test_image = network[-1](test_image) + + # network.append(torch.nn.ReLU()) + # test_image = network[-1](test_image) + + # mock_output = ( + # torch.nn.functional.conv2d( + # torch.zeros( + # 1, + # 1, + # test_image.shape[2], + # test_image.shape[3], + # ), + # torch.zeros((1, 1, 2, 2)), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # .squeeze(0) + # .squeeze(0) + # ) + + # network.append( + # torch.nn.Unfold( + # kernel_size=(2, 2), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # ) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Fold( + # output_size=mock_output.shape, + # kernel_size=(1, 1), + # dilation=1, + # padding=0, + # stride=1, + # ) + # ) + # test_image = network[-1](test_image) + + # network.append(L1NormLayer()) + # test_image = network[-1](test_image) + + # network.append( + # NNMF2d( + # in_channels=test_image.shape[1], + # out_channels=test_image.shape[1] // 4, + # epsilon=epsilon, + # positive_function_type=positive_function_type, + # beta=beta, + # iterations=iterations, + # local_learning=False, + # local_learning_kl=False, + # ).to(torch_device) + # ) + + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + # network.append( + # torch.nn.BatchNorm2d( + # num_features=test_image.shape[1], + # device=torch_device, + # momentum=0.1, + # track_running_stats=False, + # ) + # ) + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", + "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/avg_pooling_nnmf_noinbetween1x1/make_optimize.py b/avg_pooling_nnmf_noinbetween1x1/make_optimize.py new file mode 100644 index 0000000..ab1a4e0 --- /dev/null +++ b/avg_pooling_nnmf_noinbetween1x1/make_optimize.py @@ -0,0 +1,32 @@ +import torch + + +def make_optimize( + parameters: list[list[torch.nn.parameter.Parameter]], + lr_initial: list[float], + eps=1e-10, +) -> tuple[ + list[torch.optim.Adam | None], + list[torch.optim.lr_scheduler.ReduceLROnPlateau | None], +]: + list_optimizer: list[torch.optim.Adam | None] = [] + list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = [] + + assert len(parameters) == len(lr_initial) + + for i in range(0, len(parameters)): + if len(parameters[i]) > 0: + list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i])) + else: + list_optimizer.append(None) + + for i in range(0, len(list_optimizer)): + if list_optimizer[i] is not None: + pass + list_lr_scheduler.append( + torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps) # type: ignore + ) + else: + list_lr_scheduler.append(None) + + return (list_optimizer, list_lr_scheduler) diff --git a/avg_pooling_nnmf_noinbetween1x1/non_linear_weigth_function.py b/avg_pooling_nnmf_noinbetween1x1/non_linear_weigth_function.py new file mode 100644 index 0000000..053a9b6 --- /dev/null +++ 
b/avg_pooling_nnmf_noinbetween1x1/non_linear_weigth_function.py @@ -0,0 +1,26 @@ +import torch + + +def non_linear_weigth_function( + weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int +) -> torch.Tensor: + + if positive_function_type == 0: + positive_weights = torch.abs(weight) + + elif positive_function_type == 1: + assert beta is not None + positive_weights = weight + max_value = torch.abs(positive_weights).max() + if max_value > 80: + positive_weights = 80.0 * positive_weights / max_value + positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights) + + elif positive_function_type == 2: + assert beta is not None + positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5 + + else: + positive_weights = weight + + return positive_weights diff --git a/avg_pooling_nnmf_noinbetween1x1/plot.py b/avg_pooling_nnmf_noinbetween1x1/plot.py new file mode 100644 index 0000000..ad22d33 --- /dev/null +++ b/avg_pooling_nnmf_noinbetween1x1/plot.py @@ -0,0 +1,15 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.load("data_log.npy") +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + "k", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") +plt.title("CIFAR10") +plt.show() diff --git a/avg_pooling_nnmf_noinbetween1x1/run_network.py b/avg_pooling_nnmf_noinbetween1x1/run_network.py new file mode 100644 index 0000000..8d0a43f --- /dev/null +++ b/avg_pooling_nnmf_noinbetween1x1/run_network.py @@ -0,0 +1,251 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +import argh + +import time +import numpy as np +import torch + +rand_seed: int = 21 +torch.manual_seed(rand_seed) +torch.cuda.manual_seed(rand_seed) +np.random.seed(rand_seed) + +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.01, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 # 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + 
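# The remaining make_network arguments keep their defaults: four blocks + # with 32/64/96/10 output channels, 5x5 kernels for the first two blocks, + # a full-image (-1, -1) kernel for the third, 1x1 for the last, and 2x2 + # average pooling after blocks one and two. +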
iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - 
t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/basis_conv2d/L1NormLayer.py b/basis_conv2d/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/basis_conv2d/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/basis_conv2d/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt b/basis_conv2d/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..d3f5505 Binary files /dev/null and b/basis_conv2d/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt differ diff --git a/basis_conv2d/NNMF2d.py b/basis_conv2d/NNMF2d.py new file mode 100644 index 0000000..b84d083 --- /dev/null +++ b/basis_conv2d/NNMF2d.py @@ -0,0 +1,252 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2d(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + + def __init__( + self, + in_channels: int, + out_channels: int, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.in_channels = in_channels + self.out_channels = out_channels + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty((out_channels, in_channels), **factory_kwargs) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d = FunctionalNNMF2d.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = 
non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=1, keepdim=True) + 10e-20 + ) + + h_dyn = self.functional_nnmf2d( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + return h_dyn + + +class FunctionalNNMF2d(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + (input.shape[0], out_channels, input.shape[-2], input.shape[-1]), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + h = h.movedim(1, -1) + input = input.movedim(1, -1) + for _ in range(0, iterations): + reconstruction = torch.nn.functional.linear(h, weight.T) + reconstruction += 1e-20 + if epsilon is None: + h *= torch.nn.functional.linear((input / reconstruction), weight) + else: + h *= 1 + epsilon * torch.nn.functional.linear( + (input / reconstruction), weight + ) + h /= h.sum(-1, keepdim=True) + 10e-20 + h = h.movedim(-1, 1) + input = input.movedim(-1, 1) + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + h = h.movedim(1, -1) + grad_output = grad_output.movedim(1, -1) + input = input.movedim(1, -1) + big_r = torch.nn.functional.linear(h, weight.T) + big_r_div = 1.0 / (big_r + 1e-20) + + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + torch.nn.functional.linear(h * grad_output, weight.T) * big_r_div + ) + + del big_r_div + + # The weight gradient + if ctx.local_learning is False: + del big_r + + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (factor_x_div_r * grad_input) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + + grad_weight += torch.nn.functional.linear( + (h * grad_output) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ) + .T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + + else: + if ctx.local_learning_kl: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, 
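+ # Local KL learning rule: the weight gradient is (minus) the correlation + # between the latent activities h and input/reconstruction, accumulated + # over the batch and all spatial positions.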
+ ) + else: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (2 * (input - big_r)) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + grad_input = grad_input.movedim(-1, 1) + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + return ( + grad_input, + grad_weight, + None, + None, + None, + None, + None, + ) diff --git a/basis_conv2d/append_block.py b/basis_conv2d/append_block.py new file mode 100644 index 0000000..80142d9 --- /dev/null +++ b/basis_conv2d/append_block.py @@ -0,0 +1,111 @@ +import torch +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, + last_layer: bool = False, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + if last_layer is False: + + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + stride=(1, 1), + padding=(0, 0), + bias=True, + device=torch_device, + ) + ) + # Init the cnn top layers 1x1 conv2d layers + for name, param in network[-1].named_parameters(): + with torch.no_grad(): + if name == "bias": + param.data *= 0 + if name == "weight": + assert param.shape[-2] == 1 + assert param.shape[-1] == 1 + param[: param.shape[0], : param.shape[0], 0, 0] = torch.eye( + param.shape[0], dtype=param.dtype, device=param.device + ) + param[param.shape[0] :, :, 0, 0] = 0 + param[:, param.shape[0] :, 0, 0] = 0 + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_cnn_top) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + 
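# BatchNorm2d is only appended while the feature map is larger than 1x1 + # (see the shape check above); the final full-image block skips it. +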
device=torch_device, + momentum=momentum, + track_running_stats=track_running_stats, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git a/basis_conv2d/append_parameter.py b/basis_conv2d/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/basis_conv2d/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/basis_conv2d/convert_log_to_numpy.py b/basis_conv2d/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/basis_conv2d/convert_log_to_numpy.py @@ -0,0 +1,31 @@ +import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/basis_conv2d/data_loader.py b/basis_conv2d/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/basis_conv2d/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/basis_conv2d/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy b/basis_conv2d/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 0000000..383fa25 Binary files /dev/null and b/basis_conv2d/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy differ diff --git a/basis_conv2d/get_the_data.py b/basis_conv2d/get_the_data.py new file mode 100644 index 0000000..fc61064 --- /dev/null +++ b/basis_conv2d/get_the_data.py @@ -0,0 +1,147 @@ +import torch +import torchvision # type: ignore +from data_loader import data_loader + +from torchvision.transforms import v2 # type: ignore +import numpy as np + + +def get_the_data( + dataset: str, + batch_size_train: int, + batch_size_test: int, + torch_device: torch.device, + input_dim_x: int, + input_dim_y: int, + flip_p: float = 0.5, + jitter_brightness: float = 0.5, + jitter_contrast: float = 0.1, + jitter_saturation: float = 0.1, + jitter_hue: float = 0.15, + da_auto_mode: bool = False, +) -> tuple[ + torch.utils.data.dataloader.DataLoader, + 
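# returned in this order: train_dataloader, test_dataloader, + # train_processing_chain, test_processing_chain +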
torch.utils.data.dataloader.DataLoader, + torchvision.transforms.Compose, + torchvision.transforms.Compose, +]: + if dataset == "MNIST": + tv_dataset_train = torchvision.datasets.MNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.MNIST( + root="data", train=False, download=True + ) + elif dataset == "FashionMNIST": + tv_dataset_train = torchvision.datasets.FashionMNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.FashionMNIST( + root="data", train=False, download=True + ) + elif dataset == "CIFAR10": + tv_dataset_train = torchvision.datasets.CIFAR10( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.CIFAR10( + root="data", train=False, download=True + ) + else: + raise NotImplementedError("This dataset is not implemented.") + + def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + torch.random.manual_seed(worker_seed) + + g = torch.Generator() + g.manual_seed(0) + + if dataset == "MNIST" or dataset == "FashionMNIST": + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=tv_dataset_train.data, + labels=tv_dataset_train.targets, + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=tv_dataset_test.data, + labels=tv_dataset_test.targets, + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git a/basis_conv2d/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724159951.gp4u1.916226.0
b/basis_conv2d/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724159951.gp4u1.916226.0 new file mode 100644 index 0000000..b8bf6d4 Binary files /dev/null and b/basis_conv2d/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724159951.gp4u1.916226.0 differ diff --git a/basis_conv2d/loss_function.py b/basis_conv2d/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/basis_conv2d/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + .mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/basis_conv2d/make_network.py b/basis_conv2d/make_network.py new file mode 100644 index 0000000..768db46 --- /dev/null +++ b/basis_conv2d/make_network.py @@ -0,0 +1,168 @@ +import torch +from append_block import append_block +from append_parameter import append_parameter + + +def make_network( + input_dim_x: int, + input_dim_y: int, + input_number_of_channel: int, + iterations: int, + torch_device: torch.device, + epsilon: bool | None = None, + positive_function_type: int = 0, + beta: float | None = None, + # Conv: + number_of_output_channels: list[int] = [32 * 1, 64 * 1, 96 * 1, 10], + kernel_size_conv: list[tuple[int, int]] = [ + (5, 5), + (5, 5), + (-1, -1), # Take the whole input image x and y size + (1, 1), + ], + stride_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + padding_conv: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == 
len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + last_layer=block_id == len(number_of_output_channels) - 1, + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=test_image.shape[1], + kernel_size=(2, 2), + dilation=(1, 1), + padding=(0, 0), + stride=(2, 2), + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=0.1, + track_running_stats=False, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", + "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/basis_conv2d/make_optimize.py b/basis_conv2d/make_optimize.py new file mode 100644 index 0000000..ab1a4e0 --- /dev/null +++ b/basis_conv2d/make_optimize.py @@ -0,0 +1,32 @@ +import torch + + +def make_optimize( + parameters: list[list[torch.nn.parameter.Parameter]], + lr_initial: list[float], + eps=1e-10, +) -> tuple[ + list[torch.optim.Adam | None], + list[torch.optim.lr_scheduler.ReduceLROnPlateau | None], +]: + list_optimizer: list[torch.optim.Adam | None] = [] + list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = [] + + assert len(parameters) == len(lr_initial) + + for i in range(0, len(parameters)): + if len(parameters[i]) > 0: + list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i])) + else: + list_optimizer.append(None) + + for i in range(0, len(list_optimizer)): + if list_optimizer[i] is not None: + pass + list_lr_scheduler.append( + torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps) # type: ignore + ) + else: + list_lr_scheduler.append(None) + + return (list_optimizer, list_lr_scheduler) diff --git 
a/basis_conv2d/non_linear_weigth_function.py b/basis_conv2d/non_linear_weigth_function.py new file mode 100644 index 0000000..053a9b6 --- /dev/null +++ b/basis_conv2d/non_linear_weigth_function.py @@ -0,0 +1,26 @@ +import torch + + +def non_linear_weigth_function( + weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int +) -> torch.Tensor: + + if positive_function_type == 0: + positive_weights = torch.abs(weight) + + elif positive_function_type == 1: + assert beta is not None + positive_weights = weight + max_value = torch.abs(positive_weights).max() + if max_value > 80: + positive_weights = 80.0 * positive_weights / max_value + positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights) + + elif positive_function_type == 2: + assert beta is not None + positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5 + + else: + positive_weights = weight + + return positive_weights diff --git a/basis_conv2d/plot.py b/basis_conv2d/plot.py new file mode 100644 index 0000000..ad22d33 --- /dev/null +++ b/basis_conv2d/plot.py @@ -0,0 +1,15 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.load("data_log.npy") +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + "k", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") +plt.title("CIFAR10") +plt.show() diff --git a/basis_conv2d/run_network.py b/basis_conv2d/run_network.py new file mode 100644 index 0000000..8d0a43f --- /dev/null +++ b/basis_conv2d/run_network.py @@ -0,0 +1,251 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +import argh + +import time +import numpy as np +import torch + +rand_seed: int = 21 +torch.manual_seed(rand_seed) +torch.cuda.manual_seed(rand_seed) +np.random.seed(rand_seed) + +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.01, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 # 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + 
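# input_dim_x/y come from the dataset branch above: 24x24 crops for + # MNIST/FashionMNIST, 28x28 for CIFAR10, matching the CenterCrop and + # RandomCrop sizes used inside get_the_data. +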
input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: 
Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/basis_mlp/L1NormLayer.py b/basis_mlp/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/basis_mlp/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/basis_mlp/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt b/basis_mlp/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..a007aac Binary files /dev/null and b/basis_mlp/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt differ diff --git a/basis_mlp/NNMF2d.py b/basis_mlp/NNMF2d.py new file mode 100644 index 0000000..b84d083 --- /dev/null +++ b/basis_mlp/NNMF2d.py @@ -0,0 +1,252 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2d(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + + def __init__( + self, + in_channels: int, + out_channels: int, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.in_channels = in_channels + self.out_channels = out_channels + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty((out_channels, in_channels), **factory_kwargs) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d = FunctionalNNMF2d.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + 
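# non_linear_weigth_function first makes the weight non-negative (e.g. + # via abs for positive_function_type == 0); each row is then L1-normalised. + # FunctionalNNMF2d.forward below runs the classic multiplicative NNMF + # update h <- h * ((input / (h @ W)) @ W.T), renormalising h to sum 1, or + # a damped variant h <- h * (1 + epsilon * ...) when epsilon is set. +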
positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=1, keepdim=True) + 10e-20 + ) + + h_dyn = self.functional_nnmf2d( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + return h_dyn + + +class FunctionalNNMF2d(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + (input.shape[0], out_channels, input.shape[-2], input.shape[-1]), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + h = h.movedim(1, -1) + input = input.movedim(1, -1) + for _ in range(0, iterations): + reconstruction = torch.nn.functional.linear(h, weight.T) + reconstruction += 1e-20 + if epsilon is None: + h *= torch.nn.functional.linear((input / reconstruction), weight) + else: + h *= 1 + epsilon * torch.nn.functional.linear( + (input / reconstruction), weight + ) + h /= h.sum(-1, keepdim=True) + 10e-20 + h = h.movedim(-1, 1) + input = input.movedim(-1, 1) + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + h = h.movedim(1, -1) + grad_output = grad_output.movedim(1, -1) + input = input.movedim(1, -1) + big_r = torch.nn.functional.linear(h, weight.T) + big_r_div = 1.0 / (big_r + 1e-20) + + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + torch.nn.functional.linear(h * grad_output, weight.T) * big_r_div + ) + + del big_r_div + + # The weight gradient + if ctx.local_learning is False: + del big_r + + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (factor_x_div_r * grad_input) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + + grad_weight += torch.nn.functional.linear( + (h * grad_output) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ) + .T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + + else: + if ctx.local_learning_kl: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + 
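# The batch and both spatial axes are flattened into one sample axis, so + # the weight gradient reduces to one [out, N] x [N, in] matrix product. +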
grad_input.shape[3], + ).T, + ) + else: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (2 * (input - big_r)) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + grad_input = grad_input.movedim(-1, 1) + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + return ( + grad_input, + grad_weight, + None, + None, + None, + None, + None, + ) diff --git a/basis_mlp/append_block.py b/basis_mlp/append_block.py new file mode 100644 index 0000000..b6796c4 --- /dev/null +++ b/basis_mlp/append_block.py @@ -0,0 +1,151 @@ +import torch +from L1NormLayer import L1NormLayer +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, + last_layer: bool= False, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + # I need the output size + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, kernel_size_internal[0], kernel_size_internal[1])), + stride=stride, + padding=padding, + dilation=dilation, + ) + .squeeze(0) + .squeeze(0) + ) + network.append( + torch.nn.Unfold( + kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + bias=False, + ).to(torch_device) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + if last_layer is False: + + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + stride=(1, 1), + padding=(0, 0), + bias=True, + 
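# This 1x1 convolution is the trainable cnn_top read-out; the init loop + # below starts it as an identity map (eye weight, zero bias). +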
device=torch_device, + ) + ) + # Init the cnn top layers 1x1 conv2d layers + for name, param in network[-1].named_parameters(): + with torch.no_grad(): + if name == "bias": + param.data *= 0 + if name == "weight": + assert param.shape[-2] == 1 + assert param.shape[-1] == 1 + param[: param.shape[0], : param.shape[0], 0, 0] = torch.eye( + param.shape[0], dtype=param.dtype, device=param.device + ) + param[param.shape[0] :, :, 0, 0] = 0 + param[:, param.shape[0] :, 0, 0] = 0 + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_cnn_top) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=momentum, + track_running_stats=track_running_stats, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git a/basis_mlp/append_parameter.py b/basis_mlp/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/basis_mlp/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/basis_mlp/convert_log_to_numpy.py b/basis_mlp/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/basis_mlp/convert_log_to_numpy.py @@ -0,0 +1,31 @@ +import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/basis_mlp/data_loader.py b/basis_mlp/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/basis_mlp/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/basis_mlp/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy b/basis_mlp/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 0000000..71f6fbd Binary files /dev/null and b/basis_mlp/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy differ diff --git a/basis_mlp/get_the_data.py 
b/basis_mlp/get_the_data.py
new file mode 100644
index 0000000..fc61064
--- /dev/null
+++ b/basis_mlp/get_the_data.py
@@ -0,0 +1,147 @@
+import torch
+import torchvision  # type: ignore
+from data_loader import data_loader
+
+from torchvision.transforms import v2  # type: ignore
+import numpy as np
+
+
+def get_the_data(
+    dataset: str,
+    batch_size_train: int,
+    batch_size_test: int,
+    torch_device: torch.device,
+    input_dim_x: int,
+    input_dim_y: int,
+    flip_p: float = 0.5,
+    jitter_brightness: float = 0.5,
+    jitter_contrast: float = 0.1,
+    jitter_saturation: float = 0.1,
+    jitter_hue: float = 0.15,
+    da_auto_mode: bool = False,
+) -> tuple[
+    torch.utils.data.dataloader.DataLoader,
+    torch.utils.data.dataloader.DataLoader,
+    torchvision.transforms.Compose,
+    torchvision.transforms.Compose,
+]:
+    if dataset == "MNIST":
+        tv_dataset_train = torchvision.datasets.MNIST(
+            root="data", train=True, download=True
+        )
+        tv_dataset_test = torchvision.datasets.MNIST(
+            root="data", train=False, download=True
+        )
+    elif dataset == "FashionMNIST":
+        tv_dataset_train = torchvision.datasets.FashionMNIST(
+            root="data", train=True, download=True
+        )
+        tv_dataset_test = torchvision.datasets.FashionMNIST(
+            root="data", train=False, download=True
+        )
+    elif dataset == "CIFAR10":
+        tv_dataset_train = torchvision.datasets.CIFAR10(
+            root="data", train=True, download=True
+        )
+        tv_dataset_test = torchvision.datasets.CIFAR10(
+            root="data", train=False, download=True
+        )
+    else:
+        raise NotImplementedError("This dataset is not implemented.")
+
+    def seed_worker(worker_id):
+        worker_seed = torch.initial_seed() % 2**32
+        np.random.seed(worker_seed)
+        torch.manual_seed(worker_seed)  # torch.random.seed() takes no argument
+
+    g = torch.Generator()
+    g.manual_seed(0)
+
+    if dataset == "MNIST" or dataset == "FashionMNIST":
+
+        train_dataloader = data_loader(
+            torch_device=torch_device,
+            batch_size=batch_size_train,
+            pattern=tv_dataset_train.data,
+            labels=tv_dataset_train.targets,
+            shuffle=True,
+            worker_init_fn=seed_worker,
+            generator=g,
+        )
+
+        test_dataloader = data_loader(
+            torch_device=torch_device,
+            batch_size=batch_size_test,
+            pattern=tv_dataset_test.data,
+            labels=tv_dataset_test.targets,
+            shuffle=False,
+            worker_init_fn=seed_worker,
+            generator=g,
+        )
+
+        # Data augmentation filter
+        test_processing_chain = torchvision.transforms.Compose(
+            transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))],
+        )
+
+        train_processing_chain = torchvision.transforms.Compose(
+            transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))],
+        )
+    else:
+
+        train_dataloader = data_loader(
+            torch_device=torch_device,
+            batch_size=batch_size_train,
+            pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1),
+            labels=torch.tensor(tv_dataset_train.targets),
+            shuffle=True,
+            worker_init_fn=seed_worker,
+            generator=g,
+        )
+
+        test_dataloader = data_loader(
+            torch_device=torch_device,
+            batch_size=batch_size_test,
+            pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1),
+            labels=torch.tensor(tv_dataset_test.targets),
+            shuffle=False,
+            worker_init_fn=seed_worker,
+            generator=g,
+        )
+
+        # Data augmentation filter
+        test_processing_chain = torchvision.transforms.Compose(
+            transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))],
+        )
+
+        if da_auto_mode:
+            train_processing_chain = torchvision.transforms.Compose(
+                transforms=[
+                    v2.AutoAugment(
+                        policy=torchvision.transforms.AutoAugmentPolicy(
+                            v2.AutoAugmentPolicy.CIFAR10
+                        )
+                    ),
+                    torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)),
+                ],
+            )
+        else:
+            train_processing_chain = torchvision.transforms.Compose(
+                transforms=[
+                    torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)),
+                    torchvision.transforms.RandomHorizontalFlip(p=flip_p),
+                    torchvision.transforms.ColorJitter(
+                        brightness=jitter_brightness,
+                        contrast=jitter_contrast,
+                        saturation=jitter_saturation,
+                        hue=jitter_hue,
+                    ),
+                ],
+            )
+
+    return (
+        train_dataloader,
+        test_dataloader,
+        train_processing_chain,
+        test_processing_chain,
+    )
diff --git a/basis_mlp/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724146550.gp4u2.724157.0 b/basis_mlp/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724146550.gp4u2.724157.0
new file mode 100644
index 0000000..552cc4d
Binary files /dev/null and b/basis_mlp/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724146550.gp4u2.724157.0 differ
diff --git a/basis_mlp/loss_function.py b/basis_mlp/loss_function.py
new file mode 100644
index 0000000..e256840
--- /dev/null
+++ b/basis_mlp/loss_function.py
@@ -0,0 +1,64 @@
+import torch
+
+
+# loss_mode == 0: "normal" SbS loss function mixture
+# loss_mode == 1: cross_entropy
+def loss_function(
+    h: torch.Tensor,
+    labels: torch.Tensor,
+    loss_mode: int = 0,
+    number_of_output_neurons: int = 10,
+    loss_coeffs_mse: float = 0.0,
+    loss_coeffs_kldiv: float = 0.0,
+) -> torch.Tensor | None:
+
+    assert loss_mode >= 0
+    assert loss_mode <= 1
+
+    assert h.ndim == 2
+
+    if loss_mode == 0:
+
+        # Convert label into one hot
+        target_one_hot: torch.Tensor = torch.zeros(
+            (
+                labels.shape[0],
+                number_of_output_neurons,
+            ),
+            device=h.device,
+            dtype=h.dtype,
+        )
+
+        target_one_hot.scatter_(
+            1,
+            labels.to(h.device).unsqueeze(1),
+            torch.ones(
+                (labels.shape[0], 1),
+                device=h.device,
+                dtype=h.dtype,
+            ),
+        )
+
+        my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean(
+            dim=0
+        ) * loss_coeffs_mse
+
+        my_loss = (
+            my_loss
+            + (
+                (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20)))
+                .sum(dim=0)
+                .mean(dim=0)
+            )
+            * loss_coeffs_kldiv
+        )
+
+        my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse))
+
+        return my_loss
+
+    elif loss_mode == 1:
+        my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device))
+        return my_loss
+    else:
+        return None
diff --git a/basis_mlp/make_network.py b/basis_mlp/make_network.py
new file mode 100644
index 0000000..335bc3c
--- /dev/null
+++ b/basis_mlp/make_network.py
@@ -0,0 +1,208 @@
+import torch
+from append_block import append_block
+from L1NormLayer import L1NormLayer
+from append_parameter import append_parameter
+
+
+def make_network(
+    input_dim_x: int,
+    input_dim_y: int,
+    input_number_of_channel: int,
+    iterations: int,
+    torch_device: torch.device,
+    epsilon: float | None = None,
+    positive_function_type: int = 0,
+    beta: float | None = None,
+    # Conv:
+    number_of_output_channels: list[int] = [32 * 1, 64 * 1, 96 * 1, 10],
+    kernel_size_conv: list[tuple[int, int]] = [
+        (5, 5),
+        (5, 5),
+        (-1, -1),  # Take the whole input image x and y size
+        (1, 1),
+    ],
+    stride_conv: list[tuple[int, int]] = [
+        (1, 1),
+        (1, 1),
+        (1, 1),
+        (1, 1),
+    ],
+    padding_conv: list[tuple[int, int]] = [
+        (0, 0),
+        (0, 0),
+        (0, 0),
+        (0, 0),
+    ],
+    dilation_conv: list[tuple[int, int]] = [
+        (1, 1),
+        (1, 1),
+        (1, 1),
+        (1, 1),
+    ],
+    # Pool:
+    kernel_size_pool: list[tuple[int, int]] = [
+        (2, 2),
+        (2, 2),
+        (-1, -1),  # No pooling layer
+        (-1, -1),  # No pooling layer
+    ],
+    stride_pool: list[tuple[int, int]] = [
+        (2, 2),
+        (2, 2),
+        (-1, -1),
+        (-1, -1),
+    ],
+ padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + last_layer = block_id == len(number_of_output_channels)-1, + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, 2, 2)), + stride=(2, 2), + padding=(0, 0), + dilation=(1, 1), + ) + .squeeze(0) + .squeeze(0) + ) + + network.append( + torch.nn.Unfold( + kernel_size=(2, 2), + stride=(2, 2), + padding=(0, 0), + dilation=(1, 1), + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=test_image.shape[1] // 4, + kernel_size=(1, 1), + bias=False, + ).to(torch_device) + ) + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=0.1, + track_running_stats=False, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", 
+ "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/basis_mlp/make_optimize.py b/basis_mlp/make_optimize.py new file mode 100644 index 0000000..ab1a4e0 --- /dev/null +++ b/basis_mlp/make_optimize.py @@ -0,0 +1,32 @@ +import torch + + +def make_optimize( + parameters: list[list[torch.nn.parameter.Parameter]], + lr_initial: list[float], + eps=1e-10, +) -> tuple[ + list[torch.optim.Adam | None], + list[torch.optim.lr_scheduler.ReduceLROnPlateau | None], +]: + list_optimizer: list[torch.optim.Adam | None] = [] + list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = [] + + assert len(parameters) == len(lr_initial) + + for i in range(0, len(parameters)): + if len(parameters[i]) > 0: + list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i])) + else: + list_optimizer.append(None) + + for i in range(0, len(list_optimizer)): + if list_optimizer[i] is not None: + pass + list_lr_scheduler.append( + torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps) # type: ignore + ) + else: + list_lr_scheduler.append(None) + + return (list_optimizer, list_lr_scheduler) diff --git a/basis_mlp/non_linear_weigth_function.py b/basis_mlp/non_linear_weigth_function.py new file mode 100644 index 0000000..053a9b6 --- /dev/null +++ b/basis_mlp/non_linear_weigth_function.py @@ -0,0 +1,26 @@ +import torch + + +def non_linear_weigth_function( + weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int +) -> torch.Tensor: + + if positive_function_type == 0: + positive_weights = torch.abs(weight) + + elif positive_function_type == 1: + assert beta is not None + positive_weights = weight + max_value = torch.abs(positive_weights).max() + if max_value > 80: + positive_weights = 80.0 * positive_weights / max_value + positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights) + + elif positive_function_type == 2: + assert beta is not None + positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5 + + else: + positive_weights = weight + + return positive_weights diff --git a/basis_mlp/plot.py b/basis_mlp/plot.py new file mode 100644 index 0000000..ad22d33 --- /dev/null +++ b/basis_mlp/plot.py @@ -0,0 +1,15 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.load("data_log.npy") +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + "k", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") +plt.title("CIFAR10") +plt.show() diff --git a/basis_mlp/run_network.py b/basis_mlp/run_network.py new file mode 100644 index 0000000..8d0a43f --- /dev/null +++ b/basis_mlp/run_network.py @@ -0,0 +1,251 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +import argh + +import time +import numpy as np +import torch + +rand_seed: int = 21 +torch.manual_seed(rand_seed) +torch.cuda.manual_seed(rand_seed) +np.random.seed(rand_seed) + +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.01, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if 
torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 # 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + 
lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/basis_nnmf/L1NormLayer.py b/basis_nnmf/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/basis_nnmf/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/basis_nnmf/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt b/basis_nnmf/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..7d58a6b Binary files /dev/null and b/basis_nnmf/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt differ diff --git a/basis_nnmf/NNMF2d.py b/basis_nnmf/NNMF2d.py new file mode 100644 index 0000000..b84d083 --- /dev/null +++ b/basis_nnmf/NNMF2d.py @@ -0,0 +1,252 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2d(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + + def __init__( + self, + in_channels: int, + out_channels: int, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + 
self.init_max = init_max + + self.in_channels = in_channels + self.out_channels = out_channels + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty((out_channels, in_channels), **factory_kwargs) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d = FunctionalNNMF2d.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=1, keepdim=True) + 10e-20 + ) + + h_dyn = self.functional_nnmf2d( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + return h_dyn + + +class FunctionalNNMF2d(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + (input.shape[0], out_channels, input.shape[-2], input.shape[-1]), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + h = h.movedim(1, -1) + input = input.movedim(1, -1) + for _ in range(0, iterations): + reconstruction = torch.nn.functional.linear(h, weight.T) + reconstruction += 1e-20 + if epsilon is None: + h *= torch.nn.functional.linear((input / reconstruction), weight) + else: + h *= 1 + epsilon * torch.nn.functional.linear( + (input / reconstruction), weight + ) + h /= h.sum(-1, keepdim=True) + 10e-20 + h = h.movedim(-1, 1) + input = input.movedim(-1, 1) + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + h = h.movedim(1, -1) + grad_output = grad_output.movedim(1, -1) + input = input.movedim(1, -1) + big_r = torch.nn.functional.linear(h, weight.T) + big_r_div = 1.0 / (big_r + 1e-20) 
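+        # Note (added): with weight W of shape (out_channels, in_channels) and
+        # h kept in channels-last layout, big_r is the reconstruction R = h W
+        # and big_r_div its regularised reciprocal; both the input and the
+        # weight gradients below reuse it.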
+ + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + torch.nn.functional.linear(h * grad_output, weight.T) * big_r_div + ) + + del big_r_div + + # The weight gradient + if ctx.local_learning is False: + del big_r + + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (factor_x_div_r * grad_input) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + + grad_weight += torch.nn.functional.linear( + (h * grad_output) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ) + .T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + + else: + if ctx.local_learning_kl: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + else: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (2 * (input - big_r)) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + grad_input = grad_input.movedim(-1, 1) + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + return ( + grad_input, + grad_weight, + None, + None, + None, + None, + None, + ) diff --git a/basis_nnmf/append_block.py b/basis_nnmf/append_block.py new file mode 100644 index 0000000..a8de5d2 --- /dev/null +++ b/basis_nnmf/append_block.py @@ -0,0 +1,149 @@ +import torch +from L1NormLayer import L1NormLayer +from NNMF2d import NNMF2d +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + # I need the output size + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, kernel_size_internal[0], kernel_size_internal[1])), + stride=stride, + padding=padding, + dilation=dilation, + ) + .squeeze(0) + .squeeze(0) + ) + network.append( + torch.nn.Unfold( + kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + 
output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + NNMF2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + local_learning=local_learning, + local_learning_kl=local_learning_kl, + ).to(torch_device) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + stride=(1, 1), + padding=(0, 0), + bias=True, + device=torch_device, + ) + ) + # Init the cnn top layers 1x1 conv2d layers + for name, param in network[-1].named_parameters(): + with torch.no_grad(): + if name == "bias": + param.data *= 0 + if name == "weight": + assert param.shape[-2] == 1 + assert param.shape[-1] == 1 + param[: param.shape[0], : param.shape[0], 0, 0] = torch.eye( + param.shape[0], dtype=param.dtype, device=param.device + ) + param[param.shape[0] :, :, 0, 0] = 0 + param[:, param.shape[0] :, 0, 0] = 0 + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_cnn_top) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=momentum, + track_running_stats=track_running_stats, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git a/basis_nnmf/append_parameter.py b/basis_nnmf/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/basis_nnmf/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/basis_nnmf/convert_log_to_numpy.py b/basis_nnmf/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/basis_nnmf/convert_log_to_numpy.py @@ -0,0 +1,31 @@ +import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/basis_nnmf/data_loader.py b/basis_nnmf/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/basis_nnmf/data_loader.py @@ -0,0 +1,31 @@ +import torch + + 
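+# Note (added): wraps an image/label tensor pair in a TensorDataset. Images
+# are cast to float32, given a channel axis if missing, and scaled into
+# [0, 1] by the global maximum before batching.
+# Hypothetical usage: data_loader(pattern=torch.rand(100, 28, 28),
+#     labels=torch.randint(0, 10, (100,)), worker_init_fn=None, generator=None)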
+def data_loader(
+    pattern: torch.Tensor,
+    labels: torch.Tensor,
+    worker_init_fn,
+    generator,
+    batch_size: int = 128,
+    shuffle: bool = True,
+    torch_device: torch.device = torch.device("cpu"),
+) -> torch.utils.data.dataloader.DataLoader:
+
+    assert pattern.ndim >= 3
+
+    pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32)
+    if pattern_storage.ndim == 3:
+        pattern_storage = pattern_storage.unsqueeze(1)
+    pattern_storage /= pattern_storage.max()
+
+    label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64)
+
+    dataloader = torch.utils.data.DataLoader(
+        torch.utils.data.TensorDataset(pattern_storage, label_storage),
+        batch_size=batch_size,
+        shuffle=shuffle,
+        worker_init_fn=worker_init_fn,
+        generator=generator,
+    )
+
+    return dataloader
diff --git a/basis_nnmf/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy b/basis_nnmf/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy
new file mode 100644
index 0000000..c6366c2
Binary files /dev/null and b/basis_nnmf/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy differ
diff --git a/basis_nnmf/get_the_data.py b/basis_nnmf/get_the_data.py
new file mode 100644
index 0000000..fc61064
--- /dev/null
+++ b/basis_nnmf/get_the_data.py
@@ -0,0 +1,147 @@
+import torch
+import torchvision  # type: ignore
+from data_loader import data_loader
+
+from torchvision.transforms import v2  # type: ignore
+import numpy as np
+
+
+def get_the_data(
+    dataset: str,
+    batch_size_train: int,
+    batch_size_test: int,
+    torch_device: torch.device,
+    input_dim_x: int,
+    input_dim_y: int,
+    flip_p: float = 0.5,
+    jitter_brightness: float = 0.5,
+    jitter_contrast: float = 0.1,
+    jitter_saturation: float = 0.1,
+    jitter_hue: float = 0.15,
+    da_auto_mode: bool = False,
+) -> tuple[
+    torch.utils.data.dataloader.DataLoader,
+    torch.utils.data.dataloader.DataLoader,
+    torchvision.transforms.Compose,
+    torchvision.transforms.Compose,
+]:
+    if dataset == "MNIST":
+        tv_dataset_train = torchvision.datasets.MNIST(
+            root="data", train=True, download=True
+        )
+        tv_dataset_test = torchvision.datasets.MNIST(
+            root="data", train=False, download=True
+        )
+    elif dataset == "FashionMNIST":
+        tv_dataset_train = torchvision.datasets.FashionMNIST(
+            root="data", train=True, download=True
+        )
+        tv_dataset_test = torchvision.datasets.FashionMNIST(
+            root="data", train=False, download=True
+        )
+    elif dataset == "CIFAR10":
+        tv_dataset_train = torchvision.datasets.CIFAR10(
+            root="data", train=True, download=True
+        )
+        tv_dataset_test = torchvision.datasets.CIFAR10(
+            root="data", train=False, download=True
+        )
+    else:
+        raise NotImplementedError("This dataset is not implemented.")
+
+    def seed_worker(worker_id):
+        worker_seed = torch.initial_seed() % 2**32
+        np.random.seed(worker_seed)
+        torch.manual_seed(worker_seed)  # torch.random.seed() takes no argument
+
+    g = torch.Generator()
+    g.manual_seed(0)
+
+    if dataset == "MNIST" or dataset == "FashionMNIST":
+
+        train_dataloader = data_loader(
+            torch_device=torch_device,
+            batch_size=batch_size_train,
+            pattern=tv_dataset_train.data,
+            labels=tv_dataset_train.targets,
+            shuffle=True,
+            worker_init_fn=seed_worker,
+            generator=g,
+        )
+
+        test_dataloader = data_loader(
+            torch_device=torch_device,
+            batch_size=batch_size_test,
+            pattern=tv_dataset_test.data,
+            labels=tv_dataset_test.targets,
+            shuffle=False,
+            worker_init_fn=seed_worker,
+            generator=g,
+        )
+
+        # Data augmentation filter
+        test_processing_chain = torchvision.transforms.Compose(
+            transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))],
+        )
+
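+        # The matching training chain below augments the grayscale datasets
+        # with random crops only.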
train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git a/basis_nnmf/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724146431.gp4u1.872865.0 b/basis_nnmf/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724146431.gp4u1.872865.0 new file mode 100644 index 0000000..6163660 Binary files /dev/null and b/basis_nnmf/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724146431.gp4u1.872865.0 differ diff --git a/basis_nnmf/loss_function.py b/basis_nnmf/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/basis_nnmf/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + .mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/basis_nnmf/make_network.py b/basis_nnmf/make_network.py new file mode 100644 index 0000000..071d9fb --- 
/dev/null
+++ b/basis_nnmf/make_network.py
@@ -0,0 +1,212 @@
+import torch
+from append_block import append_block
+from L1NormLayer import L1NormLayer
+from NNMF2d import NNMF2d
+from append_parameter import append_parameter
+
+
+def make_network(
+    input_dim_x: int,
+    input_dim_y: int,
+    input_number_of_channel: int,
+    iterations: int,
+    torch_device: torch.device,
+    epsilon: float | None = None,
+    positive_function_type: int = 0,
+    beta: float | None = None,
+    # Conv:
+    number_of_output_channels: list[int] = [32, 64, 96, 10],
+    kernel_size_conv: list[tuple[int, int]] = [
+        (5, 5),
+        (5, 5),
+        (-1, -1),  # Take the whole input image x and y size
+        (1, 1),
+    ],
+    stride_conv: list[tuple[int, int]] = [
+        (1, 1),
+        (1, 1),
+        (1, 1),
+        (1, 1),
+    ],
+    padding_conv: list[tuple[int, int]] = [
+        (0, 0),
+        (0, 0),
+        (0, 0),
+        (0, 0),
+    ],
+    dilation_conv: list[tuple[int, int]] = [
+        (1, 1),
+        (1, 1),
+        (1, 1),
+        (1, 1),
+    ],
+    # Pool:
+    kernel_size_pool: list[tuple[int, int]] = [
+        (2, 2),
+        (2, 2),
+        (-1, -1),  # No pooling layer
+        (-1, -1),  # No pooling layer
+    ],
+    stride_pool: list[tuple[int, int]] = [
+        (2, 2),
+        (2, 2),
+        (-1, -1),
+        (-1, -1),
+    ],
+    padding_pool: list[tuple[int, int]] = [
+        (0, 0),
+        (0, 0),
+        (0, 0),
+        (0, 0),
+    ],
+    dilation_pool: list[tuple[int, int]] = [
+        (1, 1),
+        (1, 1),
+        (1, 1),
+        (1, 1),
+    ],
+    enable_onoff: bool = False,
+) -> tuple[
+    torch.nn.Sequential,
+    list[list[torch.nn.parameter.Parameter]],
+    list[str],
+]:
+
+    assert len(number_of_output_channels) == len(kernel_size_conv)
+    assert len(number_of_output_channels) == len(stride_conv)
+    assert len(number_of_output_channels) == len(padding_conv)
+    assert len(number_of_output_channels) == len(dilation_conv)
+    assert len(number_of_output_channels) == len(kernel_size_pool)
+    assert len(number_of_output_channels) == len(stride_pool)
+    assert len(number_of_output_channels) == len(padding_pool)
+    assert len(number_of_output_channels) == len(dilation_pool)
+
+    if enable_onoff:
+        input_number_of_channel *= 2
+
+    parameter_cnn_top: list[torch.nn.parameter.Parameter] = []
+    parameter_nnmf: list[torch.nn.parameter.Parameter] = []
+    parameter_norm: list[torch.nn.parameter.Parameter] = []
+
+    test_image = torch.ones(
+        (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device
+    )
+
+    network = torch.nn.Sequential()
+    network = network.to(torch_device)
+
+    for block_id in range(0, len(number_of_output_channels)):
+
+        test_image = append_block(
+            network=network,
+            out_channels=number_of_output_channels[block_id],
+            test_image=test_image,
+            dilation=dilation_conv[block_id],
+            padding=padding_conv[block_id],
+            stride=stride_conv[block_id],
+            kernel_size=kernel_size_conv[block_id],
+            epsilon=epsilon,
+            positive_function_type=positive_function_type,
+            beta=beta,
+            iterations=iterations,
+            torch_device=torch_device,
+            parameter_cnn_top=parameter_cnn_top,
+            parameter_nnmf=parameter_nnmf,
+            parameter_norm=parameter_norm,
+        )
+
+        if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0):
+            network.append(torch.nn.ReLU())
+            test_image = network[-1](test_image)
+
+            mock_output = (
+                torch.nn.functional.conv2d(
+                    torch.zeros(
+                        1,
+                        1,
+                        test_image.shape[2],
+                        test_image.shape[3],
+                    ),
+                    torch.zeros((1, 1, 2, 2)),
+                    stride=(2, 2),
+                    padding=(0, 0),
+                    dilation=(1, 1),
+                )
+                .squeeze(0)
+                .squeeze(0)
+            )
+
+            network.append(
+                torch.nn.Unfold(
+                    kernel_size=(2, 2),
+                    stride=(2, 2),
+                    padding=(0, 0),
+                    dilation=(1, 1),
+                )
+            )
+            test_image = network[-1](test_image)
+
+            network.append(
+                torch.nn.Fold(
+                    output_size=mock_output.shape,
+                    kernel_size=(1, 1),
+                    dilation=1,
+                    padding=0,
+                    stride=1,
+                )
+            )
+            test_image = network[-1](test_image)
+
+            network.append(L1NormLayer())
+            test_image = network[-1](test_image)
+
+            network.append(
+                NNMF2d(
+                    in_channels=test_image.shape[1],
+                    out_channels=test_image.shape[1] // 4,
+                    epsilon=epsilon,
+                    positive_function_type=positive_function_type,
+                    beta=beta,
+                    iterations=iterations,
+                    local_learning=False,
+                    local_learning_kl=False,
+                ).to(torch_device)
+            )
+
+            test_image = network[-1](test_image)
+            append_parameter(module=network[-1], parameter_list=parameter_nnmf)
+
+            network.append(
+                torch.nn.BatchNorm2d(
+                    num_features=test_image.shape[1],
+                    device=torch_device,
+                    momentum=0.1,
+                    track_running_stats=False,
+                )
+            )
+            test_image = network[-1](test_image)
+            append_parameter(module=network[-1], parameter_list=parameter_norm)
+
+    network.append(torch.nn.Softmax(dim=1))
+    test_image = network[-1](test_image)
+
+    network.append(torch.nn.Flatten())
+    test_image = network[-1](test_image)
+
+    parameters: list[list[torch.nn.parameter.Parameter]] = [
+        parameter_cnn_top,
+        parameter_nnmf,
+        parameter_norm,
+    ]
+
+    name_list: list[str] = [
+        "cnn_top",
+        "nnmf",
+        "batchnorm2d",
+    ]
+
+    return (
+        network,
+        parameters,
+        name_list,
+    )
diff --git a/basis_nnmf/make_optimize.py b/basis_nnmf/make_optimize.py
new file mode 100644
index 0000000..ab1a4e0
--- /dev/null
+++ b/basis_nnmf/make_optimize.py
@@ -0,0 +1,32 @@
+import torch
+
+
+def make_optimize(
+    parameters: list[list[torch.nn.parameter.Parameter]],
+    lr_initial: list[float],
+    eps=1e-10,
+) -> tuple[
+    list[torch.optim.Adam | None],
+    list[torch.optim.lr_scheduler.ReduceLROnPlateau | None],
+]:
+    list_optimizer: list[torch.optim.Adam | None] = []
+    list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = []
+
+    assert len(parameters) == len(lr_initial)
+
+    for i in range(0, len(parameters)):
+        if len(parameters[i]) > 0:
+            list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i]))
+        else:
+            list_optimizer.append(None)
+
+    for i in range(0, len(list_optimizer)):
+        if list_optimizer[i] is not None:
+            list_lr_scheduler.append(
+                torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps)  # type: ignore
+            )
+        else:
+            list_lr_scheduler.append(None)
+
+    return (list_optimizer, list_lr_scheduler)
diff --git a/basis_nnmf/non_linear_weigth_function.py b/basis_nnmf/non_linear_weigth_function.py
new file mode 100644
index 0000000..053a9b6
--- /dev/null
+++ b/basis_nnmf/non_linear_weigth_function.py
@@ -0,0 +1,26 @@
+import torch
+
+
+def non_linear_weigth_function(
+    weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int
+) -> torch.Tensor:
+
+    if positive_function_type == 0:
+        positive_weights = torch.abs(weight)
+
+    elif positive_function_type == 1:
+        assert beta is not None
+        positive_weights = weight
+        max_value = torch.abs(positive_weights).max()
+        if max_value > 80:
+            positive_weights = 80.0 * positive_weights / max_value
+        positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights)
+
+    elif positive_function_type == 2:
+        assert beta is not None
+        positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5
+
+    else:
+        positive_weights = weight
+
+    return positive_weights
diff --git a/basis_nnmf/plot.py b/basis_nnmf/plot.py
new file mode 100644
index 0000000..ad22d33
--- /dev/null
+++ b/basis_nnmf/plot.py
@@ -0,0 +1,15 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+# NOTE: convert_log_to_numpy.py writes files named data_<log_dir>.npy;
+# point the filename below at the log directory that was converted.
+data = np.load("data_log.npy")
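+# Each test set holds 10,000 samples, so the curve below converts the raw
+# correct-count into an error rate in percent.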
+plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + "k", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") +plt.title("CIFAR10") +plt.show() diff --git a/basis_nnmf/run_network.py b/basis_nnmf/run_network.py new file mode 100644 index 0000000..8d0a43f --- /dev/null +++ b/basis_nnmf/run_network.py @@ -0,0 +1,251 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +import argh + +import time +import numpy as np +import torch + +rand_seed: int = 21 +torch.manual_seed(rand_seed) +torch.cuda.manual_seed(rand_seed) +np.random.seed(rand_seed) + +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.01, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 # 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 
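+        # Per-epoch statistics, accumulated over the mini-batches below.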
+ train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/basis_nnmf_autograd/L1NormLayer.py b/basis_nnmf_autograd/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/basis_nnmf_autograd/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/basis_nnmf_autograd/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt 
b/basis_nnmf_autograd/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..cb83d13 Binary files /dev/null and b/basis_nnmf_autograd/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt differ diff --git a/basis_nnmf_autograd/NNMF2dAutograd.py b/basis_nnmf_autograd/NNMF2dAutograd.py new file mode 100644 index 0000000..3a72b72 --- /dev/null +++ b/basis_nnmf_autograd/NNMF2dAutograd.py @@ -0,0 +1,121 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2dAutograd(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + + def __init__( + self, + in_channels: int, + out_channels: int, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.in_channels = in_channels + self.out_channels = out_channels + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty((out_channels, in_channels), **factory_kwargs) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=1, keepdim=True) + 10e-20 + ) + + # --------------------- + + # Prepare h + h = torch.full( + (input.shape[0], self.out_channels, input.shape[-2], input.shape[-1]), + 1.0 / float(self.out_channels), + device=input.device, + dtype=input.dtype, + ) + + h = h.movedim(1, -1) + input = input.movedim(1, -1) + for _ in range(0, self.iterations): + reconstruction = torch.nn.functional.linear(h, positive_weights.T) + reconstruction = reconstruction + 1e-20 + if self.epsilon is None: + h = h * torch.nn.functional.linear( + (input / reconstruction), positive_weights + ) + else: + h = h * ( + 1 + + self.epsilon + * torch.nn.functional.linear( + (input / reconstruction), positive_weights + ) + ) + h = h / (h.sum(-1, keepdim=True) + 10e-20) + h = h.movedim(-1, 1) + input = input.movedim(-1, 1) + + assert torch.isfinite(h).all() + + return h diff --git a/basis_nnmf_autograd/append_block.py b/basis_nnmf_autograd/append_block.py new file mode 100644 index 0000000..697cffb --- /dev/null +++ 
b/basis_nnmf_autograd/append_block.py @@ -0,0 +1,149 @@ +import torch +from L1NormLayer import L1NormLayer +from NNMF2dAutograd import NNMF2dAutograd +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + # I need the output size + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, kernel_size_internal[0], kernel_size_internal[1])), + stride=stride, + padding=padding, + dilation=dilation, + ) + .squeeze(0) + .squeeze(0) + ) + network.append( + torch.nn.Unfold( + kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + NNMF2dAutograd( + in_channels=test_image.shape[1], + out_channels=out_channels, + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + local_learning=local_learning, + local_learning_kl=local_learning_kl, + ).to(torch_device) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + stride=(1, 1), + padding=(0, 0), + bias=True, + device=torch_device, + ) + ) + # Init the cnn top layers 1x1 conv2d layers + for name, param in network[-1].named_parameters(): + with torch.no_grad(): + if name == "bias": + param.data *= 0 + if name == "weight": + assert param.shape[-2] == 1 + assert param.shape[-1] == 1 + param[: param.shape[0], : param.shape[0], 0, 0] = torch.eye( + param.shape[0], dtype=param.dtype, device=param.device + ) + param[param.shape[0] :, :, 0, 0] = 0 + param[:, param.shape[0] :, 0, 0] = 0 + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_cnn_top) + + if 
(test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=momentum, + track_running_stats=track_running_stats, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git a/basis_nnmf_autograd/append_parameter.py b/basis_nnmf_autograd/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/basis_nnmf_autograd/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/basis_nnmf_autograd/convert_log_to_numpy.py b/basis_nnmf_autograd/convert_log_to_numpy.py new file mode 100644 index 0000000..6a1343a --- /dev/null +++ b/basis_nnmf_autograd/convert_log_to_numpy.py @@ -0,0 +1,30 @@ +import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/basis_nnmf_autograd/data_loader.py b/basis_nnmf_autograd/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/basis_nnmf_autograd/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/basis_nnmf_autograd/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy b/basis_nnmf_autograd/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 0000000..76d4ed3 Binary files /dev/null and b/basis_nnmf_autograd/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy differ diff --git a/basis_nnmf_autograd/get_the_data.py b/basis_nnmf_autograd/get_the_data.py new file mode 100644 index 0000000..fc61064 --- /dev/null +++ b/basis_nnmf_autograd/get_the_data.py @@ -0,0 +1,147 @@ +import torch +import torchvision # type: ignore +from data_loader import data_loader + +from torchvision.transforms import v2 # type: ignore +import numpy as np + + +def get_the_data( + dataset: str, + batch_size_train: int, + batch_size_test: int, + torch_device: torch.device, + input_dim_x: int, + input_dim_y: int, + flip_p: float = 0.5, + jitter_brightness: 
float = 0.5, + jitter_contrast: float = 0.1, + jitter_saturation: float = 0.1, + jitter_hue: float = 0.15, + da_auto_mode: bool = False, +) -> tuple[ + torch.utils.data.dataloader.DataLoader, + torch.utils.data.dataloader.DataLoader, + torchvision.transforms.Compose, + torchvision.transforms.Compose, +]: + if dataset == "MNIST": + tv_dataset_train = torchvision.datasets.MNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.MNIST( + root="data", train=False, download=True + ) + elif dataset == "FashionMNIST": + tv_dataset_train = torchvision.datasets.FashionMNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.FashionMNIST( + root="data", train=False, download=True + ) + elif dataset == "CIFAR10": + tv_dataset_train = torchvision.datasets.CIFAR10( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.CIFAR10( + root="data", train=False, download=True + ) + else: + raise NotImplementedError("This dataset is not implemented.") + + def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + torch.random.seed(worker_seed) + + g = torch.Generator() + g.manual_seed(0) + + if dataset == "MNIST" or dataset == "FashionMNIST": + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=tv_dataset_train.data, + labels=tv_dataset_train.targets, + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=tv_dataset_test.data, + labels=tv_dataset_test.targets, + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git 
a/basis_nnmf_autograd/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724148624.gp4u2.729873.0 b/basis_nnmf_autograd/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724148624.gp4u2.729873.0 new file mode 100644 index 0000000..272b3a1 Binary files /dev/null and b/basis_nnmf_autograd/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724148624.gp4u2.729873.0 differ diff --git a/basis_nnmf_autograd/loss_function.py b/basis_nnmf_autograd/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/basis_nnmf_autograd/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + .mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/basis_nnmf_autograd/make_network.py b/basis_nnmf_autograd/make_network.py new file mode 100644 index 0000000..9fa1cf2 --- /dev/null +++ b/basis_nnmf_autograd/make_network.py @@ -0,0 +1,212 @@ +import torch +from append_block import append_block +from L1NormLayer import L1NormLayer +from NNMF2dAutograd import NNMF2dAutograd +from append_parameter import append_parameter + + +def make_network( + input_dim_x: int, + input_dim_y: int, + input_number_of_channel: int, + iterations: int, + torch_device: torch.device, + epsilon: bool | None = None, + positive_function_type: int = 0, + beta: float | None = None, + # Conv: + number_of_output_channels: list[int] = [32, 64, 96, 10], + kernel_size_conv: list[tuple[int, int]] = [ + (5, 5), + (5, 5), + (-1, -1), # Take the whole input image x and y size + (1, 1), + ], + stride_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + padding_conv: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == 
len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, 2, 2)), + stride=(2, 2), + padding=(0, 0), + dilation=(1, 1), + ) + .squeeze(0) + .squeeze(0) + ) + + network.append( + torch.nn.Unfold( + kernel_size=(2, 2), + stride=(2, 2), + padding=(0, 0), + dilation=(1, 1), + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + NNMF2dAutograd( + in_channels=test_image.shape[1], + out_channels=test_image.shape[1] // 4, + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + local_learning=False, + local_learning_kl=False, + ).to(torch_device) + ) + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=0.1, + track_running_stats=False, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", + "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/basis_nnmf_autograd/make_optimize.py b/basis_nnmf_autograd/make_optimize.py new file mode 100644 index 0000000..ab1a4e0 --- /dev/null +++ b/basis_nnmf_autograd/make_optimize.py @@ -0,0 +1,32 @@ 
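+# One Adam optimizer and one ReduceLROnPlateau scheduler are built per
+# parameter group, so each group can follow its own learning rate; groups
+# without parameters get a None placeholder in both lists. A typical call
+# (values are the defaults used by run_network.py) looks like:
+#
+#     optimizers, lr_schedulers = make_optimize(
+#         parameters=[parameter_cnn_top, parameter_nnmf, parameter_norm],
+#         lr_initial=[1e-3, 1e-2, 1e-3],
+#     )
+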
+import torch + + +def make_optimize( + parameters: list[list[torch.nn.parameter.Parameter]], + lr_initial: list[float], + eps=1e-10, +) -> tuple[ + list[torch.optim.Adam | None], + list[torch.optim.lr_scheduler.ReduceLROnPlateau | None], +]: + list_optimizer: list[torch.optim.Adam | None] = [] + list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = [] + + assert len(parameters) == len(lr_initial) + + for i in range(0, len(parameters)): + if len(parameters[i]) > 0: + list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i])) + else: + list_optimizer.append(None) + + for i in range(0, len(list_optimizer)): + if list_optimizer[i] is not None: + pass + list_lr_scheduler.append( + torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps) # type: ignore + ) + else: + list_lr_scheduler.append(None) + + return (list_optimizer, list_lr_scheduler) diff --git a/basis_nnmf_autograd/non_linear_weigth_function.py b/basis_nnmf_autograd/non_linear_weigth_function.py new file mode 100644 index 0000000..053a9b6 --- /dev/null +++ b/basis_nnmf_autograd/non_linear_weigth_function.py @@ -0,0 +1,26 @@ +import torch + + +def non_linear_weigth_function( + weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int +) -> torch.Tensor: + + if positive_function_type == 0: + positive_weights = torch.abs(weight) + + elif positive_function_type == 1: + assert beta is not None + positive_weights = weight + max_value = torch.abs(positive_weights).max() + if max_value > 80: + positive_weights = 80.0 * positive_weights / max_value + positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights) + + elif positive_function_type == 2: + assert beta is not None + positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5 + + else: + positive_weights = weight + + return positive_weights diff --git a/basis_nnmf_autograd/plot.py b/basis_nnmf_autograd/plot.py new file mode 100644 index 0000000..ad22d33 --- /dev/null +++ b/basis_nnmf_autograd/plot.py @@ -0,0 +1,15 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.load("data_log.npy") +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + "k", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") +plt.title("CIFAR10") +plt.show() diff --git a/basis_nnmf_autograd/run_network.py b/basis_nnmf_autograd/run_network.py new file mode 100644 index 0000000..8d0a43f --- /dev/null +++ b/basis_nnmf_autograd/run_network.py @@ -0,0 +1,251 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +import argh + +import time +import numpy as np +import torch + +rand_seed: int = 21 +torch.manual_seed(rand_seed) +torch.cuda.manual_seed(rand_seed) +np.random.seed(rand_seed) + +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.01, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 
# 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" 
{lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/basis_nnmf_convnnmf/L1NormLayer.py b/basis_nnmf_convnnmf/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/basis_nnmf_convnnmf/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/basis_nnmf_convnnmf/NNMF2dConvGroupedAutograd.py b/basis_nnmf_convnnmf/NNMF2dConvGroupedAutograd.py new file mode 100644 index 0000000..776da80 --- /dev/null +++ b/basis_nnmf_convnnmf/NNMF2dConvGroupedAutograd.py @@ -0,0 +1,208 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2dConvGrouped(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + convolution_contribution_map: None | torch.Tensor = None + convolution_contribution_map_enable: bool + convolution_ip_norm: bool + kernel_size: tuple[int, ...] + stride: tuple[int, ...] + padding: str | tuple[int, ...] + dilation: tuple[int, ...] 
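+    # output_size caches the shape of the convolution output; it is computed
+    # lazily on the first forward() call because it depends on the input size.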
+ output_size: None | torch.Tensor = None + groups: int + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: tuple[int, int], + groups: int = 1, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + convolution_contribution_map_enable: bool = False, + stride: tuple[int, int] = (1, 1), + padding: str | tuple[int, int] = (0, 0), + dilation: tuple[int, int] = (1, 1), + convolution_ip_norm: bool = True, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + valid_padding_strings = {"same", "valid"} + if isinstance(padding, str): + if padding not in valid_padding_strings: + raise ValueError( + f"Invalid padding string {padding!r}, should be one of {valid_padding_strings}" + ) + if padding == "same" and any(s != 1 for s in stride): + raise ValueError( + "padding='same' is not supported for strided convolutions" + ) + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.groups = groups + assert ( + in_channels % self.groups == 0 + ), f"Can't divide without rest {in_channels} / {self.groups}" + self.in_channels = in_channels // self.groups + self.out_channels = out_channels + + self.iterations = iterations + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.convolution_contribution_map_enable = convolution_contribution_map_enable + self.convolution_ip_norm = convolution_ip_norm + + self.weight = torch.nn.parameter.Parameter( + torch.empty( + (out_channels, self.in_channels, *kernel_size), **factory_kwargs + ) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}," + s += f"kernel_size={self.kernel_size}," + s += f"stride={self.stride}, iterations={self.iterations}" + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", groups={self.groups}" + + if self.padding != (0,) * len(self.padding): + s += f", padding={self.padding}" + if self.dilation != (1,) * len(self.dilation): + s += f", dilation={self.dilation}" + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + if input.ndim == 2: + input = input.unsqueeze(-1) + if input.ndim == 3: + input = input.unsqueeze(-1) + + if self.output_size is None: + self.output_size = torch.tensor( + torch.nn.functional.conv2d( + torch.zeros( + 1, + input.shape[1], + input.shape[2], + input.shape[3], + device=self.weight.device, + dtype=self.weight.dtype, + ), + torch.zeros_like(self.weight), + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + ).shape, + requires_grad=False, + ) + assert self.output_size is not None + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + + positive_weights = positive_weights / ( + positive_weights.sum(dim=-1, keepdim=True) + 10e-20 + ) + + input = input / (input.sum((1, 2, 3), keepdim=True) + 10e-20) + + # Prepare h + self.output_size[0] = input.shape[0] 
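+        # Start from a uniform code: every output channel carries
+        # 1 / out_channels at each spatial position. The loop below then
+        # applies multiplicative updates, h <- h * conv(input / reconstruction),
+        # which keep h non-negative.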
+ h = torch.full( + self.output_size.tolist(), + 1.0 / float(self.output_size[1]), + device=input.device, + dtype=input.dtype, + ) + + if self.convolution_ip_norm: + pass + else: + h = h / (h.sum((1, 2, 3), keepdim=True) + 10e-20) + + for _ in range(0, self.iterations): + + factor_x_div_r: torch.Tensor = input / ( + torch.nn.functional.conv_transpose2d( + h, + positive_weights, + stride=self.stride, + padding=self.padding, # type: ignore + dilation=self.dilation, + groups=self.groups, + ) + + 10e-20 + ) + + if self.epsilon is None: + h = h * torch.nn.functional.conv2d( + factor_x_div_r, + positive_weights, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + ) + else: + h = h * ( + 1 + + self.epsilon + * torch.nn.functional.conv2d( + factor_x_div_r, + positive_weights, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + ) + ) + if self.convolution_ip_norm: + h = h / (h.sum(1, keepdim=True) + 10e-20) + else: + h = h / (h.sum((1, 2, 3), keepdim=True) + 10e-20) + + assert torch.isfinite(h).all() + return h diff --git a/basis_nnmf_convnnmf/__pycache__/L1NormLayer.cpython-312.pyc b/basis_nnmf_convnnmf/__pycache__/L1NormLayer.cpython-312.pyc new file mode 100644 index 0000000..9eff047 Binary files /dev/null and b/basis_nnmf_convnnmf/__pycache__/L1NormLayer.cpython-312.pyc differ diff --git a/basis_nnmf_convnnmf/__pycache__/NNMF2dConvGrouped.cpython-312.pyc b/basis_nnmf_convnnmf/__pycache__/NNMF2dConvGrouped.cpython-312.pyc new file mode 100644 index 0000000..ebeacad Binary files /dev/null and b/basis_nnmf_convnnmf/__pycache__/NNMF2dConvGrouped.cpython-312.pyc differ diff --git a/basis_nnmf_convnnmf/__pycache__/NNMF2dConvGroupedAutograd.cpython-312.pyc b/basis_nnmf_convnnmf/__pycache__/NNMF2dConvGroupedAutograd.cpython-312.pyc new file mode 100644 index 0000000..5300f73 Binary files /dev/null and b/basis_nnmf_convnnmf/__pycache__/NNMF2dConvGroupedAutograd.cpython-312.pyc differ diff --git a/basis_nnmf_convnnmf/__pycache__/append_block.cpython-312.pyc b/basis_nnmf_convnnmf/__pycache__/append_block.cpython-312.pyc new file mode 100644 index 0000000..a4d5aa5 Binary files /dev/null and b/basis_nnmf_convnnmf/__pycache__/append_block.cpython-312.pyc differ diff --git a/basis_nnmf_convnnmf/__pycache__/append_parameter.cpython-312.pyc b/basis_nnmf_convnnmf/__pycache__/append_parameter.cpython-312.pyc new file mode 100644 index 0000000..0b183d7 Binary files /dev/null and b/basis_nnmf_convnnmf/__pycache__/append_parameter.cpython-312.pyc differ diff --git a/basis_nnmf_convnnmf/__pycache__/data_loader.cpython-312.pyc b/basis_nnmf_convnnmf/__pycache__/data_loader.cpython-312.pyc new file mode 100644 index 0000000..d64c880 Binary files /dev/null and b/basis_nnmf_convnnmf/__pycache__/data_loader.cpython-312.pyc differ diff --git a/basis_nnmf_convnnmf/__pycache__/get_the_data.cpython-312.pyc b/basis_nnmf_convnnmf/__pycache__/get_the_data.cpython-312.pyc new file mode 100644 index 0000000..ca55d41 Binary files /dev/null and b/basis_nnmf_convnnmf/__pycache__/get_the_data.cpython-312.pyc differ diff --git a/basis_nnmf_convnnmf/__pycache__/loss_function.cpython-312.pyc b/basis_nnmf_convnnmf/__pycache__/loss_function.cpython-312.pyc new file mode 100644 index 0000000..0f660f3 Binary files /dev/null and b/basis_nnmf_convnnmf/__pycache__/loss_function.cpython-312.pyc differ diff --git a/basis_nnmf_convnnmf/__pycache__/make_network.cpython-312.pyc b/basis_nnmf_convnnmf/__pycache__/make_network.cpython-312.pyc new file mode 100644 
index 0000000..073acd1
Binary files /dev/null and b/basis_nnmf_convnnmf/__pycache__/make_network.cpython-312.pyc differ
diff --git a/basis_nnmf_convnnmf/__pycache__/make_optimize.cpython-312.pyc b/basis_nnmf_convnnmf/__pycache__/make_optimize.cpython-312.pyc
new file mode 100644
index 0000000..1aac62b
Binary files /dev/null and b/basis_nnmf_convnnmf/__pycache__/make_optimize.cpython-312.pyc differ
diff --git a/basis_nnmf_convnnmf/__pycache__/non_linear_weigth_function.cpython-312.pyc b/basis_nnmf_convnnmf/__pycache__/non_linear_weigth_function.cpython-312.pyc
new file mode 100644
index 0000000..716dc28
Binary files /dev/null and b/basis_nnmf_convnnmf/__pycache__/non_linear_weigth_function.cpython-312.pyc differ
diff --git a/basis_nnmf_convnnmf/append_block.py b/basis_nnmf_convnnmf/append_block.py
new file mode 100644
index 0000000..5553189
--- /dev/null
+++ b/basis_nnmf_convnnmf/append_block.py
@@ -0,0 +1,54 @@
+import torch
+from NNMF2dConvGroupedAutograd import NNMF2dConvGrouped
+from append_parameter import append_parameter
+
+
+def append_block(
+    network: torch.nn.Sequential,
+    out_channels: int,
+    test_image: torch.Tensor,
+    parameter_cnn_top: list[torch.nn.parameter.Parameter],
+    parameter_nnmf: list[torch.nn.parameter.Parameter],
+    parameter_norm: list[torch.nn.parameter.Parameter],
+    torch_device: torch.device,
+    dilation: tuple[int, int] | int = 1,
+    padding: tuple[int, int] | int = 0,
+    stride: tuple[int, int] | int = 1,
+    kernel_size: tuple[int, int] = (5, 5),
+    epsilon: float | None = None,
+    positive_function_type: int = 0,
+    beta: float | None = None,
+    iterations: int = 20,
+    local_learning: bool = False,
+    local_learning_kl: bool = False,
+    momentum: float = 0.1,
+    track_running_stats: bool = False,
+) -> torch.Tensor:
+
+    kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]]
+
+    if kernel_size[0] < 1:
+        kernel_size_internal[0] = test_image.shape[-2]
+
+    if kernel_size[1] < 1:
+        kernel_size_internal[1] = test_image.shape[-1]
+
+    network.append(
+        NNMF2dConvGrouped(
+            in_channels=test_image.shape[1],
+            out_channels=out_channels,
+            kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]),
+            dilation=dilation,
+            padding=padding,
+            stride=stride,
+            epsilon=epsilon,
+            positive_function_type=positive_function_type,
+            beta=beta,
+            iterations=iterations,
+            device=torch_device,
+        )
+    )
+    test_image = network[-1](test_image)
+    append_parameter(module=network[-1], parameter_list=parameter_nnmf)
+
+    return test_image
diff --git a/basis_nnmf_convnnmf/append_parameter.py b/basis_nnmf_convnnmf/append_parameter.py
new file mode 100644
index 0000000..b972e39
--- /dev/null
+++ b/basis_nnmf_convnnmf/append_parameter.py
@@ -0,0 +1,8 @@
+import torch
+
+
+def append_parameter(
+    module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter]
+):
+    for netp in module.parameters():
+        parameter_list.append(netp)
diff --git a/basis_nnmf_convnnmf/convert_log_to_numpy.py b/basis_nnmf_convnnmf/convert_log_to_numpy.py
new file mode 100644
index 0000000..6a1343a
--- /dev/null
+++ b/basis_nnmf_convnnmf/convert_log_to_numpy.py
@@ -0,0 +1,30 @@
+import os
+import glob
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+from tensorboard.backend.event_processing import event_accumulator  # type: ignore
+import numpy as np
+
+
+def get_data(path: str = "log_cnn"):
+    acc = event_accumulator.EventAccumulator(path)
+    acc.Reload()
+
+    which_scalar = "Test Number Correct"
+    te = acc.Scalars(which_scalar)
+
+    np_temp = np.zeros((len(te), 2))
+
+    for id in range(0, len(te)):
+        np_temp[id, 0] = te[id].step
+        np_temp[id, 1] = te[id].value
+
+    print(np_temp[:, 1] / 100)
+    return np_temp
+
+
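+# Convert every TensorBoard run directory matching log_* into a numpy file
+# data_<run>.npy with one (step, value) row per entry of the
+# "Test Number Correct" scalar.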
+for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/basis_nnmf_convnnmf/data_loader.py b/basis_nnmf_convnnmf/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/basis_nnmf_convnnmf/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/basis_nnmf_convnnmf/get_the_data.py b/basis_nnmf_convnnmf/get_the_data.py new file mode 100644 index 0000000..fc61064 --- /dev/null +++ b/basis_nnmf_convnnmf/get_the_data.py @@ -0,0 +1,147 @@ +import torch +import torchvision # type: ignore +from data_loader import data_loader + +from torchvision.transforms import v2 # type: ignore +import numpy as np + + +def get_the_data( + dataset: str, + batch_size_train: int, + batch_size_test: int, + torch_device: torch.device, + input_dim_x: int, + input_dim_y: int, + flip_p: float = 0.5, + jitter_brightness: float = 0.5, + jitter_contrast: float = 0.1, + jitter_saturation: float = 0.1, + jitter_hue: float = 0.15, + da_auto_mode: bool = False, +) -> tuple[ + torch.utils.data.dataloader.DataLoader, + torch.utils.data.dataloader.DataLoader, + torchvision.transforms.Compose, + torchvision.transforms.Compose, +]: + if dataset == "MNIST": + tv_dataset_train = torchvision.datasets.MNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.MNIST( + root="data", train=False, download=True + ) + elif dataset == "FashionMNIST": + tv_dataset_train = torchvision.datasets.FashionMNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.FashionMNIST( + root="data", train=False, download=True + ) + elif dataset == "CIFAR10": + tv_dataset_train = torchvision.datasets.CIFAR10( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.CIFAR10( + root="data", train=False, download=True + ) + else: + raise NotImplementedError("This dataset is not implemented.") + + def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + torch.random.seed(worker_seed) + + g = torch.Generator() + g.manual_seed(0) + + if dataset == "MNIST" or dataset == "FashionMNIST": + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=tv_dataset_train.data, + labels=tv_dataset_train.targets, + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=tv_dataset_test.data, + labels=tv_dataset_test.targets, + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + 
transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-01_-_/events.out.tfevents.1724158487.gp4u1.903252.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-01_-_/events.out.tfevents.1724158487.gp4u1.903252.0 new file mode 100644 index 0000000..e02a58d Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-01_-_/events.out.tfevents.1724158487.gp4u1.903252.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-02_-_/events.out.tfevents.1724158460.gp4u1.903156.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-02_-_/events.out.tfevents.1724158460.gp4u1.903156.0 new file mode 100644 index 0000000..a277a68 Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-02_-_/events.out.tfevents.1724158460.gp4u1.903156.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724157884.gp4u1.901250.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724157884.gp4u1.901250.0 new file mode 100644 index 0000000..00b8c3d Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724157884.gp4u1.901250.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158276.gp4u1.902364.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158276.gp4u1.902364.0 new file mode 100644 index 0000000..f4c7fe2 Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158276.gp4u1.902364.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158326.gp4u1.902564.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158326.gp4u1.902564.0 new file mode 100644 index 0000000..e2667b0 Binary files /dev/null and 
b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158326.gp4u1.902564.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158349.gp4u1.902674.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158349.gp4u1.902674.0 new file mode 100644 index 0000000..e78116d Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158349.gp4u1.902674.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158412.gp4u1.902887.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158412.gp4u1.902887.0 new file mode 100644 index 0000000..1daa510 Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158412.gp4u1.902887.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158599.gp4u1.904447.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158599.gp4u1.904447.0 new file mode 100644 index 0000000..b0bc5a3 Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158599.gp4u1.904447.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158615.gp4u1.904544.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158615.gp4u1.904544.0 new file mode 100644 index 0000000..192e495 Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158615.gp4u1.904544.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158662.gp4u1.904748.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158662.gp4u1.904748.0 new file mode 100644 index 0000000..1b9a603 Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158662.gp4u1.904748.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158733.gp4u1.905336.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158733.gp4u1.905336.0 new file mode 100644 index 0000000..f5abd64 Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158733.gp4u1.905336.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158830.gp4u1.906047.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158830.gp4u1.906047.0 new file mode 100644 index 0000000..3fc8a14 Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158830.gp4u1.906047.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158865.gp4u1.906252.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158865.gp4u1.906252.0 new file mode 100644 index 0000000..9ad6077 Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_-_/events.out.tfevents.1724158865.gp4u1.906252.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_1.0000e-03_/events.out.tfevents.1724157792.gp4u1.900741.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_1.0000e-03_/events.out.tfevents.1724157792.gp4u1.900741.0 new file mode 100644 index 0000000..534ef58 Binary files /dev/null and 
b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-03_1.0000e-03_/events.out.tfevents.1724157792.gp4u1.900741.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-05_-_/events.out.tfevents.1724158516.gp4u1.904045.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-05_-_/events.out.tfevents.1724158516.gp4u1.904045.0 new file mode 100644 index 0000000..81a1d83 Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-05_-_/events.out.tfevents.1724158516.gp4u1.904045.0 differ diff --git a/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-05_-_/events.out.tfevents.1724158584.gp4u1.904351.0 b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-05_-_/events.out.tfevents.1724158584.gp4u1.904351.0 new file mode 100644 index 0000000..b9e418a Binary files /dev/null and b/basis_nnmf_convnnmf/log_iter20_lr_-_1.0000e-05_-_/events.out.tfevents.1724158584.gp4u1.904351.0 differ diff --git a/basis_nnmf_convnnmf/loss_function.py b/basis_nnmf_convnnmf/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/basis_nnmf_convnnmf/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + .mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/basis_nnmf_convnnmf/make_network.py b/basis_nnmf_convnnmf/make_network.py new file mode 100644 index 0000000..1e62358 --- /dev/null +++ b/basis_nnmf_convnnmf/make_network.py @@ -0,0 +1,171 @@ +import torch +from append_block import append_block +from NNMF2dConvGroupedAutograd import NNMF2dConvGrouped +from append_parameter import append_parameter + + +def make_network( + input_dim_x: int, + input_dim_y: int, + input_number_of_channel: int, + iterations: int, + torch_device: torch.device, + epsilon: bool | None = None, + positive_function_type: int = 0, + beta: float | None = None, + # Conv: + number_of_output_channels: list[int] = [32, 64, 96, 10], + kernel_size_conv: list[tuple[int, int]] = [ + (5, 5), + (5, 5), + (-1, -1), # Take the whole input image x and y size + (1, 1), + ], + stride_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + padding_conv: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: 
list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + network.append(torch.nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))) + test_image = network[-1](test_image) + + # network.append(torch.nn.ReLU()) + # test_image = network[-1](test_image) + + # network.append( + # NNMF2dConvGrouped( + # in_channels=test_image.shape[1], + # out_channels=test_image.shape[1], + # kernel_size=(2, 2), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # device=torch_device, + # ) + # ) + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + # network.append( + # torch.nn.BatchNorm2d( + # num_features=test_image.shape[1], + # device=torch_device, + # momentum=0.1, + # track_running_stats=False, + # ) + # ) + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", + "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/basis_nnmf_convnnmf/make_optimize.py b/basis_nnmf_convnnmf/make_optimize.py new file mode 100644 index 0000000..ab1a4e0 --- /dev/null +++ b/basis_nnmf_convnnmf/make_optimize.py @@ -0,0 +1,32 @@ +import torch + + +def make_optimize( + parameters: list[list[torch.nn.parameter.Parameter]], + lr_initial: list[float], + eps=1e-10, +) -> tuple[ + 
list[torch.optim.Adam | None], + list[torch.optim.lr_scheduler.ReduceLROnPlateau | None], +]: + list_optimizer: list[torch.optim.Adam | None] = [] + list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = [] + + assert len(parameters) == len(lr_initial) + + for i in range(0, len(parameters)): + if len(parameters[i]) > 0: + list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i])) + else: + list_optimizer.append(None) + + for i in range(0, len(list_optimizer)): + if list_optimizer[i] is not None: + pass + list_lr_scheduler.append( + torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps) # type: ignore + ) + else: + list_lr_scheduler.append(None) + + return (list_optimizer, list_lr_scheduler) diff --git a/basis_nnmf_convnnmf/non_linear_weigth_function.py b/basis_nnmf_convnnmf/non_linear_weigth_function.py new file mode 100644 index 0000000..053a9b6 --- /dev/null +++ b/basis_nnmf_convnnmf/non_linear_weigth_function.py @@ -0,0 +1,26 @@ +import torch + + +def non_linear_weigth_function( + weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int +) -> torch.Tensor: + + if positive_function_type == 0: + positive_weights = torch.abs(weight) + + elif positive_function_type == 1: + assert beta is not None + positive_weights = weight + max_value = torch.abs(positive_weights).max() + if max_value > 80: + positive_weights = 80.0 * positive_weights / max_value + positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights) + + elif positive_function_type == 2: + assert beta is not None + positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5 + + else: + positive_weights = weight + + return positive_weights diff --git a/basis_nnmf_convnnmf/plot.py b/basis_nnmf_convnnmf/plot.py new file mode 100644 index 0000000..ad22d33 --- /dev/null +++ b/basis_nnmf_convnnmf/plot.py @@ -0,0 +1,15 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.load("data_log.npy") +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + "k", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") +plt.title("CIFAR10") +plt.show() diff --git a/basis_nnmf_convnnmf/run_network.py b/basis_nnmf_convnnmf/run_network.py new file mode 100644 index 0000000..48ab2a5 --- /dev/null +++ b/basis_nnmf_convnnmf/run_network.py @@ -0,0 +1,253 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +import argh + +import time +import numpy as np +import torch + +rand_seed: int = 21 +torch.manual_seed(rand_seed) +torch.cuda.manual_seed(rand_seed) +np.random.seed(rand_seed) + +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.001, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 + batch_size_test: int = 50 + number_of_epoch: int = 500 + + print(batch_size_train, batch_size_test) + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: 
float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = 
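run_network.py steps each scheduler once per epoch with the accumulated (not averaged) training loss; ReduceLROnPlateau only compares the metric against its own history, so the absolute scale does not matter as long as the dataset size stays fixed. A minimal usage sketch (get_last_lr() on ReduceLROnPlateau needs a recent PyTorch, which the script's own calls already assume):

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, eps=1e-10)

for epoch in range(3):
    summed_loss = 10.0 / (epoch + 1)  # stand-in for the summed mini-batch losses
    scheduler.step(summed_loss)       # lr is reduced only after a plateau
    print(scheduler.get_last_lr())    # one entry per parameter group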
time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/basis_nnmf_groups1/L1NormLayer.py b/basis_nnmf_groups1/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/basis_nnmf_groups1/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/basis_nnmf_groups1/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt b/basis_nnmf_groups1/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..1803c5d Binary files /dev/null and b/basis_nnmf_groups1/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt differ diff --git a/basis_nnmf_groups1/NNMF2dGrouped.py b/basis_nnmf_groups1/NNMF2dGrouped.py new file mode 100644 index 0000000..35fe0a6 --- /dev/null +++ b/basis_nnmf_groups1/NNMF2dGrouped.py @@ -0,0 +1,277 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2dGrouped(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + groups: int + + def __init__( + self, + in_channels: int, + out_channels: int, + groups: int = 1, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.groups = groups + assert ( + in_channels % self.groups == 0 + ), f"Can't divide without rest {in_channels} / {self.groups}" + 
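L1NormLayer rescales every spatial position so that the channel dimension sums to one, with a small epsilon guarding against division by zero (note that the default literal 10e-20 is 1e-19, not 1e-20; the same constant recurs throughout the NNMF code). A quick check of the normalization:

import torch

x = torch.rand((2, 5, 3, 3))                    # (batch, channel, x, y)
y = x / (x.sum(dim=1, keepdim=True) + 10e-20)
print(y.sum(dim=1).min(), y.sum(dim=1).max())   # both ~1.0 at every position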
self.in_channels = in_channels // self.groups + assert ( + out_channels % self.groups == 0 + ), f"Can't divide without rest {out_channels} / {self.groups}" + self.out_channels = out_channels // self.groups + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty( + (self.groups, self.out_channels, self.in_channels), **factory_kwargs + ) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d_grouped = FunctionalNNMF2dGrouped.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + s += f", groups={self.groups}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=-1, keepdim=True) + 10e-20 + ) + assert self.groups * self.in_channels == input.shape[1] + + input = input.reshape( + ( + input.shape[0], + self.groups, + self.in_channels, + input.shape[-2], + input.shape[-1], + ) + ) + input = input / (input.sum(dim=2, keepdim=True) + 10e-20) + + h_dyn = self.functional_nnmf2d_grouped( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + h_dyn = h_dyn.reshape( + ( + h_dyn.shape[0], + h_dyn.shape[1] * h_dyn.shape[2], + h_dyn.shape[3], + h_dyn.shape[4], + ) + ) + h_dyn = h_dyn / (h_dyn.sum(dim=1, keepdim=True) + 10e-20) + + return h_dyn + + +@torch.jit.script +def grouped_linear_einsum_h_weights(h, weights): + return torch.einsum("bgoxy,goi->bgixy", h, weights) + + +@torch.jit.script +def grouped_linear_einsum_reconstruction_weights(reconstruction, weights): + return torch.einsum("bgixy,goi->bgoxy", reconstruction, weights) + + +@torch.jit.script +def grouped_linear_einsum_h_input(h, reconstruction): + return torch.einsum("bgoxy,bgixy->goi", h, reconstruction) + + +class FunctionalNNMF2dGrouped(torch.autograd.Function): + + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + ( + input.shape[0], + input.shape[1], + out_channels, + input.shape[-2], + input.shape[-1], + ), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + for _ in range(0, iterations): + + reconstruction = grouped_linear_einsum_h_weights(h, weight) + reconstruction += 1e-20 + + if epsilon is None: + h *= grouped_linear_einsum_reconstruction_weights( + (input / reconstruction), weight + ) + else: + h *= 1 + epsilon * grouped_linear_einsum_reconstruction_weights( + (input / reconstruction), weight + ) + h /= h.sum(2, keepdim=True) + 10e-20 + + # ########################################################### + # Save 
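The iteration loop in FunctionalNNMF2dGrouped.forward is the classic multiplicative NNMF update, h <- h * W^T(x / (reconstruction)) followed by renormalization so h keeps summing to one over the output channels; the einsum helpers just apply it independently per group and per pixel. A single-vector sketch of the same update (the epsilon=None branch):

import torch

torch.manual_seed(0)
i, o = 6, 4                         # input and output channels
x = torch.rand(i)
x = x / x.sum()                     # the layer L1-normalizes its input
w = torch.rand(o, i)
w = w / w.sum(dim=1, keepdim=True)  # each row of W sums to one

h = torch.full((o,), 1.0 / o)       # uniform start, as in the forward pass
for _ in range(20):
    reconstruction = h @ w + 1e-20          # reconstruction of x from h
    h = h * ((x / reconstruction) @ w.T)    # multiplicative update
    h = h / (h.sum() + 1e-20)               # renormalize

print(h)                            # non-negative and sums to one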
the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + big_r = grouped_linear_einsum_h_weights(h, weight) + + big_r_div = 1.0 / (big_r + 1e-20) + + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + grouped_linear_einsum_h_weights(h * grad_output, weight) * big_r_div + ) + + del big_r_div + + # The weight gradient + if ctx.local_learning is False: + del big_r + + grad_weight = -grouped_linear_einsum_h_input( + h, (factor_x_div_r * grad_input) + ) + + grad_weight += grouped_linear_einsum_h_input( + (h * grad_output), + factor_x_div_r, + ) + + else: + if ctx.local_learning_kl: + + grad_weight = -grouped_linear_einsum_h_input( + h, + factor_x_div_r, + ) + + else: + grad_weight = -grouped_linear_einsum_h_input( + h, + (2 * (input - big_r)), + ) + + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + return ( + grad_input, + grad_weight, + None, + None, + None, + None, + None, + ) diff --git a/basis_nnmf_groups1/append_block.py b/basis_nnmf_groups1/append_block.py new file mode 100644 index 0000000..b37aa0a --- /dev/null +++ b/basis_nnmf_groups1/append_block.py @@ -0,0 +1,151 @@ +import torch +from L1NormLayer import L1NormLayer +from NNMF2dGrouped import NNMF2dGrouped +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, + groups: int = 1, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + # I need the output size + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, kernel_size_internal[0], kernel_size_internal[1])), + stride=stride, + padding=padding, + dilation=dilation, + ) + .squeeze(0) + .squeeze(0) + ) + network.append( + torch.nn.Unfold( + kernel_size=(kernel_size_internal[-2], 
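The Unfold/Fold pair at the start of append_block is an im2col trick: Unfold cuts the input into kernel_size patches, and Fold with kernel_size=(1, 1) stacks each patch into the channel dimension of one output pixel, so the NNMF layer that follows can treat a spatial convolution as a purely 1x1 operation. The shapes involved:

import torch

x = torch.rand((1, 3, 8, 8))
unfold = torch.nn.Unfold(kernel_size=(5, 5))          # 5x5 patches, stride 1
fold = torch.nn.Fold(output_size=(4, 4), kernel_size=(1, 1))

patches = unfold(x)       # (1, 75, 16): 3*5*5 values at each of 4*4 positions
as_image = fold(patches)  # (1, 75, 4, 4): one whole patch per output pixel
print(patches.shape, as_image.shape)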
kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + NNMF2dGrouped( + in_channels=test_image.shape[1], + out_channels=out_channels, + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + local_learning=local_learning, + local_learning_kl=local_learning_kl, + groups=groups, + ).to(torch_device) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + stride=(1, 1), + padding=(0, 0), + bias=True, + device=torch_device, + ) + ) + # Init the cnn top layers 1x1 conv2d layers + for name, param in network[-1].named_parameters(): + with torch.no_grad(): + if name == "bias": + param.data *= 0 + if name == "weight": + assert param.shape[-2] == 1 + assert param.shape[-1] == 1 + param[: param.shape[0], : param.shape[0], 0, 0] = torch.eye( + param.shape[0], dtype=param.dtype, device=param.device + ) + param[param.shape[0] :, :, 0, 0] = 0 + param[:, param.shape[0] :, 0, 0] = 0 + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_cnn_top) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=momentum, + track_running_stats=track_running_stats, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git a/basis_nnmf_groups1/append_parameter.py b/basis_nnmf_groups1/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/basis_nnmf_groups1/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/basis_nnmf_groups1/convert_log_to_numpy.py b/basis_nnmf_groups1/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/basis_nnmf_groups1/convert_log_to_numpy.py @@ -0,0 +1,31 @@ +import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = 
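The initialization loop above zeroes the bias and writes an identity matrix into the square part of the 1x1 convolution, so at initialization the "cnn_top" layer passes the NNMF output through unchanged and only learns to remix channels during training. A check of that property for the square case (hypothetical channel count):

import torch

conv = torch.nn.Conv2d(8, 8, kernel_size=(1, 1), bias=True)
with torch.no_grad():
    conv.bias.data *= 0
    conv.weight[:, :, 0, 0] = torch.eye(8)

x = torch.rand((1, 8, 4, 4))
print((conv(x) - x).abs().max())  # ~0: the layer starts as an identity map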
get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/basis_nnmf_groups1/data_loader.py b/basis_nnmf_groups1/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/basis_nnmf_groups1/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/basis_nnmf_groups1/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy b/basis_nnmf_groups1/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 0000000..dd2d575 Binary files /dev/null and b/basis_nnmf_groups1/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy differ diff --git a/basis_nnmf_groups1/get_the_data.py b/basis_nnmf_groups1/get_the_data.py new file mode 100644 index 0000000..fc61064 --- /dev/null +++ b/basis_nnmf_groups1/get_the_data.py @@ -0,0 +1,147 @@ +import torch +import torchvision # type: ignore +from data_loader import data_loader + +from torchvision.transforms import v2 # type: ignore +import numpy as np + + +def get_the_data( + dataset: str, + batch_size_train: int, + batch_size_test: int, + torch_device: torch.device, + input_dim_x: int, + input_dim_y: int, + flip_p: float = 0.5, + jitter_brightness: float = 0.5, + jitter_contrast: float = 0.1, + jitter_saturation: float = 0.1, + jitter_hue: float = 0.15, + da_auto_mode: bool = False, +) -> tuple[ + torch.utils.data.dataloader.DataLoader, + torch.utils.data.dataloader.DataLoader, + torchvision.transforms.Compose, + torchvision.transforms.Compose, +]: + if dataset == "MNIST": + tv_dataset_train = torchvision.datasets.MNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.MNIST( + root="data", train=False, download=True + ) + elif dataset == "FashionMNIST": + tv_dataset_train = torchvision.datasets.FashionMNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.FashionMNIST( + root="data", train=False, download=True + ) + elif dataset == "CIFAR10": + tv_dataset_train = torchvision.datasets.CIFAR10( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.CIFAR10( + root="data", train=False, download=True + ) + else: + raise NotImplementedError("This dataset is not implemented.") + + def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + torch.random.seed(worker_seed) + + g = torch.Generator() + g.manual_seed(0) + + if dataset == "MNIST" or dataset == "FashionMNIST": + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=tv_dataset_train.data, + labels=tv_dataset_train.targets, + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + 
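One caveat in get_the_data's seed_worker: torch.random.seed() takes no argument, so the call torch.random.seed(worker_seed) would raise a TypeError if the DataLoader ever ran with worker processes (with the default num_workers=0 the hook is simply never invoked). The usual per-worker seeding recipe looks like this:

import random
import numpy as np
import torch

def seed_worker(worker_id: int) -> None:
    # Derive a 32-bit seed from this worker's torch seed and propagate it.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)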
batch_size=batch_size_test, + pattern=tv_dataset_test.data, + labels=tv_dataset_test.targets, + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git a/basis_nnmf_groups1/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724156958.gp4u5.943707.0 b/basis_nnmf_groups1/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724156958.gp4u5.943707.0 new file mode 100644 index 0000000..19c6b44 Binary files /dev/null and b/basis_nnmf_groups1/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724156958.gp4u5.943707.0 differ diff --git a/basis_nnmf_groups1/loss_function.py b/basis_nnmf_groups1/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/basis_nnmf_groups1/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + 
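The scatter_ call in loss_function writes a single 1 into each row at the label's column, i.e. a plain one-hot encoding. A compact equivalent:

import torch

labels = torch.tensor([2, 0, 1])
one_hot = torch.zeros((3, 4))
one_hot.scatter_(1, labels.unsqueeze(1), 1.0)
print(one_hot)
# equivalently: torch.nn.functional.one_hot(labels, num_classes=4).float()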
.mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/basis_nnmf_groups1/make_network.py b/basis_nnmf_groups1/make_network.py new file mode 100644 index 0000000..16180fa --- /dev/null +++ b/basis_nnmf_groups1/make_network.py @@ -0,0 +1,216 @@ +import torch +from append_block import append_block +from L1NormLayer import L1NormLayer +from NNMF2dGrouped import NNMF2dGrouped +from append_parameter import append_parameter + + +def make_network( + input_dim_x: int, + input_dim_y: int, + input_number_of_channel: int, + iterations: int, + torch_device: torch.device, + epsilon: bool | None = None, + positive_function_type: int = 0, + beta: float | None = None, + # Conv: + number_of_output_channels: list[int] = [32, 64, 96, 10], + kernel_size_conv: list[tuple[int, int]] = [ + (5, 5), + (5, 5), + (-1, -1), # Take the whole input image x and y size + (1, 1), + ], + stride_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + padding_conv: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, + groups: list[int] = [1, 1, 1, 1], + groups_pool: list[int] = [1, 1, 1, 1], +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + groups=groups[block_id], + ) + + if (kernel_size_pool[block_id][0] > 0) and 
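Because target_one_hot is binary, the KL term above collapses to -log(h[label]) summed over the batch (the zero entries contribute nothing), i.e. a negative log-likelihood of the Softmax output, averaged over classes rather than samples. A numerically checkable restatement:

import torch

h = torch.tensor([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])   # softmax-like outputs
labels = torch.tensor([0, 1])
t = torch.nn.functional.one_hot(labels, 3).to(h.dtype)

kl = (t * torch.log((t + 1e-20) / (h + 1e-20))).sum(dim=0).mean(dim=0)
nll = -torch.log(h[torch.arange(2), labels] + 1e-20).sum() / 3.0
print(kl.item(), nll.item())   # the two values agree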
(kernel_size_pool[block_id][1] > 0): + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, 2, 2)), + stride=(2, 2), + padding=(0, 0), + dilation=(1, 1), + ) + .squeeze(0) + .squeeze(0) + ) + + network.append( + torch.nn.Unfold( + kernel_size=(2, 2), + stride=(2, 2), + padding=(0, 0), + dilation=(1, 1), + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + NNMF2dGrouped( + in_channels=test_image.shape[1], + out_channels=test_image.shape[1] // 4, + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + local_learning=False, + local_learning_kl=False, + groups=groups_pool[block_id], + ).to(torch_device) + ) + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=0.1, + track_running_stats=False, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", + "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/basis_nnmf_groups1/make_optimize.py b/basis_nnmf_groups1/make_optimize.py new file mode 100644 index 0000000..ab1a4e0 --- /dev/null +++ b/basis_nnmf_groups1/make_optimize.py @@ -0,0 +1,32 @@ +import torch + + +def make_optimize( + parameters: list[list[torch.nn.parameter.Parameter]], + lr_initial: list[float], + eps=1e-10, +) -> tuple[ + list[torch.optim.Adam | None], + list[torch.optim.lr_scheduler.ReduceLROnPlateau | None], +]: + list_optimizer: list[torch.optim.Adam | None] = [] + list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = [] + + assert len(parameters) == len(lr_initial) + + for i in range(0, len(parameters)): + if len(parameters[i]) > 0: + list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i])) + else: + list_optimizer.append(None) + + for i in range(0, len(list_optimizer)): + if list_optimizer[i] is not None: + pass + list_lr_scheduler.append( + torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps) # type: ignore + ) + else: + list_lr_scheduler.append(None) + + return (list_optimizer, list_lr_scheduler) diff --git a/basis_nnmf_groups1/non_linear_weigth_function.py b/basis_nnmf_groups1/non_linear_weigth_function.py new file mode 100644 index 0000000..053a9b6 --- /dev/null +++ b/basis_nnmf_groups1/non_linear_weigth_function.py @@ -0,0 +1,26 @@ +import torch + + +def non_linear_weigth_function( + weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int +) -> torch.Tensor: + + if positive_function_type == 0: + positive_weights = torch.abs(weight) + + elif positive_function_type == 1: + assert beta is not None + positive_weights = weight + 
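This branch replaces the AvgPool2d of the avg_pooling variant with a learned NNMF pooling: the 2x2/stride-2 Unfold halves both spatial dimensions while multiplying the channel count by four, and the NNMF2dGrouped stage with out_channels = in_channels // 4 maps the channels back, so the block behaves like a pooling layer with trainable mixing weights. The shape arithmetic:

import torch

x = torch.rand((1, 32, 8, 8))
patches = torch.nn.Unfold(kernel_size=(2, 2), stride=(2, 2))(x)   # (1, 128, 16)
pooled = torch.nn.Fold(output_size=(4, 4), kernel_size=(1, 1))(patches)
print(pooled.shape)   # (1, 128, 4, 4); the NNMF stage then maps 128 -> 128 // 4 = 32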
max_value = torch.abs(positive_weights).max() + if max_value > 80: + positive_weights = 80.0 * positive_weights / max_value + positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights) + + elif positive_function_type == 2: + assert beta is not None + positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5 + + else: + positive_weights = weight + + return positive_weights diff --git a/basis_nnmf_groups1/plot.py b/basis_nnmf_groups1/plot.py new file mode 100644 index 0000000..ad22d33 --- /dev/null +++ b/basis_nnmf_groups1/plot.py @@ -0,0 +1,15 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.load("data_log.npy") +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + "k", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") +plt.title("CIFAR10") +plt.show() diff --git a/basis_nnmf_groups1/run_network.py b/basis_nnmf_groups1/run_network.py new file mode 100644 index 0000000..8d0a43f --- /dev/null +++ b/basis_nnmf_groups1/run_network.py @@ -0,0 +1,251 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +import argh + +import time +import numpy as np +import torch + +rand_seed: int = 21 +torch.manual_seed(rand_seed) +torch.cuda.manual_seed(rand_seed) +np.random.seed(rand_seed) + +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.01, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 # 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if 
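non_linear_weigth_function maps the raw, possibly negative weights onto the non-negative factors the NNMF updates require: type 0 takes the absolute value, type 1 exponentiates with a temperature derived from tanh(beta) (rescaling the argument to at most 80 to avoid overflow), and type 2 squashes with a scaled tanh into (0, 1). A quick comparison, assuming it is run from one of the module directories of this commit:

import torch
from non_linear_weigth_function import non_linear_weigth_function

w = torch.linspace(-2.0, 2.0, 5)
beta = torch.tensor([0.0])
for ptype in (0, 1, 2):
    print(ptype, non_linear_weigth_function(w, beta, ptype))
# all three outputs are non-negative, which the multiplicative updates rely on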
only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + 
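torch.save(network, ...) pickles the entire Sequential, which is why Model_*.pt binaries are checked in next to the code: loading them again needs NNMF2dGrouped, L1NormLayer and friends importable under the same names. Saving a state_dict instead decouples the weights from the code; both options in miniature (stand-in model, and note that newer PyTorch versions need weights_only=False to unpickle full modules):

import torch

network = torch.nn.Sequential(torch.nn.Linear(4, 2))      # stand-in model

torch.save(network, "Model_full.pt")                       # pickles code + weights
model = torch.load("Model_full.pt", weights_only=False)    # classes must be importable

torch.save(network.state_dict(), "Model_state.pt")         # weights only
network.load_state_dict(torch.load("Model_state.pt"))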
print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/basis_nnmf_groups5/HAD_TO_CHANGE_THE_NUMBER_OF_NEURONS_TO_35_65_95 b/basis_nnmf_groups5/HAD_TO_CHANGE_THE_NUMBER_OF_NEURONS_TO_35_65_95 new file mode 100644 index 0000000..e69de29 diff --git a/basis_nnmf_groups5/L1NormLayer.py b/basis_nnmf_groups5/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/basis_nnmf_groups5/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/basis_nnmf_groups5/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt b/basis_nnmf_groups5/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..2b37269 Binary files /dev/null and b/basis_nnmf_groups5/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt differ diff --git a/basis_nnmf_groups5/NNMF2dGrouped.py b/basis_nnmf_groups5/NNMF2dGrouped.py new file mode 100644 index 0000000..35fe0a6 --- /dev/null +++ b/basis_nnmf_groups5/NNMF2dGrouped.py @@ -0,0 +1,277 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2dGrouped(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + groups: int + + def __init__( + self, + in_channels: int, + out_channels: int, + groups: int = 1, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.groups = groups + assert ( + in_channels % self.groups == 0 + ), f"Can't divide without rest {in_channels} / {self.groups}" + self.in_channels = in_channels // self.groups + assert ( + out_channels % self.groups == 0 + ), f"Can't divide without rest {out_channels} / {self.groups}" + self.out_channels = out_channels // self.groups + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty( + (self.groups, self.out_channels, self.in_channels), **factory_kwargs + ) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d_grouped = FunctionalNNMF2dGrouped.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + s += f", groups={self.groups}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + 
torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=-1, keepdim=True) + 10e-20 + ) + assert self.groups * self.in_channels == input.shape[1] + + input = input.reshape( + ( + input.shape[0], + self.groups, + self.in_channels, + input.shape[-2], + input.shape[-1], + ) + ) + input = input / (input.sum(dim=2, keepdim=True) + 10e-20) + + h_dyn = self.functional_nnmf2d_grouped( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + h_dyn = h_dyn.reshape( + ( + h_dyn.shape[0], + h_dyn.shape[1] * h_dyn.shape[2], + h_dyn.shape[3], + h_dyn.shape[4], + ) + ) + h_dyn = h_dyn / (h_dyn.sum(dim=1, keepdim=True) + 10e-20) + + return h_dyn + + +@torch.jit.script +def grouped_linear_einsum_h_weights(h, weights): + return torch.einsum("bgoxy,goi->bgixy", h, weights) + + +@torch.jit.script +def grouped_linear_einsum_reconstruction_weights(reconstruction, weights): + return torch.einsum("bgixy,goi->bgoxy", reconstruction, weights) + + +@torch.jit.script +def grouped_linear_einsum_h_input(h, reconstruction): + return torch.einsum("bgoxy,bgixy->goi", h, reconstruction) + + +class FunctionalNNMF2dGrouped(torch.autograd.Function): + + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + ( + input.shape[0], + input.shape[1], + out_channels, + input.shape[-2], + input.shape[-1], + ), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + for _ in range(0, iterations): + + reconstruction = grouped_linear_einsum_h_weights(h, weight) + reconstruction += 1e-20 + + if epsilon is None: + h *= grouped_linear_einsum_reconstruction_weights( + (input / reconstruction), weight + ) + else: + h *= 1 + epsilon * grouped_linear_einsum_reconstruction_weights( + (input / reconstruction), weight + ) + h /= h.sum(2, keepdim=True) + 10e-20 + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + big_r = grouped_linear_einsum_h_weights(h, weight) + + big_r_div = 1.0 / (big_r + 1e-20) + + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + grouped_linear_einsum_h_weights(h * grad_output, weight) * big_r_div + ) + + del big_r_div + + # The weight gradient + if ctx.local_learning 
is False: + del big_r + + grad_weight = -grouped_linear_einsum_h_input( + h, (factor_x_div_r * grad_input) + ) + + grad_weight += grouped_linear_einsum_h_input( + (h * grad_output), + factor_x_div_r, + ) + + else: + if ctx.local_learning_kl: + + grad_weight = -grouped_linear_einsum_h_input( + h, + factor_x_div_r, + ) + + else: + grad_weight = -grouped_linear_einsum_h_input( + h, + (2 * (input - big_r)), + ) + + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + return ( + grad_input, + grad_weight, + None, + None, + None, + None, + None, + ) diff --git a/basis_nnmf_groups5/append_block.py b/basis_nnmf_groups5/append_block.py new file mode 100644 index 0000000..b37aa0a --- /dev/null +++ b/basis_nnmf_groups5/append_block.py @@ -0,0 +1,151 @@ +import torch +from L1NormLayer import L1NormLayer +from NNMF2dGrouped import NNMF2dGrouped +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, + groups: int = 1, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + # I need the output size + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, kernel_size_internal[0], kernel_size_internal[1])), + stride=stride, + padding=padding, + dilation=dilation, + ) + .squeeze(0) + .squeeze(0) + ) + network.append( + torch.nn.Unfold( + kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + NNMF2dGrouped( + in_channels=test_image.shape[1], + out_channels=out_channels, + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + local_learning=local_learning, + local_learning_kl=local_learning_kl, + groups=groups, + ).to(torch_device) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append( + 
torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + stride=(1, 1), + padding=(0, 0), + bias=True, + device=torch_device, + ) + ) + # Init the cnn top layers 1x1 conv2d layers + for name, param in network[-1].named_parameters(): + with torch.no_grad(): + if name == "bias": + param.data *= 0 + if name == "weight": + assert param.shape[-2] == 1 + assert param.shape[-1] == 1 + param[: param.shape[0], : param.shape[0], 0, 0] = torch.eye( + param.shape[0], dtype=param.dtype, device=param.device + ) + param[param.shape[0] :, :, 0, 0] = 0 + param[:, param.shape[0] :, 0, 0] = 0 + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_cnn_top) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=momentum, + track_running_stats=track_running_stats, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git a/basis_nnmf_groups5/append_parameter.py b/basis_nnmf_groups5/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/basis_nnmf_groups5/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/basis_nnmf_groups5/convert_log_to_numpy.py b/basis_nnmf_groups5/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/basis_nnmf_groups5/convert_log_to_numpy.py @@ -0,0 +1,31 @@ +import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/basis_nnmf_groups5/data_loader.py b/basis_nnmf_groups5/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/basis_nnmf_groups5/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/basis_nnmf_groups5/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy 
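data_loader moves the whole dataset onto torch_device as float32 up front (scaled by the global maximum, which is 255 for uint8 images) and serves batches from a TensorDataset, so training needs no worker processes and no per-batch host-to-device copies; the flip side is that the full dataset must fit in device memory. A minimal sketch:

import torch

images = torch.randint(0, 256, (100, 3, 28, 28), dtype=torch.uint8)
labels = torch.randint(0, 10, (100,))

pattern = images.to(torch.float32)
pattern /= pattern.max()                       # global max: 255 for uint8 data

loader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(pattern, labels.to(torch.int64)),
    batch_size=10,
    shuffle=True,
)
for batch, target in loader:
    print(batch.shape, target.shape)           # (10, 3, 28, 28), (10,)
    break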
b/basis_nnmf_groups5/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 0000000..64c6c9d Binary files /dev/null and b/basis_nnmf_groups5/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy differ diff --git a/basis_nnmf_groups5/get_the_data.py b/basis_nnmf_groups5/get_the_data.py new file mode 100644 index 0000000..fc61064 --- /dev/null +++ b/basis_nnmf_groups5/get_the_data.py @@ -0,0 +1,147 @@ +import torch +import torchvision # type: ignore +from data_loader import data_loader + +from torchvision.transforms import v2 # type: ignore +import numpy as np + + +def get_the_data( + dataset: str, + batch_size_train: int, + batch_size_test: int, + torch_device: torch.device, + input_dim_x: int, + input_dim_y: int, + flip_p: float = 0.5, + jitter_brightness: float = 0.5, + jitter_contrast: float = 0.1, + jitter_saturation: float = 0.1, + jitter_hue: float = 0.15, + da_auto_mode: bool = False, +) -> tuple[ + torch.utils.data.dataloader.DataLoader, + torch.utils.data.dataloader.DataLoader, + torchvision.transforms.Compose, + torchvision.transforms.Compose, +]: + if dataset == "MNIST": + tv_dataset_train = torchvision.datasets.MNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.MNIST( + root="data", train=False, download=True + ) + elif dataset == "FashionMNIST": + tv_dataset_train = torchvision.datasets.FashionMNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.FashionMNIST( + root="data", train=False, download=True + ) + elif dataset == "CIFAR10": + tv_dataset_train = torchvision.datasets.CIFAR10( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.CIFAR10( + root="data", train=False, download=True + ) + else: + raise NotImplementedError("This dataset is not implemented.") + + def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + torch.random.seed(worker_seed) + + g = torch.Generator() + g.manual_seed(0) + + if dataset == "MNIST" or dataset == "FashionMNIST": + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=tv_dataset_train.data, + labels=tv_dataset_train.targets, + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=tv_dataset_test.data, + labels=tv_dataset_test.targets, + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + 
transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git a/basis_nnmf_groups5/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724157361.gp4u4.959581.0 b/basis_nnmf_groups5/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724157361.gp4u4.959581.0 new file mode 100644 index 0000000..de6426a Binary files /dev/null and b/basis_nnmf_groups5/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724157361.gp4u4.959581.0 differ diff --git a/basis_nnmf_groups5/loss_function.py b/basis_nnmf_groups5/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/basis_nnmf_groups5/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + .mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/basis_nnmf_groups5/make_network.py b/basis_nnmf_groups5/make_network.py new file mode 100644 index 0000000..963d85a --- /dev/null +++ b/basis_nnmf_groups5/make_network.py @@ -0,0 +1,216 @@ +import torch +from append_block import append_block +from L1NormLayer import L1NormLayer +from NNMF2dGrouped import NNMF2dGrouped +from append_parameter import append_parameter + + +def make_network( + input_dim_x: int, + input_dim_y: int, + input_number_of_channel: int, + iterations: int, + torch_device: torch.device, + epsilon: bool | None = None, + positive_function_type: int = 0, + beta: float | None = None, + # Conv: + number_of_output_channels: list[int] = [35, 65, 95, 10], + kernel_size_conv: list[tuple[int, int]] = [ + (5, 5), + (5, 5), + (-1, -1), # Take the whole input image x and y size + (1, 1), + ], + stride_conv: 
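For CIFAR10 the processing chains crop the native 32x32 images down to the network's 28x28 input: RandomCrop adds translation jitter during training, while CenterCrop gives a deterministic view at test time; both are applied to whole batches at once, as in train_processing_chain(image). A shape check:

import torch
import torchvision

aug = torchvision.transforms.Compose(
    [
        torchvision.transforms.RandomCrop((28, 28)),
        torchvision.transforms.RandomHorizontalFlip(p=0.5),
    ]
)
batch = torch.rand((4, 3, 32, 32))
print(aug(batch).shape)  # torch.Size([4, 3, 28, 28])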
list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + padding_conv: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, + groups: list[int] = [5, 5, 5, 1], + groups_pool: list[int] = [5, 5, 1, 1], +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + groups=groups[block_id], + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, 2, 2)), + stride=(2, 2), + padding=(0, 0), + dilation=(1, 1), + ) + .squeeze(0) + .squeeze(0) + ) + + network.append( + torch.nn.Unfold( + kernel_size=(2, 2), + stride=(2, 2), + padding=(0, 0), + dilation=(1, 1), + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + NNMF2dGrouped( + in_channels=test_image.shape[1], + out_channels=test_image.shape[1] // 4, + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + local_learning=False, + local_learning_kl=False, + groups=groups_pool[block_id], + ).to(torch_device) + ) + + test_image = 
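Review note on the pooling stage in make_network.py: when kernel_size_pool is positive, the block appends ReLU -> Unfold -> Fold -> L1NormLayer -> NNMF2dGrouped. Unfold with kernel (2, 2) and stride (2, 2) extracts non-overlapping 2x2 patches as columns of shape [N, C*4, L]; Fold with kernel_size=(1, 1) merely reshapes those columns onto the pooled H/2 x W/2 grid, so each output pixel carries its whole 2x2xC patch along the channel axis. L1NormLayer then normalizes every patch to sum 1, and NNMF2dGrouped with out_channels = in_channels // 4 learns the pooling while restoring the original channel count. The conv2d on a zeros tensor (mock_output) exists only to compute the pooled spatial size, out = floor((in + 2p - d*(k-1) - 1)/s) + 1. A shape check of the Unfold/Fold reshaping trick:

import torch
x = torch.randn(1, 32, 8, 8)
u = torch.nn.Unfold(kernel_size=(2, 2), stride=(2, 2))(x)     # [1, 128, 16]
f = torch.nn.Fold(output_size=(4, 4), kernel_size=(1, 1))(u)  # [1, 128, 4, 4]
assert f.shape == (1, 128, 4, 4)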
network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=0.1, + track_running_stats=False, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", + "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/basis_nnmf_groups5/make_optimize.py b/basis_nnmf_groups5/make_optimize.py new file mode 100644 index 0000000..ab1a4e0 --- /dev/null +++ b/basis_nnmf_groups5/make_optimize.py @@ -0,0 +1,32 @@ +import torch + + +def make_optimize( + parameters: list[list[torch.nn.parameter.Parameter]], + lr_initial: list[float], + eps=1e-10, +) -> tuple[ + list[torch.optim.Adam | None], + list[torch.optim.lr_scheduler.ReduceLROnPlateau | None], +]: + list_optimizer: list[torch.optim.Adam | None] = [] + list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = [] + + assert len(parameters) == len(lr_initial) + + for i in range(0, len(parameters)): + if len(parameters[i]) > 0: + list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i])) + else: + list_optimizer.append(None) + + for i in range(0, len(list_optimizer)): + if list_optimizer[i] is not None: + pass + list_lr_scheduler.append( + torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps) # type: ignore + ) + else: + list_lr_scheduler.append(None) + + return (list_optimizer, list_lr_scheduler) diff --git a/basis_nnmf_groups5/non_linear_weigth_function.py b/basis_nnmf_groups5/non_linear_weigth_function.py new file mode 100644 index 0000000..053a9b6 --- /dev/null +++ b/basis_nnmf_groups5/non_linear_weigth_function.py @@ -0,0 +1,26 @@ +import torch + + +def non_linear_weigth_function( + weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int +) -> torch.Tensor: + + if positive_function_type == 0: + positive_weights = torch.abs(weight) + + elif positive_function_type == 1: + assert beta is not None + positive_weights = weight + max_value = torch.abs(positive_weights).max() + if max_value > 80: + positive_weights = 80.0 * positive_weights / max_value + positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights) + + elif positive_function_type == 2: + assert beta is not None + positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5 + + else: + positive_weights = weight + + return positive_weights diff --git a/basis_nnmf_groups5/plot.py b/basis_nnmf_groups5/plot.py new file mode 100644 index 0000000..ad22d33 --- /dev/null +++ b/basis_nnmf_groups5/plot.py @@ -0,0 +1,15 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.load("data_log.npy") +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + "k", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") +plt.title("CIFAR10") +plt.show() diff --git a/basis_nnmf_groups5/run_network.py b/basis_nnmf_groups5/run_network.py new file mode 100644 index 0000000..8d0a43f --- /dev/null +++ b/basis_nnmf_groups5/run_network.py @@ -0,0 +1,251 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +import argh + +import time 
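Review note on make_optimize.py: the stray pass inside the second loop is dead code, and indexing hides the simple structure; an equivalent cleanup:

for opt in list_optimizer:
    if opt is not None:
        list_lr_scheduler.append(
            torch.optim.lr_scheduler.ReduceLROnPlateau(opt, eps=eps)
        )
    else:
        list_lr_scheduler.append(None)

Review note on plot.py: plt.legend() is called although no artist carries a label, so Matplotlib warns ("No artists with labels found...") and draws an empty legend; either drop the call or label the curve, e.g. plt.loglog(..., label="test error"). The expression 100.0 * (1.0 - data[:, 1] / 10000.0) turns the logged "Test Number Correct" counts into an error percentage, assuming the 10000-sample test sets of CIFAR10/MNIST/FashionMNIST.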
+import numpy as np +import torch + +rand_seed: int = 21 +torch.manual_seed(rand_seed) +torch.cuda.manual_seed(rand_seed) +np.random.seed(rand_seed) + +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.01, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 # 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = 
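Review note on run_network.py: the manual_seed calls pin the main-process RNGs before the network and data loaders are built, and the torch.Generator seeded with 0 in get_the_data.py pins the shuffling order. Bitwise reproducibility on GPU would additionally need the cuDNN flags below; they are not part of this commit, just a common addition:

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False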
network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/max_pooling_mlp/L1NormLayer.py b/max_pooling_mlp/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/max_pooling_mlp/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/max_pooling_mlp/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt b/max_pooling_mlp/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..c826147 Binary files /dev/null and b/max_pooling_mlp/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt differ diff --git a/max_pooling_mlp/NNMF2d.py b/max_pooling_mlp/NNMF2d.py new file mode 100644 index 0000000..b84d083 --- /dev/null +++ b/max_pooling_mlp/NNMF2d.py @@ -0,0 +1,252 @@ +import torch +from non_linear_weigth_function 
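Review note on the checkpointing at the end of run_network.py: torch.save(network, ...) pickles the whole Sequential, so loading it back requires the very same class definitions (NNMF2d, L1NormLayer, ...) importable under the same module names. Saving the state_dict is the more portable pattern; a sketch, assuming the network is rebuilt with the same make_network arguments before loading:

torch.save(network.state_dict(), f"Model_{default_path}.pt")
network.load_state_dict(torch.load(f"Model_{default_path}.pt"))

Also, lr_check_max = float(torch.tensor(lr_check).max()) raises a RuntimeError on an empty list; that cannot happen here because all three parameter groups are populated, but a guard would make the early-stop check robust.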
import non_linear_weigth_function + + +class NNMF2d(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + + def __init__( + self, + in_channels: int, + out_channels: int, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.in_channels = in_channels + self.out_channels = out_channels + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty((out_channels, in_channels), **factory_kwargs) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d = FunctionalNNMF2d.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=1, keepdim=True) + 10e-20 + ) + + h_dyn = self.functional_nnmf2d( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + return h_dyn + + +class FunctionalNNMF2d(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + (input.shape[0], out_channels, input.shape[-2], input.shape[-1]), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + h = h.movedim(1, -1) + input = input.movedim(1, -1) + for _ in range(0, iterations): + reconstruction = torch.nn.functional.linear(h, weight.T) + reconstruction += 1e-20 + if epsilon is None: + h *= torch.nn.functional.linear((input / reconstruction), weight) + else: + h *= 1 + epsilon * torch.nn.functional.linear( + (input / reconstruction), weight + ) + h /= h.sum(-1, keepdim=True) + 10e-20 + h = h.movedim(-1, 1) + input = input.movedim(-1, 1) + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = 
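Review note on FunctionalNNMF2d.forward: the loop is essentially the classic Lee-Seung multiplicative update for NNMF under a KL objective, applied independently at every pixel. With the weight rows normalized to sum 1, each step computes the reconstruction r = h W, rescales h by (x / r) W^T (or the damped variant h *= 1 + epsilon * ... when epsilon is set), and re-normalizes h onto the simplex. Note that the constant 10e-20 used here and in L1NormLayer equals 1e-19; presumably 1e-20 was intended. A minimal per-pixel sketch of the same update:

import torch
x = torch.rand(6); x = x / x.sum()                        # L1-normalized input
W = torch.rand(4, 6); W = W / W.sum(dim=1, keepdim=True)  # rows sum to 1
h = torch.full((4,), 1.0 / 4.0)
for _ in range(20):
    r = h @ W + 1e-20            # reconstruction of x
    h = h * ((x / r) @ W.T)      # multiplicative KL update
    h = h / (h.sum() + 1e-20)    # keep h on the simplex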
local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + h = h.movedim(1, -1) + grad_output = grad_output.movedim(1, -1) + input = input.movedim(1, -1) + big_r = torch.nn.functional.linear(h, weight.T) + big_r_div = 1.0 / (big_r + 1e-20) + + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + torch.nn.functional.linear(h * grad_output, weight.T) * big_r_div + ) + + del big_r_div + + # The weight gradient + if ctx.local_learning is False: + del big_r + + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (factor_x_div_r * grad_input) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + + grad_weight += torch.nn.functional.linear( + (h * grad_output) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ) + .T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + + else: + if ctx.local_learning_kl: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + else: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (2 * (input - big_r)) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + grad_input = grad_input.movedim(-1, 1) + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + return ( + grad_input, + grad_weight, + None, + None, + None, + None, + None, + ) diff --git a/max_pooling_mlp/append_block.py b/max_pooling_mlp/append_block.py new file mode 100644 index 0000000..b6796c4 --- /dev/null +++ b/max_pooling_mlp/append_block.py @@ -0,0 +1,151 @@ +import torch +from L1NormLayer import L1NormLayer +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, + last_layer: bool= False, +) -> torch.Tensor: + + kernel_size_internal: list[int] 
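Review note on FunctionalNNMF2d.backward: instead of backpropagating through all unrolled iterations, forward saves only the final h and backward differentiates a single reconstruction step around it, so memory stays constant in the iteration count at the price of an approximate gradient. With local_learning=True the backprop weight gradient is replaced by a local (KL- or MSE-flavored) learning rule, while grad_input remains the backprop term, and the isfinite asserts abort training if the dynamics ever produce NaNs or Infs. Direct usage of the Function, with shapes as in the forward pass:

import torch
x = torch.rand(2, 6, 5, 5)
x = x / x.sum(dim=1, keepdim=True)                        # L1-normalize channels
W = torch.rand(4, 6); W = W / W.sum(dim=1, keepdim=True)
W.requires_grad_(True)
h = FunctionalNNMF2d.apply(x, W, 4, 20, None, False, False)
assert h.shape == (2, 4, 5, 5)
h.sum().backward()                                        # runs the custom backward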
= [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + # I need the output size + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, kernel_size_internal[0], kernel_size_internal[1])), + stride=stride, + padding=padding, + dilation=dilation, + ) + .squeeze(0) + .squeeze(0) + ) + network.append( + torch.nn.Unfold( + kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + bias=False, + ).to(torch_device) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + if last_layer is False: + + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + kernel_size=(1, 1), + stride=(1, 1), + padding=(0, 0), + bias=True, + device=torch_device, + ) + ) + # Init the cnn top layers 1x1 conv2d layers + for name, param in network[-1].named_parameters(): + with torch.no_grad(): + if name == "bias": + param.data *= 0 + if name == "weight": + assert param.shape[-2] == 1 + assert param.shape[-1] == 1 + param[: param.shape[0], : param.shape[0], 0, 0] = torch.eye( + param.shape[0], dtype=param.dtype, device=param.device + ) + param[param.shape[0] :, :, 0, 0] = 0 + param[:, param.shape[0] :, 0, 0] = 0 + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_cnn_top) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=momentum, + track_running_stats=track_running_stats, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git a/max_pooling_mlp/append_parameter.py b/max_pooling_mlp/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/max_pooling_mlp/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/max_pooling_mlp/convert_log_to_numpy.py b/max_pooling_mlp/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/max_pooling_mlp/convert_log_to_numpy.py @@ -0,0 +1,31 @@ +import os +import glob + 
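Review note on the cnn-top initialization in append_block.py: the trailing 1x1 convolution starts out as an exact identity, with the bias zeroed and the weight set to the identity matrix over the channel axis, so at initialization the block output equals the (batch-normalized) feature activations and the layer only gradually learns a channel remix. The slice assignments assume in_channels >= out_channels, which holds here because both equal out_channels. A standalone check of the idea:

import torch
conv = torch.nn.Conv2d(8, 8, kernel_size=(1, 1), bias=True)
with torch.no_grad():
    conv.bias.zero_()
    conv.weight.zero_()
    conv.weight[:, :, 0, 0] = torch.eye(8)
x = torch.randn(1, 8, 4, 4)
assert torch.allclose(conv(x), x)  # identity at init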
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/max_pooling_mlp/data_loader.py b/max_pooling_mlp/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/max_pooling_mlp/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/max_pooling_mlp/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy b/max_pooling_mlp/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 0000000..d5981ee Binary files /dev/null and b/max_pooling_mlp/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy differ diff --git a/max_pooling_mlp/get_the_data.py b/max_pooling_mlp/get_the_data.py new file mode 100644 index 0000000..fc61064 --- /dev/null +++ b/max_pooling_mlp/get_the_data.py @@ -0,0 +1,147 @@ +import torch +import torchvision # type: ignore +from data_loader import data_loader + +from torchvision.transforms import v2 # type: ignore +import numpy as np + + +def get_the_data( + dataset: str, + batch_size_train: int, + batch_size_test: int, + torch_device: torch.device, + input_dim_x: int, + input_dim_y: int, + flip_p: float = 0.5, + jitter_brightness: float = 0.5, + jitter_contrast: float = 0.1, + jitter_saturation: float = 0.1, + jitter_hue: float = 0.15, + da_auto_mode: bool = False, +) -> tuple[ + torch.utils.data.dataloader.DataLoader, + torch.utils.data.dataloader.DataLoader, + torchvision.transforms.Compose, + torchvision.transforms.Compose, +]: + if dataset == "MNIST": + tv_dataset_train = torchvision.datasets.MNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.MNIST( + root="data", train=False, download=True + ) + elif dataset == "FashionMNIST": + tv_dataset_train = torchvision.datasets.FashionMNIST( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.FashionMNIST( + root="data", train=False, download=True + ) + elif dataset == "CIFAR10": + tv_dataset_train = torchvision.datasets.CIFAR10( + root="data", train=True, download=True + ) + tv_dataset_test = torchvision.datasets.CIFAR10( + root="data", train=False, download=True + ) + else: + raise NotImplementedError("This 
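Review note on data_loader.py: the whole dataset is cast to float32, divided by its global maximum (255 for the uint8 image datasets, giving values in [0, 1]) and moved to torch_device up front, so the TensorDataset already lives on the GPU and the DataLoader runs single-process; this is fast for datasets of this size, and it is also why the seed_worker hook never fires. Per-channel standardization would be the more common alternative, sketched here (not what this commit does):

mean = pattern_storage.mean(dim=(0, 2, 3), keepdim=True)
std = pattern_storage.std(dim=(0, 2, 3), keepdim=True)
pattern_storage = (pattern_storage - mean) / (std + 1e-8)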
dataset is not implemented.") + + def seed_worker(worker_id): + worker_seed = torch.initial_seed() % 2**32 + np.random.seed(worker_seed) + torch.random.seed(worker_seed) + + g = torch.Generator() + g.manual_seed(0) + + if dataset == "MNIST" or dataset == "FashionMNIST": + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=tv_dataset_train.data, + labels=tv_dataset_train.targets, + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=tv_dataset_test.data, + labels=tv_dataset_test.targets, + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + train_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))], + ) + else: + + train_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_train, + pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_train.targets), + shuffle=True, + worker_init_fn=seed_worker, + generator=g, + ) + + test_dataloader = data_loader( + torch_device=torch_device, + batch_size=batch_size_test, + pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1), + labels=torch.tensor(tv_dataset_test.targets), + shuffle=False, + worker_init_fn=seed_worker, + generator=g, + ) + + # Data augmentation filter + test_processing_chain = torchvision.transforms.Compose( + transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))], + ) + + if da_auto_mode: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + v2.AutoAugment( + policy=torchvision.transforms.AutoAugmentPolicy( + v2.AutoAugmentPolicy.CIFAR10 + ) + ), + torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)), + ], + ) + else: + train_processing_chain = torchvision.transforms.Compose( + transforms=[ + torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)), + torchvision.transforms.RandomHorizontalFlip(p=flip_p), + torchvision.transforms.ColorJitter( + brightness=jitter_brightness, + contrast=jitter_contrast, + saturation=jitter_saturation, + hue=jitter_hue, + ), + ], + ) + + return ( + train_dataloader, + test_dataloader, + train_processing_chain, + test_processing_chain, + ) diff --git a/max_pooling_mlp/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724147264.gp4u3.1004033.0 b/max_pooling_mlp/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724147264.gp4u3.1004033.0 new file mode 100644 index 0000000..33cbeaa Binary files /dev/null and b/max_pooling_mlp/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724147264.gp4u3.1004033.0 differ diff --git a/max_pooling_mlp/loss_function.py b/max_pooling_mlp/loss_function.py new file mode 100644 index 0000000..e256840 --- /dev/null +++ b/max_pooling_mlp/loss_function.py @@ -0,0 +1,64 @@ +import torch + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + assert h.ndim == 2 + + if loss_mode == 0: + + 
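Review note: from max_pooling_mlp/ onward the diff largely repeats itself. get_the_data.py (blob fc61064), loss_function.py (e256840), data_loader.py (0a0d430), non_linear_weigth_function.py (053a9b6), plot.py (ad22d33), convert_log_to_numpy.py (05a5427), make_optimize.py (ab1a4e0) and run_network.py (8d0a43f) are vendored byte-for-byte into each experiment directory so that every folder is self-contained; the experiments differ only in make_network.py and append_block.py, i.e. in how the pooling stage and the feature layers are built.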
# Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=h.device, + dtype=h.dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(h.device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=h.device, + dtype=h.dtype, + ), + ) + + my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean( + dim=0 + ) * loss_coeffs_mse + + my_loss = ( + my_loss + + ( + (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20))) + .sum(dim=0) + .mean(dim=0) + ) + * loss_coeffs_kldiv + ) + + my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse)) + + return my_loss + + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device)) + return my_loss + else: + return None diff --git a/max_pooling_mlp/make_network.py b/max_pooling_mlp/make_network.py new file mode 100644 index 0000000..d23f4a4 --- /dev/null +++ b/max_pooling_mlp/make_network.py @@ -0,0 +1,212 @@ +import torch +from append_block import append_block +from L1NormLayer import L1NormLayer +from append_parameter import append_parameter + + +def make_network( + input_dim_x: int, + input_dim_y: int, + input_number_of_channel: int, + iterations: int, + torch_device: torch.device, + epsilon: bool | None = None, + positive_function_type: int = 0, + beta: float | None = None, + # Conv: + number_of_output_channels: list[int] = [32 * 1, 64 * 1, 96 * 1, 10], + kernel_size_conv: list[tuple[int, int]] = [ + (5, 5), + (5, 5), + (-1, -1), # Take the whole input image x and y size + (1, 1), + ], + stride_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + padding_conv: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_conv: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + 
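Review note on make_network.py: the keyword parameter is annotated epsilon: bool | None = None, but the value is forwarded to append_block and ultimately to the NNMF layers, which declare it as float | None; the annotation should read

epsilon: float | None = None,

(the same slip appears in the other make_network.py copies in this commit).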
padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + last_layer=block_id == len(number_of_output_channels) - 1, + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + + network.append(torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))) + test_image = network[-1](test_image) + + # network.append(torch.nn.ReLU()) + # test_image = network[-1](test_image) + + # mock_output = ( + # torch.nn.functional.conv2d( + # torch.zeros( + # 1, + # 1, + # test_image.shape[2], + # test_image.shape[3], + # ), + # torch.zeros((1, 1, 2, 2)), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # .squeeze(0) + # .squeeze(0) + # ) + + # network.append( + # torch.nn.Unfold( + # kernel_size=(2, 2), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # ) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Fold( + # output_size=mock_output.shape, + # kernel_size=(1, 1), + # dilation=1, + # padding=0, + # stride=1, + # ) + # ) + # test_image = network[-1](test_image) + + # network.append(L1NormLayer()) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Conv2d( + # in_channels=test_image.shape[1], + # out_channels=test_image.shape[1] // 4, + # kernel_size=(1, 1), + # bias=False, + # ).to(torch_device) + # ) + + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + # network.append( + # torch.nn.BatchNorm2d( + # num_features=test_image.shape[1], + # device=torch_device, + # momentum=0.1, + # track_running_stats=False, + # ) + # ) + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append(torch.nn.Softmax(dim=1)) + test_image = network[-1](test_image) + + network.append(torch.nn.Flatten()) + test_image = network[-1](test_image) + + parameters: list[list[torch.nn.parameter.Parameter]] = [ + parameter_cnn_top, + parameter_nnmf, + parameter_norm, + ] + + name_list: list[str] = [ + "cnn_top", + "nnmf", + "batchnorm2d", + ] + + return ( + network, + parameters, + name_list, + ) diff --git a/max_pooling_mlp/make_optimize.py b/max_pooling_mlp/make_optimize.py new file mode 100644 index 0000000..ab1a4e0 --- /dev/null +++ b/max_pooling_mlp/make_optimize.py @@ -0,0 +1,32 @@ +import torch + + +def make_optimize( + parameters: list[list[torch.nn.parameter.Parameter]], + lr_initial: list[float], + eps=1e-10, +) -> tuple[ + list[torch.optim.Adam | None], + list[torch.optim.lr_scheduler.ReduceLROnPlateau | None], +]: + list_optimizer: list[torch.optim.Adam | None] = [] + list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = [] + + assert len(parameters) == len(lr_initial) + + for i in range(0, len(parameters)): + if len(parameters[i]) > 0: + list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i])) + else: + list_optimizer.append(None) + + for i in range(0, len(list_optimizer)): + if list_optimizer[i] is not None: + pass + list_lr_scheduler.append( + torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps) # type: ignore + ) + else: + list_lr_scheduler.append(None) + + return (list_optimizer, list_lr_scheduler) diff --git 
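Review note on the max_pooling_mlp variant: compared with basis_nnmf_groups5/make_network.py above, the pooling stage is a plain torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)), and the earlier Unfold/Fold + 1x1-conv pooling path is kept only as the commented-out block for comparison. The new last_layer flag is set on the final block so that append_block skips the ReLU between the feature layer and the 1x1 cnn-top there, letting the classifier head feed the closing Softmax directly. This append_block also builds its feature layer from a 1x1 Conv2d rather than an NNMF layer, which is what makes this the "mlp" baseline.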
a/max_pooling_mlp/non_linear_weigth_function.py b/max_pooling_mlp/non_linear_weigth_function.py new file mode 100644 index 0000000..053a9b6 --- /dev/null +++ b/max_pooling_mlp/non_linear_weigth_function.py @@ -0,0 +1,26 @@ +import torch + + +def non_linear_weigth_function( + weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int +) -> torch.Tensor: + + if positive_function_type == 0: + positive_weights = torch.abs(weight) + + elif positive_function_type == 1: + assert beta is not None + positive_weights = weight + max_value = torch.abs(positive_weights).max() + if max_value > 80: + positive_weights = 80.0 * positive_weights / max_value + positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights) + + elif positive_function_type == 2: + assert beta is not None + positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5 + + else: + positive_weights = weight + + return positive_weights diff --git a/max_pooling_mlp/plot.py b/max_pooling_mlp/plot.py new file mode 100644 index 0000000..ad22d33 --- /dev/null +++ b/max_pooling_mlp/plot.py @@ -0,0 +1,15 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.load("data_log.npy") +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + "k", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") +plt.title("CIFAR10") +plt.show() diff --git a/max_pooling_mlp/run_network.py b/max_pooling_mlp/run_network.py new file mode 100644 index 0000000..8d0a43f --- /dev/null +++ b/max_pooling_mlp/run_network.py @@ -0,0 +1,251 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +import argh + +import time +import numpy as np +import torch + +rand_seed: int = 21 +torch.manual_seed(rand_seed) +torch.cuda.manual_seed(rand_seed) +np.random.seed(rand_seed) + +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.01, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 # 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + 
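Review note on non_linear_weigth_function.py ("weigth" is a consistent spelling in both file and function name, so the imports do work): the function maps unconstrained weights onto nonnegative ones. Type 0 takes abs(weight); type 1 rescales the weights so their largest magnitude is at most 80 before exponentiating, since exp(80) is about 5.5e34 and still fits in float32 (max about 3.4e38) while larger arguments would overflow, with (tanh(beta) + 1) / 2 in (0, 1) acting as a learnable temperature; type 2 squashes into (0, 1) via (tanh(beta * weight) + 1) / 2; any other type returns the weights untouched. A quick check of the bounded variant:

import torch
w = torch.randn(4, 6)
beta = torch.tensor([0.5])
p = non_linear_weigth_function(w, beta, positive_function_type=2)
assert (p >= 0).all() and (p <= 1).all()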
input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + loss_coeffs_mse=loss_coeffs_mse, + loss_coeffs_kldiv=loss_coeffs_kldiv, + ) + + assert loss is not None + train_loss += loss.item() + train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + train_number += target.shape[0] + + # Calculate backprop + loss.backward() + + # Update the parameter + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].step() # type: ignore + + perfomance_train_correct: float = 100.0 * train_correct / train_number + # Update the learning rate + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_schedulers[i].step(train_loss) # type: ignore + + my_string = "Actual lr: " + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} " # type: ignore + else: + my_string += " --- " + + print(my_string) + t_training: float = time.perf_counter() + + # Switch the network into evalution mode + network.eval() + + with torch.no_grad(): + + for image, target in test_dataloader: + output = network(test_processing_chain(image)) + + test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy() + test_number += target.shape[0] + + t_testing = time.perf_counter() + + perfomance_test_correct: float = 100.0 * test_correct / test_number + + tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id) + tb.add_scalar("Train Number Correct", train_correct, epoch_id) + tb.add_scalar("Test Number Correct", test_correct, epoch_id) + + print( + f"Training: Loss={train_loss / float(train_number):.5f} Correct={perfomance_train_correct:.2f}%" + ) + print(f"Testing: Correct={perfomance_test_correct:.2f}%") + print( + f"Time: 
Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec" + ) + + tb.flush() + + lr_check: list[float] = [] + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + lr_check.append(lr_schedulers[i].get_last_lr()[0]) # type: ignore + + lr_check_max = float(torch.tensor(lr_check).max()) + + if lr_check_max < lr_limit: + torch.save(network, f"Model_{default_path}.pt") + tb.close() + print("Done (lr_limit)") + return + + torch.save(network, f"Model_{default_path}.pt") + print() + + tb.close() + print("Done (loop end)") + + return + + +if __name__ == "__main__": + argh.dispatch_command(main) diff --git a/max_pooling_nnmf/L1NormLayer.py b/max_pooling_nnmf/L1NormLayer.py new file mode 100644 index 0000000..6816b3a --- /dev/null +++ b/max_pooling_nnmf/L1NormLayer.py @@ -0,0 +1,13 @@ +import torch + + +class L1NormLayer(torch.nn.Module): + + epsilon: float + + def __init__(self, epsilon: float = 10e-20) -> None: + super().__init__() + self.epsilon = epsilon + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input / (input.sum(dim=1, keepdim=True) + self.epsilon) diff --git a/max_pooling_nnmf/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt b/max_pooling_nnmf/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt new file mode 100644 index 0000000..fccceef Binary files /dev/null and b/max_pooling_nnmf/Model_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.pt differ diff --git a/max_pooling_nnmf/NNMF2d.py b/max_pooling_nnmf/NNMF2d.py new file mode 100644 index 0000000..b84d083 --- /dev/null +++ b/max_pooling_nnmf/NNMF2d.py @@ -0,0 +1,252 @@ +import torch +from non_linear_weigth_function import non_linear_weigth_function + + +class NNMF2d(torch.nn.Module): + + in_channels: int + out_channels: int + weight: torch.Tensor + iterations: int + epsilon: float | None + init_min: float + init_max: float + beta: torch.Tensor | None + positive_function_type: int + local_learning: bool + local_learning_kl: bool + + def __init__( + self, + in_channels: int, + out_channels: int, + device=None, + dtype=None, + iterations: int = 20, + epsilon: float | None = None, + init_min: float = 0.0, + init_max: float = 1.0, + beta: float | None = None, + positive_function_type: int = 0, + local_learning: bool = False, + local_learning_kl: bool = False, + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + + super().__init__() + + self.positive_function_type = positive_function_type + self.init_min = init_min + self.init_max = init_max + + self.in_channels = in_channels + self.out_channels = out_channels + + self.iterations = iterations + self.local_learning = local_learning + self.local_learning_kl = local_learning_kl + + self.weight = torch.nn.parameter.Parameter( + torch.empty((out_channels, in_channels), **factory_kwargs) + ) + + if beta is not None: + self.beta = torch.nn.parameter.Parameter(torch.empty((1), **factory_kwargs)) + self.beta.data[0] = beta + else: + self.beta = None + + self.reset_parameters() + self.functional_nnmf2d = FunctionalNNMF2d.apply + + self.epsilon = epsilon + + def extra_repr(self) -> str: + s: str = f"{self.in_channels}, {self.out_channels}" + + if self.epsilon is not None: + s += f", epsilon={self.epsilon}" + s += f", pfunctype={self.positive_function_type}" + s += f", local_learning={self.local_learning}" + + if self.local_learning: + s += f", local_learning_kl={self.local_learning_kl}" + + return s + + def reset_parameters(self) -> None: + torch.nn.init.uniform_(self.weight, a=self.init_min, b=self.init_max) + + def 
forward(self, input: torch.Tensor) -> torch.Tensor: + + positive_weights = non_linear_weigth_function( + self.weight, self.beta, self.positive_function_type + ) + positive_weights = positive_weights / ( + positive_weights.sum(dim=1, keepdim=True) + 10e-20 + ) + + h_dyn = self.functional_nnmf2d( + input, + positive_weights, + self.out_channels, + self.iterations, + self.epsilon, + self.local_learning, + self.local_learning_kl, + ) + + return h_dyn + + +class FunctionalNNMF2d(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weight: torch.Tensor, + out_channels: int, + iterations: int, + epsilon: float | None, + local_learning: bool, + local_learning_kl: bool, + ) -> torch.Tensor: + + # Prepare h + h = torch.full( + (input.shape[0], out_channels, input.shape[-2], input.shape[-1]), + 1.0 / float(out_channels), + device=input.device, + dtype=input.dtype, + ) + + h = h.movedim(1, -1) + input = input.movedim(1, -1) + for _ in range(0, iterations): + reconstruction = torch.nn.functional.linear(h, weight.T) + reconstruction += 1e-20 + if epsilon is None: + h *= torch.nn.functional.linear((input / reconstruction), weight) + else: + h *= 1 + epsilon * torch.nn.functional.linear( + (input / reconstruction), weight + ) + h /= h.sum(-1, keepdim=True) + 10e-20 + h = h.movedim(-1, 1) + input = input.movedim(-1, 1) + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + ctx.save_for_backward(input, weight, h) + ctx.local_learning = local_learning + ctx.local_learning_kl = local_learning_kl + + assert torch.isfinite(h).all() + return h + + @staticmethod + @torch.autograd.function.once_differentiable + def backward(ctx, grad_output: torch.Tensor) -> tuple[ # type: ignore + torch.Tensor, + torch.Tensor | None, + None, + None, + None, + None, + None, + ]: + + # ############################################## + # Default values + # ############################################## + grad_weight: torch.Tensor | None = None + + # ############################################## + # Get the variables back + # ############################################## + (input, weight, h) = ctx.saved_tensors + + # The back prop gradient + h = h.movedim(1, -1) + grad_output = grad_output.movedim(1, -1) + input = input.movedim(1, -1) + big_r = torch.nn.functional.linear(h, weight.T) + big_r_div = 1.0 / (big_r + 1e-20) + + factor_x_div_r = input * big_r_div + + grad_input: torch.Tensor = ( + torch.nn.functional.linear(h * grad_output, weight.T) * big_r_div + ) + + del big_r_div + + # The weight gradient + if ctx.local_learning is False: + del big_r + + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (factor_x_div_r * grad_input) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + + grad_weight += torch.nn.functional.linear( + (h * grad_output) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ) + .T, + factor_x_div_r.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + + else: + if ctx.local_learning_kl: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + factor_x_div_r.reshape( + grad_input.shape[0] 
* grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ).T, + ) + else: + grad_weight = -torch.nn.functional.linear( + h.reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + h.shape[3], + ).T, + (2 * (input - big_r)) + .reshape( + grad_input.shape[0] * grad_input.shape[1] * grad_input.shape[2], + grad_input.shape[3], + ) + .T, + ) + grad_input = grad_input.movedim(-1, 1) + assert torch.isfinite(grad_input).all() + assert torch.isfinite(grad_weight).all() + + return ( + grad_input, + grad_weight, + None, + None, + None, + None, + None, + ) diff --git a/max_pooling_nnmf/append_block.py b/max_pooling_nnmf/append_block.py new file mode 100644 index 0000000..a8de5d2 --- /dev/null +++ b/max_pooling_nnmf/append_block.py @@ -0,0 +1,149 @@ +import torch +from L1NormLayer import L1NormLayer +from NNMF2d import NNMF2d +from append_parameter import append_parameter + + +def append_block( + network: torch.nn.Sequential, + out_channels: int, + test_image: torch.Tensor, + parameter_cnn_top: list[torch.nn.parameter.Parameter], + parameter_nnmf: list[torch.nn.parameter.Parameter], + parameter_norm: list[torch.nn.parameter.Parameter], + torch_device: torch.device, + dilation: tuple[int, int] | int = 1, + padding: tuple[int, int] | int = 0, + stride: tuple[int, int] | int = 1, + kernel_size: tuple[int, int] = (5, 5), + epsilon: float | None = None, + positive_function_type: int = 0, + beta: float | None = None, + iterations: int = 20, + local_learning: bool = False, + local_learning_kl: bool = False, + momentum: float = 0.1, + track_running_stats: bool = False, +) -> torch.Tensor: + + kernel_size_internal: list[int] = [kernel_size[-2], kernel_size[-1]] + + if kernel_size[0] < 1: + kernel_size_internal[0] = test_image.shape[-2] + + if kernel_size[1] < 1: + kernel_size_internal[1] = test_image.shape[-1] + + # Main + network.append(torch.nn.ReLU()) + test_image = network[-1](test_image) + + # I need the output size + mock_output = ( + torch.nn.functional.conv2d( + torch.zeros( + 1, + 1, + test_image.shape[2], + test_image.shape[3], + ), + torch.zeros((1, 1, kernel_size_internal[0], kernel_size_internal[1])), + stride=stride, + padding=padding, + dilation=dilation, + ) + .squeeze(0) + .squeeze(0) + ) + network.append( + torch.nn.Unfold( + kernel_size=(kernel_size_internal[-2], kernel_size_internal[-1]), + dilation=dilation, + padding=padding, + stride=stride, + ) + ) + test_image = network[-1](test_image) + + network.append( + torch.nn.Fold( + output_size=mock_output.shape, + kernel_size=(1, 1), + dilation=1, + padding=0, + stride=1, + ) + ) + test_image = network[-1](test_image) + + network.append(L1NormLayer()) + test_image = network[-1](test_image) + + network.append( + NNMF2d( + in_channels=test_image.shape[1], + out_channels=out_channels, + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + local_learning=local_learning, + local_learning_kl=local_learning_kl, + ).to(torch_device) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + momentum=momentum, + track_running_stats=track_running_stats, + device=torch_device, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + network.append( + torch.nn.Conv2d( + in_channels=test_image.shape[1], + 
out_channels=out_channels, + kernel_size=(1, 1), + stride=(1, 1), + padding=(0, 0), + bias=True, + device=torch_device, + ) + ) + # Init the cnn top layers 1x1 conv2d layers + for name, param in network[-1].named_parameters(): + with torch.no_grad(): + if name == "bias": + param.data *= 0 + if name == "weight": + assert param.shape[-2] == 1 + assert param.shape[-1] == 1 + param[: param.shape[0], : param.shape[0], 0, 0] = torch.eye( + param.shape[0], dtype=param.dtype, device=param.device + ) + param[param.shape[0] :, :, 0, 0] = 0 + param[:, param.shape[0] :, 0, 0] = 0 + + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_cnn_top) + + if (test_image.shape[-1] > 1) or (test_image.shape[-2] > 1): + network.append( + torch.nn.BatchNorm2d( + num_features=test_image.shape[1], + device=torch_device, + momentum=momentum, + track_running_stats=track_running_stats, + ) + ) + test_image = network[-1](test_image) + append_parameter(module=network[-1], parameter_list=parameter_norm) + + return test_image diff --git a/max_pooling_nnmf/append_parameter.py b/max_pooling_nnmf/append_parameter.py new file mode 100644 index 0000000..b972e39 --- /dev/null +++ b/max_pooling_nnmf/append_parameter.py @@ -0,0 +1,8 @@ +import torch + + +def append_parameter( + module: torch.nn.Module, parameter_list: list[torch.nn.parameter.Parameter] +): + for netp in module.parameters(): + parameter_list.append(netp) diff --git a/max_pooling_nnmf/convert_log_to_numpy.py b/max_pooling_nnmf/convert_log_to_numpy.py new file mode 100644 index 0000000..05a5427 --- /dev/null +++ b/max_pooling_nnmf/convert_log_to_numpy.py @@ -0,0 +1,31 @@ +import os +import glob + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +from tensorboard.backend.event_processing import event_accumulator # type: ignore +import numpy as np + + +def get_data(path: str = "log_cnn"): + acc = event_accumulator.EventAccumulator(path) + acc.Reload() + + which_scalar = "Test Number Correct" + te = acc.Scalars(which_scalar) + + np_temp = np.zeros((len(te), 2)) + + for id in range(0, len(te)): + np_temp[id, 0] = te[id].step + np_temp[id, 1] = te[id].value + + print(np_temp[:, 1] / 100) + np_temp = np.nan_to_num(np_temp) + return np_temp + + +for path in glob.glob("log_*"): + print(path) + data = get_data(path) + np.save("data_" + path + ".npy", data) diff --git a/max_pooling_nnmf/data_loader.py b/max_pooling_nnmf/data_loader.py new file mode 100644 index 0000000..0a0d430 --- /dev/null +++ b/max_pooling_nnmf/data_loader.py @@ -0,0 +1,31 @@ +import torch + + +def data_loader( + pattern: torch.Tensor, + labels: torch.Tensor, + worker_init_fn, + generator, + batch_size: int = 128, + shuffle: bool = True, + torch_device: torch.device = torch.device("cpu"), +) -> torch.utils.data.dataloader.DataLoader: + + assert pattern.ndim >= 3 + + pattern_storage: torch.Tensor = pattern.to(torch_device).type(torch.float32) + if pattern_storage.ndim == 3: + pattern_storage = pattern_storage.unsqueeze(1) + pattern_storage /= pattern_storage.max() + + label_storage: torch.Tensor = labels.to(torch_device).type(torch.int64) + + dataloader = torch.utils.data.DataLoader( + torch.utils.data.TensorDataset(pattern_storage, label_storage), + batch_size=batch_size, + shuffle=shuffle, + worker_init_fn=worker_init_fn, + generator=generator, + ) + + return dataloader diff --git a/max_pooling_nnmf/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy b/max_pooling_nnmf/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy new file mode 100644 index 
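Review note on the BatchNorm guards in append_block.py: BatchNorm2d is appended only while the feature map is larger than 1x1. Once a block has collapsed the map to a single position (the (-1, -1) whole-image kernel followed by the 1x1 classifier stage), normalizing each channel across the batch would rescale the class scores right before Softmax, so the guard presumably exists to keep the head's logits untouched; with track_running_stats=False the layers also normalize with batch statistics at eval time.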
0000000..67145ac
Binary files /dev/null and b/max_pooling_nnmf/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy differ
diff --git a/max_pooling_nnmf/get_the_data.py b/max_pooling_nnmf/get_the_data.py
new file mode 100644
index 0000000..fc61064
--- /dev/null
+++ b/max_pooling_nnmf/get_the_data.py
@@ -0,0 +1,147 @@
+import torch
+import torchvision  # type: ignore
+from data_loader import data_loader
+
+from torchvision.transforms import v2  # type: ignore
+import numpy as np
+
+
+def get_the_data(
+    dataset: str,
+    batch_size_train: int,
+    batch_size_test: int,
+    torch_device: torch.device,
+    input_dim_x: int,
+    input_dim_y: int,
+    flip_p: float = 0.5,
+    jitter_brightness: float = 0.5,
+    jitter_contrast: float = 0.1,
+    jitter_saturation: float = 0.1,
+    jitter_hue: float = 0.15,
+    da_auto_mode: bool = False,
+) -> tuple[
+    torch.utils.data.dataloader.DataLoader,
+    torch.utils.data.dataloader.DataLoader,
+    torchvision.transforms.Compose,
+    torchvision.transforms.Compose,
+]:
+    if dataset == "MNIST":
+        tv_dataset_train = torchvision.datasets.MNIST(
+            root="data", train=True, download=True
+        )
+        tv_dataset_test = torchvision.datasets.MNIST(
+            root="data", train=False, download=True
+        )
+    elif dataset == "FashionMNIST":
+        tv_dataset_train = torchvision.datasets.FashionMNIST(
+            root="data", train=True, download=True
+        )
+        tv_dataset_test = torchvision.datasets.FashionMNIST(
+            root="data", train=False, download=True
+        )
+    elif dataset == "CIFAR10":
+        tv_dataset_train = torchvision.datasets.CIFAR10(
+            root="data", train=True, download=True
+        )
+        tv_dataset_test = torchvision.datasets.CIFAR10(
+            root="data", train=False, download=True
+        )
+    else:
+        raise NotImplementedError("This dataset is not implemented.")
+
+    def seed_worker(worker_id):
+        worker_seed = torch.initial_seed() % 2**32
+        np.random.seed(worker_seed)
+        torch.manual_seed(worker_seed)
+
+    g = torch.Generator()
+    g.manual_seed(0)
+
+    if dataset == "MNIST" or dataset == "FashionMNIST":
+
+        train_dataloader = data_loader(
+            torch_device=torch_device,
+            batch_size=batch_size_train,
+            pattern=tv_dataset_train.data,
+            labels=tv_dataset_train.targets,
+            shuffle=True,
+            worker_init_fn=seed_worker,
+            generator=g,
+        )
+
+        test_dataloader = data_loader(
+            torch_device=torch_device,
+            batch_size=batch_size_test,
+            pattern=tv_dataset_test.data,
+            labels=tv_dataset_test.targets,
+            shuffle=False,
+            worker_init_fn=seed_worker,
+            generator=g,
+        )
+
+        # Data augmentation filter
+        test_processing_chain = torchvision.transforms.Compose(
+            transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))],
+        )
+
+        train_processing_chain = torchvision.transforms.Compose(
+            transforms=[torchvision.transforms.RandomCrop((input_dim_x, input_dim_y))],
+        )
+    else:
+
+        train_dataloader = data_loader(
+            torch_device=torch_device,
+            batch_size=batch_size_train,
+            pattern=torch.tensor(tv_dataset_train.data).movedim(-1, 1),
+            labels=torch.tensor(tv_dataset_train.targets),
+            shuffle=True,
+            worker_init_fn=seed_worker,
+            generator=g,
+        )
+
+        test_dataloader = data_loader(
+            torch_device=torch_device,
+            batch_size=batch_size_test,
+            pattern=torch.tensor(tv_dataset_test.data).movedim(-1, 1),
+            labels=torch.tensor(tv_dataset_test.targets),
+            shuffle=False,
+            worker_init_fn=seed_worker,
+            generator=g,
+        )
+
+        # Data augmentation filter
+        test_processing_chain = torchvision.transforms.Compose(
+            transforms=[torchvision.transforms.CenterCrop((input_dim_x, input_dim_y))],
+        )
+
+        if da_auto_mode:
+            train_processing_chain = torchvision.transforms.Compose(
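+                # TorchVision's learned AutoAugment policy for CIFAR10; the center
+                # crop afterwards brings the augmented images back to the network's
+                # input size.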
+                transforms=[
+                    v2.AutoAugment(policy=v2.AutoAugmentPolicy.CIFAR10),
+                    torchvision.transforms.CenterCrop((input_dim_x, input_dim_y)),
+                ],
+            )
+        else:
+            train_processing_chain = torchvision.transforms.Compose(
+                transforms=[
+                    torchvision.transforms.RandomCrop((input_dim_x, input_dim_y)),
+                    torchvision.transforms.RandomHorizontalFlip(p=flip_p),
+                    torchvision.transforms.ColorJitter(
+                        brightness=jitter_brightness,
+                        contrast=jitter_contrast,
+                        saturation=jitter_saturation,
+                        hue=jitter_hue,
+                    ),
+                ],
+            )
+
+    return (
+        train_dataloader,
+        test_dataloader,
+        train_processing_chain,
+        test_processing_chain,
+    )
diff --git a/max_pooling_nnmf/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724147482.gp4u4.933902.0 b/max_pooling_nnmf/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724147482.gp4u4.933902.0
new file mode 100644
index 0000000..3d5b045
Binary files /dev/null and b/max_pooling_nnmf/log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_/events.out.tfevents.1724147482.gp4u4.933902.0 differ
diff --git a/max_pooling_nnmf/loss_function.py b/max_pooling_nnmf/loss_function.py
new file mode 100644
index 0000000..e256840
--- /dev/null
+++ b/max_pooling_nnmf/loss_function.py
@@ -0,0 +1,64 @@
+import torch
+
+
+# loss_mode == 0: "normal" SbS loss function mixture
+# loss_mode == 1: cross_entropy
+def loss_function(
+    h: torch.Tensor,
+    labels: torch.Tensor,
+    loss_mode: int = 0,
+    number_of_output_neurons: int = 10,
+    loss_coeffs_mse: float = 0.0,
+    loss_coeffs_kldiv: float = 0.0,
+) -> torch.Tensor | None:
+
+    assert loss_mode >= 0
+    assert loss_mode <= 1
+
+    assert h.ndim == 2
+
+    if loss_mode == 0:
+
+        # Convert labels into one-hot vectors
+        target_one_hot: torch.Tensor = torch.zeros(
+            (
+                labels.shape[0],
+                number_of_output_neurons,
+            ),
+            device=h.device,
+            dtype=h.dtype,
+        )
+
+        target_one_hot.scatter_(
+            1,
+            labels.to(h.device).unsqueeze(1),
+            torch.ones(
+                (labels.shape[0], 1),
+                device=h.device,
+                dtype=h.dtype,
+            ),
+        )
+
+        my_loss: torch.Tensor = ((h - target_one_hot) ** 2).sum(dim=0).mean(
+            dim=0
+        ) * loss_coeffs_mse
+
+        my_loss = (
+            my_loss
+            + (
+                (target_one_hot * torch.log((target_one_hot + 1e-20) / (h + 1e-20)))
+                .sum(dim=0)
+                .mean(dim=0)
+            )
+            * loss_coeffs_kldiv
+        )
+
+        my_loss = my_loss / (abs(loss_coeffs_kldiv) + abs(loss_coeffs_mse))
+
+        return my_loss
+
+    elif loss_mode == 1:
+        my_loss = torch.nn.functional.cross_entropy(h, labels.to(h.device))
+        return my_loss
+    else:
+        return None
diff --git a/max_pooling_nnmf/make_network.py b/max_pooling_nnmf/make_network.py
new file mode 100644
index 0000000..1fc1bc5
--- /dev/null
+++ b/max_pooling_nnmf/make_network.py
@@ -0,0 +1,215 @@
+import torch
+from append_block import append_block
+from L1NormLayer import L1NormLayer
+from NNMF2d import NNMF2d
+from append_parameter import append_parameter
+
+
+def make_network(
+    input_dim_x: int,
+    input_dim_y: int,
+    input_number_of_channel: int,
+    iterations: int,
+    torch_device: torch.device,
+    epsilon: float | None = None,
+    positive_function_type: int = 0,
+    beta: float | None = None,
+    # Conv:
+    number_of_output_channels: list[int] = [32, 64, 96, 10],
+    kernel_size_conv: list[tuple[int, int]] = [
+        (5, 5),
+        (5, 5),
+        (-1, -1),  # Take the whole input image x and y size
+        (1, 1),
+    ],
+    stride_conv: list[tuple[int, int]] = [
+        (1, 1),
+        (1, 1),
+        (1, 1),
+        (1, 1),
+    ],
+    padding_conv: list[tuple[int, int]] = [
+        (0, 0),
+        (0, 0),
+        (0, 0),
+        (0, 0),
+    ],
+    dilation_conv: list[tuple[int, int]] = [
+        (1,
1), + (1, 1), + (1, 1), + (1, 1), + ], + # Pool: + kernel_size_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), # No pooling layer + (-1, -1), # No pooling layer + ], + stride_pool: list[tuple[int, int]] = [ + (2, 2), + (2, 2), + (-1, -1), + (-1, -1), + ], + padding_pool: list[tuple[int, int]] = [ + (0, 0), + (0, 0), + (0, 0), + (0, 0), + ], + dilation_pool: list[tuple[int, int]] = [ + (1, 1), + (1, 1), + (1, 1), + (1, 1), + ], + enable_onoff: bool = False, +) -> tuple[ + torch.nn.Sequential, + list[list[torch.nn.parameter.Parameter]], + list[str], +]: + + assert len(number_of_output_channels) == len(kernel_size_conv) + assert len(number_of_output_channels) == len(stride_conv) + assert len(number_of_output_channels) == len(padding_conv) + assert len(number_of_output_channels) == len(dilation_conv) + assert len(number_of_output_channels) == len(kernel_size_pool) + assert len(number_of_output_channels) == len(stride_pool) + assert len(number_of_output_channels) == len(padding_pool) + assert len(number_of_output_channels) == len(dilation_pool) + + if enable_onoff: + input_number_of_channel *= 2 + + parameter_cnn_top: list[torch.nn.parameter.Parameter] = [] + parameter_nnmf: list[torch.nn.parameter.Parameter] = [] + parameter_norm: list[torch.nn.parameter.Parameter] = [] + + test_image = torch.ones( + (1, input_number_of_channel, input_dim_x, input_dim_y), device=torch_device + ) + + network = torch.nn.Sequential() + network = network.to(torch_device) + + for block_id in range(0, len(number_of_output_channels)): + + test_image = append_block( + network=network, + out_channels=number_of_output_channels[block_id], + test_image=test_image, + dilation=dilation_conv[block_id], + padding=padding_conv[block_id], + stride=stride_conv[block_id], + kernel_size=kernel_size_conv[block_id], + epsilon=epsilon, + positive_function_type=positive_function_type, + beta=beta, + iterations=iterations, + torch_device=torch_device, + parameter_cnn_top=parameter_cnn_top, + parameter_nnmf=parameter_nnmf, + parameter_norm=parameter_norm, + ) + + if (kernel_size_pool[block_id][0] > 0) and (kernel_size_pool[block_id][1] > 0): + network.append(torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))) + test_image = network[-1](test_image) + + # network.append(torch.nn.ReLU()) + # test_image = network[-1](test_image) + + # mock_output = ( + # torch.nn.functional.conv2d( + # torch.zeros( + # 1, + # 1, + # test_image.shape[2], + # test_image.shape[3], + # ), + # torch.zeros((1, 1, 2, 2)), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # .squeeze(0) + # .squeeze(0) + # ) + + # network.append( + # torch.nn.Unfold( + # kernel_size=(2, 2), + # stride=(2, 2), + # padding=(0, 0), + # dilation=(1, 1), + # ) + # ) + # test_image = network[-1](test_image) + + # network.append( + # torch.nn.Fold( + # output_size=mock_output.shape, + # kernel_size=(1, 1), + # dilation=1, + # padding=0, + # stride=1, + # ) + # ) + # test_image = network[-1](test_image) + + # network.append(L1NormLayer()) + # test_image = network[-1](test_image) + + # network.append( + # NNMF2d( + # in_channels=test_image.shape[1], + # out_channels=test_image.shape[1] // 4, + # epsilon=epsilon, + # positive_function_type=positive_function_type, + # beta=beta, + # iterations=iterations, + # local_learning=False, + # local_learning_kl=False, + # ).to(torch_device) + # ) + + # test_image = network[-1](test_image) + # append_parameter(module=network[-1], parameter_list=parameter_nnmf) + + # network.append( + # torch.nn.BatchNorm2d( + # 
num_features=test_image.shape[1],
+            #         device=torch_device,
+            #         momentum=0.1,
+            #         track_running_stats=False,
+            #     )
+            # )
+            # test_image = network[-1](test_image)
+            # append_parameter(module=network[-1], parameter_list=parameter_norm)
+
+    network.append(torch.nn.Softmax(dim=1))
+    test_image = network[-1](test_image)
+
+    network.append(torch.nn.Flatten())
+    test_image = network[-1](test_image)
+
+    parameters: list[list[torch.nn.parameter.Parameter]] = [
+        parameter_cnn_top,
+        parameter_nnmf,
+        parameter_norm,
+    ]
+
+    name_list: list[str] = [
+        "cnn_top",
+        "nnmf",
+        "batchnorm2d",
+    ]
+
+    return (
+        network,
+        parameters,
+        name_list,
+    )
diff --git a/max_pooling_nnmf/make_optimize.py b/max_pooling_nnmf/make_optimize.py
new file mode 100644
index 0000000..ab1a4e0
--- /dev/null
+++ b/max_pooling_nnmf/make_optimize.py
@@ -0,0 +1,32 @@
+import torch
+
+
+def make_optimize(
+    parameters: list[list[torch.nn.parameter.Parameter]],
+    lr_initial: list[float],
+    eps=1e-10,
+) -> tuple[
+    list[torch.optim.Adam | None],
+    list[torch.optim.lr_scheduler.ReduceLROnPlateau | None],
+]:
+    list_optimizer: list[torch.optim.Adam | None] = []
+    list_lr_scheduler: list[torch.optim.lr_scheduler.ReduceLROnPlateau | None] = []
+
+    assert len(parameters) == len(lr_initial)
+
+    for i in range(0, len(parameters)):
+        if len(parameters[i]) > 0:
+            list_optimizer.append(torch.optim.Adam(parameters[i], lr=lr_initial[i]))
+        else:
+            list_optimizer.append(None)
+
+    for i in range(0, len(list_optimizer)):
+        if list_optimizer[i] is not None:
+            list_lr_scheduler.append(
+                torch.optim.lr_scheduler.ReduceLROnPlateau(list_optimizer[i], eps=eps)  # type: ignore
+            )
+        else:
+            list_lr_scheduler.append(None)
+
+    return (list_optimizer, list_lr_scheduler)
diff --git a/max_pooling_nnmf/non_linear_weigth_function.py b/max_pooling_nnmf/non_linear_weigth_function.py
new file mode 100644
index 0000000..053a9b6
--- /dev/null
+++ b/max_pooling_nnmf/non_linear_weigth_function.py
@@ -0,0 +1,26 @@
+import torch
+
+
+def non_linear_weigth_function(
+    weight: torch.Tensor, beta: torch.Tensor | None, positive_function_type: int
+) -> torch.Tensor:
+
+    if positive_function_type == 0:
+        positive_weights = torch.abs(weight)
+
+    elif positive_function_type == 1:
+        assert beta is not None
+        positive_weights = weight
+        max_value = torch.abs(positive_weights).max()
+        if max_value > 80:
+            positive_weights = 80.0 * positive_weights / max_value
+        positive_weights = torch.exp((torch.tanh(beta) + 1.0) * 0.5 * positive_weights)
+
+    elif positive_function_type == 2:
+        assert beta is not None
+        positive_weights = (torch.tanh(beta * weight) + 1.0) * 0.5
+
+    else:
+        positive_weights = weight
+
+    return positive_weights
diff --git a/max_pooling_nnmf/plot.py b/max_pooling_nnmf/plot.py
new file mode 100644
index 0000000..ad22d33
--- /dev/null
+++ b/max_pooling_nnmf/plot.py
@@ -0,0 +1,15 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+data = np.load("data_log.npy")
+plt.loglog(
+    data[:, 0],
+    100.0 * (1.0 - data[:, 1] / 10000.0),
+    "k",
+    label="nnmf max pooling",
+)
+
+plt.legend()
+plt.xlabel("Epoch")
+plt.ylabel("Error [%]")
+plt.title("CIFAR10")
+plt.show()
diff --git a/max_pooling_nnmf/run_network.py b/max_pooling_nnmf/run_network.py
new file mode 100644
index 0000000..8d0a43f
--- /dev/null
+++ b/max_pooling_nnmf/run_network.py
@@ -0,0 +1,251 @@
+import os
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+import argh
+
+import time
+import numpy as np
+import torch
+
+rand_seed: int = 21
+torch.manual_seed(rand_seed)
+torch.cuda.manual_seed(rand_seed)
+np.random.seed(rand_seed)
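+
+# Global seeding makes runs repeatable on a fixed software stack; for bitwise
+# reproducibility one would additionally have to constrain cuDNN, e.g. via
+# torch.backends.cudnn.deterministic = True (not enabled here).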
+ +from torch.utils.tensorboard import SummaryWriter + +from make_network import make_network +from get_the_data import get_the_data +from loss_function import loss_function +from make_optimize import make_optimize + + +def main( + lr_initial_nnmf: float = 0.01, + lr_initial_cnn_top: float = 0.001, + lr_initial_norm: float = 0.001, + iterations: int = 20, + dataset: str = "CIFAR10", # "CIFAR10", "FashionMNIST", "MNIST" + only_print_network: bool = False, +) -> None: + + da_auto_mode: bool = False # Automatic Data Augmentation from TorchVision + lr_limit: float = 1e-9 + + torch_device: torch.device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + torch.set_default_dtype(torch.float32) + + # Some parameters + batch_size_train: int = 50 # 0 + batch_size_test: int = 50 # 0 + number_of_epoch: int = 500 + + loss_mode: int = 0 + loss_coeffs_mse: float = 0.5 + loss_coeffs_kldiv: float = 1.0 + print( + "loss_mode: ", + loss_mode, + "loss_coeffs_mse: ", + loss_coeffs_mse, + "loss_coeffs_kldiv: ", + loss_coeffs_kldiv, + ) + + if dataset == "MNIST" or dataset == "FashionMNIST": + input_number_of_channel: int = 1 + input_dim_x: int = 24 + input_dim_y: int = 24 + else: + input_number_of_channel = 3 + input_dim_x = 28 + input_dim_y = 28 + + train_dataloader, test_dataloader, train_processing_chain, test_processing_chain = ( + get_the_data( + dataset, + batch_size_train, + batch_size_test, + torch_device, + input_dim_x, + input_dim_y, + flip_p=0.5, + jitter_brightness=0.5, + jitter_contrast=0.1, + jitter_saturation=0.1, + jitter_hue=0.15, + da_auto_mode=da_auto_mode, + ) + ) + + ( + network, + parameters, + name_list, + ) = make_network( + input_dim_x=input_dim_x, + input_dim_y=input_dim_y, + input_number_of_channel=input_number_of_channel, + iterations=iterations, + torch_device=torch_device, + ) + + print(network) + + print() + print("Information about used parameters:") + number_of_parameter: int = 0 + for i, parameter_list in enumerate(parameters): + count_parameter: int = 0 + for parameter_element in parameter_list: + count_parameter += parameter_element.numel() + print(f"{name_list[i]}: {count_parameter}") + number_of_parameter += count_parameter + print(f"total number of parameter: {number_of_parameter}") + + if only_print_network: + exit() + + ( + optimizers, + lr_schedulers, + ) = make_optimize( + parameters=parameters, + lr_initial=[ + lr_initial_cnn_top, + lr_initial_nnmf, + lr_initial_norm, + ], + ) + + my_string: str = "_lr_" + for i in range(0, len(lr_schedulers)): + if lr_schedulers[i] is not None: + my_string += f"{lr_schedulers[i].get_last_lr()[0]:.4e}_" # type: ignore + else: + my_string += "-_" + + default_path: str = f"iter{iterations}{my_string}" + log_dir: str = f"log_{default_path}" + + tb = SummaryWriter(log_dir=log_dir) + + for epoch_id in range(0, number_of_epoch): + print() + print(f"Epoch: {epoch_id}") + t_start: float = time.perf_counter() + + train_loss: float = 0.0 + train_correct: int = 0 + train_number: int = 0 + test_correct: int = 0 + test_number: int = 0 + + # Switch the network into training mode + network.train() + + # This runs in total for one epoch split up into mini-batches + for image, target in train_dataloader: + + # Clean the gradient + for i in range(0, len(optimizers)): + if optimizers[i] is not None: + optimizers[i].zero_grad() # type: ignore + + output = network(train_processing_chain(image)) + + loss = loss_function( + h=output, + labels=target, + number_of_output_neurons=output.shape[1], + loss_mode=loss_mode, + 
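# With loss_mode == 0 (see loss_function.py) the loss is a normalized
+                # mixture of an MSE term and a KL-divergence term, weighted by
+                # the two coefficients below.
+                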
loss_coeffs_mse=loss_coeffs_mse,
+                loss_coeffs_kldiv=loss_coeffs_kldiv,
+            )
+
+            assert loss is not None
+            train_loss += loss.item()
+            train_correct += (output.argmax(dim=1) == target).sum().cpu().numpy()
+            train_number += target.shape[0]
+
+            # Calculate backprop
+            loss.backward()
+
+            # Update the parameters
+            for i in range(0, len(optimizers)):
+                if optimizers[i] is not None:
+                    optimizers[i].step()  # type: ignore
+
+        performance_train_correct: float = 100.0 * train_correct / train_number
+        # Update the learning rate
+        for i in range(0, len(lr_schedulers)):
+            if lr_schedulers[i] is not None:
+                lr_schedulers[i].step(train_loss)  # type: ignore
+
+        my_string = "Actual lr: "
+        for i in range(0, len(lr_schedulers)):
+            if lr_schedulers[i] is not None:
+                my_string += f" {lr_schedulers[i].get_last_lr()[0]:.4e} "  # type: ignore
+            else:
+                my_string += " --- "
+
+        print(my_string)
+        t_training: float = time.perf_counter()
+
+        # Switch the network into evaluation mode
+        network.eval()
+
+        with torch.no_grad():
+
+            for image, target in test_dataloader:
+                output = network(test_processing_chain(image))
+
+                test_correct += (output.argmax(dim=1) == target).sum().cpu().numpy()
+                test_number += target.shape[0]
+
+            t_testing = time.perf_counter()
+
+            performance_test_correct: float = 100.0 * test_correct / test_number
+
+            tb.add_scalar("Train Loss", train_loss / float(train_number), epoch_id)
+            tb.add_scalar("Train Number Correct", train_correct, epoch_id)
+            tb.add_scalar("Test Number Correct", test_correct, epoch_id)
+
+            print(
+                f"Training: Loss={train_loss / float(train_number):.5f} Correct={performance_train_correct:.2f}%"
+            )
+            print(f"Testing: Correct={performance_test_correct:.2f}%")
+            print(
+                f"Time: Training={(t_training - t_start):.1f}sec, Testing={(t_testing - t_training):.1f}sec"
+            )
+
+            tb.flush()
+
+            lr_check: list[float] = []
+            for i in range(0, len(lr_schedulers)):
+                if lr_schedulers[i] is not None:
+                    lr_check.append(lr_schedulers[i].get_last_lr()[0])  # type: ignore
+
+            lr_check_max = float(torch.tensor(lr_check).max())
+
+            if lr_check_max < lr_limit:
+                torch.save(network, f"Model_{default_path}.pt")
+                tb.close()
+                print("Done (lr_limit)")
+                return
+
+    torch.save(network, f"Model_{default_path}.pt")
+    print()
+
+    tb.close()
+    print("Done (loop end)")
+
+    return
+
+
+if __name__ == "__main__":
+    argh.dispatch_command(main)
diff --git a/plot.py b/plot.py
new file mode 100644
index 0000000..1425974
--- /dev/null
+++ b/plot.py
@@ -0,0 +1,92 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+data = np.load("./basis_nnmf/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy")
+plt.loglog(data[:, 0], 100.0 * (1.0 - data[:, 1] / 10000.0), "k", label="basis nnmf")
+
+data = np.load("./basis_mlp/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy")
+plt.loglog(data[:, 0], 100.0 * (1.0 - data[:, 1] / 10000.0), "k--", label="basis mlp")
+
+data = np.load(
+    "./basis_nnmf_autograd/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy"
+)
+plt.loglog(
+    data[:, 0], 100.0 * (1.0 - data[:, 1] / 10000.0), "k:", label="basis nnmf autograd"
+)
+
+data = np.load("./basis_conv2d/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy")
+
+plt.loglog(
+    data[:, 0], 100.0 * (1.0 - data[:, 1] / 10000.0), "k-.", label="basis conv2d"
+)
+
+
+# ----
+
+data = np.load(
+    "./max_pooling_nnmf/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy"
+)
+plt.loglog(data[:, 0], 100.0 * (1.0 - data[:, 1] / 10000.0), label="nnmf max pooling")
+
+data = np.load(
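+    # Each data_log_*.npy file (written by convert_log_to_numpy.py) stores one
+    # row per epoch: (epoch, number of correctly classified test images).
+    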
"./avg_pooling_nnmf/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy" +) +plt.loglog( + data[:, 0], 100.0 * (1.0 - data[:, 1] / 10000.0), label="nnmf average pooling" +) + +data = np.load( + "./avg_pooling_nnmf_noinbetween1x1/data_log_iter20_lr_-_1.0000e-02_1.0000e-03_.npy" +) +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + label="nnmf average noinbetween1x1", +) +# ---- +data = np.load( + "./avg_pooling_conv2d/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy" +) +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + label="conv2d average pooling (breaks during learning)", +) + +data = np.load( + "./avg_pooling_conv2d_noinbetween1x1/data_log_iter20_lr_-_1.0000e-02_1.0000e-03_.npy" +) +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + label="conv2d average noinbetween1x1", +) + +# ---- + +data = np.load( + "./max_pooling_mlp/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy" +) +plt.loglog(data[:, 0], 100.0 * (1.0 - data[:, 1] / 10000.0), label="mlp max pooling") + +data = np.load( + "./avg_pooling_mlp/data_log_iter20_lr_1.0000e-03_1.0000e-02_1.0000e-03_.npy" +) +plt.loglog( + data[:, 0], 100.0 * (1.0 - data[:, 1] / 10000.0), label="mlp average pooling" +) + +data = np.load( + "./avg_pooling_mlp_noinbetween1x1/data_log_iter20_lr_-_1.0000e-02_1.0000e-03_.npy" +) +plt.loglog( + data[:, 0], + 100.0 * (1.0 - data[:, 1] / 10000.0), + label="mlp average noinbetween1x1", +) + +plt.legend() +plt.xlabel("Epoch") +plt.ylabel("Error [%]") +plt.title("CIFAR10") +plt.show()