From ac79146816d2d25bd442171518cdc6511ba909de Mon Sep 17 00:00:00 2001
From: David Rotermund <54365609+davrot@users.noreply.github.com>
Date: Tue, 2 Jan 2024 22:13:23 +0100
Subject: [PATCH] Update README.md

Signed-off-by: David Rotermund <54365609+davrot@users.noreply.github.com>
---
 pytorch/train/README.md | 291 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 279 insertions(+), 12 deletions(-)

diff --git a/pytorch/train/README.md b/pytorch/train/README.md
index 7a3a9e2..bf9fd9d 100644
--- a/pytorch/train/README.md
+++ b/pytorch/train/README.md
@@ -16,6 +16,8 @@ Questions to [David Rotermund](mailto:davrot@uni-bremen.de)
 
 ### Network
 
+**Note: Somehow I forgot the fully connected layer, but the network still reaches 99%.**
+
 ```python
 import torch
 
@@ -250,28 +252,32 @@
 import matplotlib.pyplot as plt
 from tensorboard.backend.event_processing import event_accumulator
 import numpy as np
 
-path: str = "./runs/Jan26_18-03-23_doppio/" # this way tensorboard directory
+path: str = "run"
 acc = event_accumulator.EventAccumulator(path)
 acc.Reload()
 available_scalar = acc.Tags()["scalars"]
 available_histograms = acc.Tags()["histograms"]
 
-print("Available Scalars")
+print("Available Scalars:")
 print(available_scalar)
+print()
 
-print("Available Histograms")
+print("Available Histograms:")
 print(available_histograms)
 
-which_scalar = "Train Performance"
+which_scalar = "Test Number Correct"
 te = acc.Scalars(which_scalar)
 
-# %%
-temp = []
-for te_item in te:
-    temp.append((te_item[1], te_item[2]))
-temp = np.array(temp)
-plt.plot(temp[:, 0], temp[:, 1])
+np_temp = np.zeros((len(te), 2))
+
+for idx in range(0, len(te)):
+    np_temp[idx, 0] = te[idx].step
+    np_temp[idx, 1] = te[idx].value
+print(np_temp)
+
+
+plt.plot(np_temp[:, 0], np_temp[:, 1])
 plt.xlabel("Steps")
-plt.ylabel("Train Performance")
+plt.ylabel(which_scalar)
 plt.title(which_scalar)
@@ -526,6 +532,8 @@ Time: Training=9.4sec, Testing=0.4sec
 
 ## MNIST with Adam, ReduceLROnPlateau, cross-entropy on GPU
 
-Here a list of the changes:
+![image1](image1.png)
+
+Here is a list of the changes:
 
 Added to the beginning
@@ -818,3 +826,262 @@ def class_to_one_hot(
     output, class_to_one_hot(target, number_of_output_channels_full1)
 )
 ```
+
+Full source:
+
+```python
+import os
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+
+import torch
+import torchvision # type:ignore
+import numpy as np
+import time
+from torch.utils.tensorboard import SummaryWriter
+
+
+def class_to_one_hot(
+    correct_label: torch.Tensor, number_of_neurons: int
+) -> torch.Tensor:
+    target_one_hot: torch.Tensor = torch.zeros(
+        (correct_label.shape[0], number_of_neurons)
+    )
+    target_one_hot.scatter_(
+        1, correct_label.unsqueeze(1), torch.ones((correct_label.shape[0], 1))
+    )
+
+    return target_one_hot
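+
+# A sketch of an alternative (not used below): torch.nn.functional.one_hot
+# builds the same matrix, but it returns int64 values, so it would have to
+# be cast before feeding it into the MSE loss used here:
+#
+#   target_one_hot = torch.nn.functional.one_hot(
+#       correct_label, num_classes=number_of_neurons
+#   ).type(torch.float32)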
+
+
+assert torch.cuda.is_available() is True
+device_gpu = torch.device("cuda:0")
+
+
+class MyDataset(torch.utils.data.Dataset):
+    # Initialize
+    def __init__(self, train: bool = False) -> None:
+        super(MyDataset, self).__init__()
+
+        if train is True:
+            self.pattern_storage: np.ndarray = np.load("train_pattern_storage.npy")
+            self.label_storage: np.ndarray = np.load("train_label_storage.npy")
+        else:
+            self.pattern_storage = np.load("test_pattern_storage.npy")
+            self.label_storage = np.load("test_label_storage.npy")
+
+        self.pattern_storage = self.pattern_storage.astype(np.float32)
+        self.pattern_storage /= np.max(self.pattern_storage)
+
+        # How many patterns are there?
+        self.number_of_pattern: int = self.label_storage.shape[0]
+
+    def __len__(self) -> int:
+        return self.number_of_pattern
+
+    # Get one pattern at position index
+    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
+        image = torch.tensor(self.pattern_storage[index, np.newaxis, :, :])
+        target = int(self.label_storage[index])
+
+        return image, target
+
+
+# Some parameters
+input_number_of_channel: int = 1
+input_dim_x: int = 24
+input_dim_y: int = 24
+
+number_of_output_channels_conv1: int = 32
+number_of_output_channels_conv2: int = 64
+number_of_output_channels_flatten1: int = 576
+number_of_output_channels_full1: int = 10
+
+kernel_size_conv1: tuple[int, int] = (5, 5)
+kernel_size_pool1: tuple[int, int] = (2, 2)
+kernel_size_conv2: tuple[int, int] = (5, 5)
+kernel_size_pool2: tuple[int, int] = (2, 2)
+
+stride_conv1: tuple[int, int] = (1, 1)
+stride_pool1: tuple[int, int] = (2, 2)
+stride_conv2: tuple[int, int] = (1, 1)
+stride_pool2: tuple[int, int] = (2, 2)
+
+padding_conv1: int = 0
+padding_pool1: int = 0
+padding_conv2: int = 0
+padding_pool2: int = 0
+
+network = torch.nn.Sequential(
+    torch.nn.Conv2d(
+        in_channels=input_number_of_channel,
+        out_channels=number_of_output_channels_conv1,
+        kernel_size=kernel_size_conv1,
+        stride=stride_conv1,
+        padding=padding_conv1,
+    ),
+    torch.nn.ReLU(),
+    torch.nn.MaxPool2d(
+        kernel_size=kernel_size_pool1, stride=stride_pool1, padding=padding_pool1
+    ),
+    torch.nn.Conv2d(
+        in_channels=number_of_output_channels_conv1,
+        out_channels=number_of_output_channels_conv2,
+        kernel_size=kernel_size_conv2,
+        stride=stride_conv2,
+        padding=padding_conv2,
+    ),
+    torch.nn.ReLU(),
+    torch.nn.MaxPool2d(
+        kernel_size=kernel_size_pool2, stride=stride_pool2, padding=padding_pool2
+    ),
+    torch.nn.Flatten(
+        start_dim=1,
+    ),
+    torch.nn.Linear(
+        in_features=number_of_output_channels_flatten1,
+        out_features=number_of_output_channels_full1,
+        bias=True,
+    ),
+    torch.nn.Softmax(dim=1),
+).to(device=device_gpu)
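+
+# Why in_features of the Linear layer is 576 (assuming the 24x24 crops
+# used below): 24x24 --Conv2d 5x5--> 20x20 --MaxPool2d 2x2--> 10x10
+# --Conv2d 5x5--> 6x6 --MaxPool2d 2x2--> 3x3 at 64 channels, and
+# 64 * 3 * 3 = 576 features after Flatten. A quick runtime check:
+#
+#   with torch.no_grad():
+#       dummy = torch.zeros((1, 1, 24, 24), device=device_gpu)
+#       print(network[:7](dummy).shape)  # -> torch.Size([1, 576])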
+
+test_processing_chain = torchvision.transforms.Compose(
+    transforms=[torchvision.transforms.CenterCrop((24, 24))],
+)
+
+train_processing_chain = torchvision.transforms.Compose(
+    transforms=[torchvision.transforms.RandomCrop((24, 24))],
+)
+
+dataset_train = MyDataset(train=True)
+dataset_test = MyDataset(train=False)
+batch_size_train = 100
+batch_size_test = 100
+
+train_data_load = torch.utils.data.DataLoader(
+    dataset_train, batch_size=batch_size_train, shuffle=True
+)
+
+test_data_load = torch.utils.data.DataLoader(
+    dataset_test, batch_size=batch_size_test, shuffle=False
+)
+
+# -------------------------------------------
+
+# The optimizer
+optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
+
+# The LR Scheduler
+lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
+
+number_of_test_pattern: int = dataset_test.__len__()
+number_of_train_pattern: int = dataset_train.__len__()
+
+number_of_epoch: int = 50
+
+tb = SummaryWriter(log_dir="run")
+
+loss_function = torch.nn.MSELoss()
+
+for epoch_id in range(0, number_of_epoch):
+    print(f"Epoch: {epoch_id}")
+    t_start: float = time.perf_counter()
+
+    train_loss: float = 0.0
+    train_correct: int = 0
+    train_number: int = 0
+    test_correct: int = 0
+    test_number: int = 0
+
+    # Switch the network into training mode
+    network.train()
+
+    # This runs in total for one epoch split up into mini-batches
+    for image, target in train_data_load:
+        # Clean the gradient
+        optimizer.zero_grad()
+
+        output = network(train_processing_chain(image).to(device=device_gpu))
+
+        loss = loss_function(
+            output,
+            class_to_one_hot(target, number_of_output_channels_full1).to(
+                device=device_gpu
+            ),
+        )
+
+        train_loss += loss.item()
+        train_correct += (output.argmax(dim=1).cpu() == target).sum().item()
+        train_number += target.shape[0]
+
+        # Calculate backprop
+        loss.backward()
+
+        # Update the parameter
+        optimizer.step()
+
+    # Update the learning rate
+    lr_scheduler.step(train_loss)
+
+    t_training: float = time.perf_counter()
+
+    # Switch the network into evaluation mode
+    network.eval()
+    with torch.no_grad():
+        for image, target in test_data_load:
+            output = network(test_processing_chain(image).to(device=device_gpu))
+
+            test_correct += (output.argmax(dim=1).cpu() == target).sum().item()
+            test_number += target.shape[0]
+
+    t_testing: float = time.perf_counter()
+
+    performance_test_correct: float = 100.0 * test_correct / test_number
+    performance_train_correct: float = 100.0 * train_correct / train_number
+
+    tb.add_scalar("Train Loss", train_loss, epoch_id)
+    tb.add_scalar("Train Number Correct", train_correct, epoch_id)
+    tb.add_scalar("Test Number Correct", test_correct, epoch_id)
+
+    print(f"Training: Loss={train_loss:.5f} Correct={performance_train_correct:.2f}%")
+    print(f"Testing: Correct={performance_test_correct:.2f}%")
+    print(
+        f"Time: Training={(t_training-t_start):.1f}sec, Testing={(t_testing-t_training):.1f}sec"
+    )
+    torch.save(network, "Model_MNIST_A_" + str(epoch_id) + ".pt")
+    print()
+
+    tb.flush()
+
+tb.close()
+```
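+
+The loop above stores the complete network object once per epoch. A minimal
+sketch for loading such a checkpoint again (the file name is just an example;
+`map_location` is only needed when no GPU is available at load time, and newer
+PyTorch versions may additionally require `weights_only=False`):
+
+```python
+import torch
+
+network = torch.load("Model_MNIST_A_49.pt", map_location="cpu")
+network.eval()
+```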