Update README.md

Signed-off-by: David Rotermund <54365609+davrot@users.noreply.github.com>
2025-07-07 23:00:02 +02:00 · 2024-01-02 22:13:23 +01:00 · 2024-01-02 22:13:23 +01:00 · ac79146816
commit ac79146816
parent 35dd8fafaf
1 changed files with 248 additions and 10 deletions
--- a/pytorch/train/README.md
+++ b/pytorch/train/README.md
@ -16,6 +16,8 @@ Questions to [David Rotermund](mailto:davrot@uni-bremen.de)
 ### Network
 **Note: I accidentally left out the fully connected layer, but the network still reaches 99%.** 
 ```python
 import torch
@ -250,28 +252,32 @@ import matplotlib.pyplot as plt
 from tensorboard.backend.event_processing import event_accumulator
 import numpy as np
-path: str = "./runs/Jan26_18-03-23_doppio/"  # this way tensorboard directory
+path: str = "run"
 acc = event_accumulator.EventAccumulator(path)
 acc.Reload()
 available_scalar = acc.Tags()["scalars"]
 available_histograms = acc.Tags()["histograms"]
-print("Available Scalars")
+print("Available Scalars:")
 print(available_scalar)
 print()
-print("Available Histograms")
+print("Available Histograms:")
 print(available_histograms)
-which_scalar = "Train Performance"
+which_scalar = "Test Number Correct"
 te = acc.Scalars(which_scalar)
 # %%
 temp = []
 for te_item in te:
    temp.append((te_item[1], te_item[2]))
 temp = np.array(temp)
-plt.plot(temp[:, 0], temp[:, 1])
+np_temp = np.zeros((len(te), 2))
 # Copy the logged (step, value) pairs into the pre-allocated array.
 # The loop variable is deliberately not called `id`, which would shadow
 # the Python builtin of the same name.
 for entry_id, entry in enumerate(te):
    np_temp[entry_id, 0] = entry.step
    np_temp[entry_id, 1] = entry.value
 print(np_temp)
 plt.plot(np_temp[:, 0], np_temp[:, 1])
 plt.xlabel("Steps")
 # Label the y-axis with the scalar that is actually plotted instead of a
 # fixed text that no longer matches the selected scalar.
 plt.ylabel(which_scalar)
 plt.title(which_scalar)
@ -526,6 +532,8 @@ Time: Training=9.4sec, Testing=0.4sec
 ## MNIST with Adam, ReduceLROnPlateau, cross-entropy on GPU
 ![image1](image1.png)
 Here is a list of the changes:
 Added to the beginning
@ -818,3 +826,233 @@ def class_to_one_hot(
            output, class_to_one_hot(target, number_of_output_channels_full1)
        )
 ```
 Full source:
 ```python
 import os
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 import torch
 import torchvision  # type:ignore
 import numpy as np
 import time
 from torch.utils.tensorboard import SummaryWriter
 def class_to_one_hot(
    correct_label: torch.Tensor, number_of_neurons: int
 ) -> torch.Tensor:
    """Convert a vector of class indices into one-hot encoded rows.

    Args:
        correct_label: 1D tensor with one integer class index per pattern.
            It is used as the index argument of scatter_.
        number_of_neurons: Length of every one-hot row (number of classes).

    Returns:
        Tensor of shape (number of patterns, number_of_neurons) that is 1
        at the position of the class index and 0 everywhere else. It lives
        on the same device as correct_label.
    """
    # Allocate on the device of the labels; scatter_ needs the index and the
    # target tensor on the same device.
    target_one_hot: torch.Tensor = torch.zeros(
        (correct_label.shape[0], number_of_neurons),
        device=correct_label.device,
    )
    # A scalar fill value avoids building a temporary tensor of ones.
    target_one_hot.scatter_(1, correct_label.unsqueeze(1), value=1.0)
    return target_one_hot
 # This example runs on the first CUDA device. An explicit check is used
 # instead of an assert (asserts are removed when Python runs with -O), so
 # the script always stops with a clear message when no GPU is available.
 if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available, but this example requires a GPU.")
 device_gpu = torch.device("cuda:0")
 class MyDataset(torch.utils.data.Dataset):
    """Dataset that serves patterns and labels loaded from .npy files.

    The patterns are converted to float32 and divided by their overall
    maximum when the dataset is created.
    """
    def __init__(self, train: bool = False) -> None:
        """Load the training files (train=True) or the test files."""
        super().__init__()
        # Pick the file pair that belongs to the requested split
        if train is True:
            pattern_file, label_file = (
                "train_pattern_storage.npy",
                "train_label_storage.npy",
            )
        else:
            pattern_file, label_file = (
                "test_pattern_storage.npy",
                "test_label_storage.npy",
            )
        raw_pattern: np.ndarray = np.load(pattern_file)
        self.label_storage: np.ndarray = np.load(label_file)
        # float32 copy of the patterns, rescaled in place by the largest value
        self.pattern_storage: np.ndarray = raw_pattern.astype(np.float32)
        self.pattern_storage /= np.max(self.pattern_storage)
        # How many patterns are there? One label per pattern.
        self.number_of_pattern: int = self.label_storage.shape[0]
    def __len__(self) -> int:
        """Return the number of patterns in the dataset."""
        return self.number_of_pattern
    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Return the pattern at position index and its label.

        The pattern gets a new leading axis of size 1 and is returned as a
        torch tensor; the label is returned as a Python int.
        """
        single_pattern = self.pattern_storage[index, :, :]
        image = torch.tensor(np.expand_dims(single_pattern, axis=0))
        return image, int(self.label_storage[index])
 # Some parameters
 input_number_of_channel: int = 1
 input_dim_x: int = 24
 input_dim_y: int = 24
 number_of_output_channels_conv1: int = 32
 number_of_output_channels_conv2: int = 64
 number_of_output_channels_full1: int = 10
 kernel_size_conv1: tuple[int, int] = (5, 5)
 kernel_size_pool1: tuple[int, int] = (2, 2)
 kernel_size_conv2: tuple[int, int] = (5, 5)
 kernel_size_pool2: tuple[int, int] = (2, 2)
 stride_conv1: tuple[int, int] = (1, 1)
 stride_pool1: tuple[int, int] = (2, 2)
 stride_conv2: tuple[int, int] = (1, 1)
 stride_pool2: tuple[int, int] = (2, 2)
 padding_conv1: int = 0
 padding_pool1: int = 0
 padding_conv2: int = 0
 padding_pool2: int = 0
 def _spatial_size_after(size: int, layers: list[tuple[int, int, int]]) -> int:
    """Return the length of one spatial axis after a chain of layers.

    Args:
        size: Length of the axis at the input of the first layer.
        layers: One (kernel, stride, padding) triple per conv / pooling
            layer, in the order in which the layers are applied.

    Returns:
        Length of the axis after the last layer, using the output size
        formula for dilation 1 and rounding down.
    """
    for kernel, stride, padding in layers:
        size = (size + 2 * padding - kernel) // stride + 1
    return size
 # The number of inputs of the linear layer is derived from the settings
 # above instead of being hard-coded, so it cannot go out of sync with them.
 # For the values above this gives 24 -> 20 -> 10 -> 6 -> 3 per axis and
 # 64 * 3 * 3 = 576 features.
 # NOTE(review): assumes input_dim_x belongs to the first entry of the
 # kernel / stride tuples; irrelevant for the square setup used here.
 _output_dim_x: int = _spatial_size_after(
    input_dim_x,
    [
        (kernel_size_conv1[0], stride_conv1[0], padding_conv1),
        (kernel_size_pool1[0], stride_pool1[0], padding_pool1),
        (kernel_size_conv2[0], stride_conv2[0], padding_conv2),
        (kernel_size_pool2[0], stride_pool2[0], padding_pool2),
    ],
 )
 _output_dim_y: int = _spatial_size_after(
    input_dim_y,
    [
        (kernel_size_conv1[1], stride_conv1[1], padding_conv1),
        (kernel_size_pool1[1], stride_pool1[1], padding_pool1),
        (kernel_size_conv2[1], stride_conv2[1], padding_conv2),
        (kernel_size_pool2[1], stride_pool2[1], padding_pool2),
    ],
 )
 number_of_output_channels_flatten1: int = (
    number_of_output_channels_conv2 * _output_dim_x * _output_dim_y
 )
 # Two convolution / ReLU / max-pooling stages, followed by flattening,
 # one linear layer and a softmax over dimension 1.
 network = torch.nn.Sequential(
    torch.nn.Conv2d(
        input_number_of_channel,
        number_of_output_channels_conv1,
        kernel_size_conv1,
        stride=stride_conv1,
        padding=padding_conv1,
    ),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(
        kernel_size_pool1,
        stride=stride_pool1,
        padding=padding_pool1,
    ),
    torch.nn.Conv2d(
        number_of_output_channels_conv1,
        number_of_output_channels_conv2,
        kernel_size_conv2,
        stride=stride_conv2,
        padding=padding_conv2,
    ),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(
        kernel_size_pool2,
        stride=stride_pool2,
        padding=padding_pool2,
    ),
    torch.nn.Flatten(start_dim=1),
    torch.nn.Linear(
        number_of_output_channels_flatten1,
        number_of_output_channels_full1,
        bias=True,
    ),
    torch.nn.Softmax(dim=1),
 )
 # Move all parameters of the network to the GPU
 network = network.to(device=device_gpu)
 # Test images are cropped around the center, training images at a random
 # position; both crops are 24 x 24.
 test_processing_chain = torchvision.transforms.Compose(
    [torchvision.transforms.CenterCrop((24, 24))]
 )
 train_processing_chain = torchvision.transforms.Compose(
    [torchvision.transforms.RandomCrop((24, 24))]
 )
 # Mini-batch sizes
 batch_size_train = 100
 batch_size_test = 100
 # Only the training data is shuffled
 dataset_train = MyDataset(train=True)
 dataset_test = MyDataset(train=False)
 train_data_load = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=batch_size_train,
    shuffle=True,
 )
 test_data_load = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=batch_size_test,
    shuffle=False,
 )
 # -------------------------------------------
 # Adam updates the parameters of the network
 optimizer = torch.optim.Adam(params=network.parameters(), lr=0.001)
 # ReduceLROnPlateau adapts the learning rate of that optimizer
 lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer)
 # Sizes of the two datasets and the number of training epochs
 number_of_test_pattern: int = len(dataset_test)
 number_of_train_pattern: int = len(dataset_train)
 number_of_epoch: int = 50
 # Tensorboard log files are written into the directory "run"
 tb = SummaryWriter(log_dir="run")
 # Mean squared error between network output and one-hot target
 loss_function = torch.nn.MSELoss()
 # Train for number_of_epoch epochs. After every epoch the network is tested,
 # the results are logged to tensorboard and the network is saved to disk.
 for epoch_id in range(0, number_of_epoch):
    print(f"Epoch: {epoch_id}")
    t_start: float = time.perf_counter()
    train_loss: float = 0.0
    train_correct: int = 0
    train_number: int = 0
    test_correct: int = 0
    test_number: int = 0
    # Switch the network into training mode
    network.train()
    # This runs in total for one epoch split up into mini-batches
    for image, target in train_data_load:
        # Clean the gradient
        optimizer.zero_grad()
        output = network(train_processing_chain(image).to(device=device_gpu))
        loss = loss_function(
            output,
            class_to_one_hot(target, number_of_output_channels_full1).to(
                device=device_gpu
            ),
        )
        train_loss += loss.item()
        # .item() yields a plain Python number, so the counter stays an int
        # as annotated instead of turning into a NumPy value.
        train_correct += int((output.argmax(dim=1).cpu() == target).sum().item())
        train_number += target.shape[0]
        # Calculate backprop
        loss.backward()
        # Update the parameter
        optimizer.step()
    # Update the learning rate (the scheduler monitors the summed train loss)
    lr_scheduler.step(train_loss)
    t_training: float = time.perf_counter()
    # Switch the network into evaluation mode
    network.eval()
    # No gradients are needed for testing
    with torch.no_grad():
        for image, target in test_data_load:
            output = network(test_processing_chain(image).to(device=device_gpu))
            test_correct += int((output.argmax(dim=1).cpu() == target).sum().item())
            test_number += target.shape[0]
    t_testing = time.perf_counter()
    # Percentage of correctly classified patterns
    perfomance_test_correct: float = 100.0 * test_correct / test_number
    perfomance_train_correct: float = 100.0 * train_correct / train_number
    tb.add_scalar("Train Loss", train_loss, epoch_id)
    tb.add_scalar("Train Number Correct", train_correct, epoch_id)
    tb.add_scalar("Test Number Correct", test_correct, epoch_id)
    print(f"Training: Loss={train_loss:.5f} Correct={perfomance_train_correct:.2f}%")
    print(f"Testing: Correct={perfomance_test_correct:.2f}%")
    print(
        f"Time: Training={(t_training-t_start):.1f}sec, Testing={(t_testing-t_training):.1f}sec"
    )
    # Keep one file with the complete network per epoch
    torch.save(network, "Model_MNIST_A_" + str(epoch_id) + ".pt")
    print()
    tb.flush()
 tb.close()
 ```