import os
import time

import torch
import torchvision  # type: ignore
from torchvision.transforms import v2  # type: ignore

number_of_epoch: int = 500
lr_limit: float = 1e-9  # stop once the learning rate drops below this value
ModelsPath: str = "Models"
os.makedirs(ModelsPath, exist_ok=True)

# Tensorboard (silence TensorFlow logging before the import can trigger it)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
from torch.utils.tensorboard import SummaryWriter  # noqa: E402

tb = SummaryWriter(log_dir="run")

# GPU?
device: torch.device = (
    torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
)
torch.set_default_dtype(torch.float32)

# Data augmentation
test_processing_chain = v2.Compose(
    transforms=[
        v2.ToImage(),
        v2.ToDtype(torch.float32, scale=True),
        v2.CenterCrop((28, 28)),
    ],
)
train_processing_chain = v2.Compose(
    transforms=[
        v2.ToImage(),
        v2.ToDtype(torch.float32, scale=True),
        v2.RandomCrop((28, 28)),
        v2.AutoAugment(),
    ],
)

# Data provider
tv_dataset_train = torchvision.datasets.CIFAR10(
    root="data",
    train=True,
    download=True,
    transform=train_processing_chain,
)
tv_dataset_test = torchvision.datasets.CIFAR10(
    root="data",
    train=False,
    download=True,
    transform=test_processing_chain,
)

# Data loader
train_data_load = torch.utils.data.DataLoader(
    tv_dataset_train, batch_size=100, shuffle=True
)
test_data_load = torch.utils.data.DataLoader(
    tv_dataset_test, batch_size=100, shuffle=False
)

# Network
# Shapes for a 3 x 28 x 28 input:
#   conv5 -> 32 x 24 x 24 -> pool -> 32 x 12 x 12
#   conv5 -> 64 x 8 x 8   -> pool -> 64 x 4 x 4
#   flatten -> 64 * 4 * 4 = 1024 features
network = torch.nn.Sequential(
    torch.nn.Conv2d(
        in_channels=3,
        out_channels=32,
        kernel_size=5,
        stride=1,
        padding=0,
    ),
    torch.nn.ReLU(),
    torch.nn.BatchNorm2d(32),
    torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
    torch.nn.Conv2d(
        in_channels=32,
        out_channels=64,
        kernel_size=5,
        stride=1,
        padding=0,
    ),
    torch.nn.ReLU(),
    torch.nn.BatchNorm2d(64),
    torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
    torch.nn.Flatten(
        start_dim=1,
    ),
    torch.nn.Linear(
        in_features=1024,
        out_features=1024,
        bias=True,
    ),
    torch.nn.ReLU(),
    torch.nn.Linear(
        in_features=1024,
        out_features=10,
        bias=True,
    ),
).to(device)

# Optimizer
optimizer = torch.optim.Adam(network.parameters(), lr=0.001)

# LR Scheduler
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

# Loss function
loss_function = torch.nn.CrossEntropyLoss()

# Main loop
for epoch_id in range(0, number_of_epoch):
    print(f"Epoch: {epoch_id}")
    t_start: float = time.perf_counter()

    train_loss: float = 0.0
    train_correct: int = 0
    train_number: int = 0
    test_correct: int = 0
    test_number: int = 0

    # Switch the network into training mode
    network.train()

    # This runs in total for one epoch, split up into mini-batches
    for image, target in train_data_load:
        target = target.to(device)

        # Clear the gradients
        optimizer.zero_grad()

        # Run data through the network
        output = network(image.to(device))

        # Measure the loss
        loss = loss_function(output, target)
        train_loss += loss.item()

        # Count correct classifications
        train_correct += int((output.argmax(dim=1) == target).sum().item())
        train_number += target.shape[0]

        # Backpropagate
        loss.backward()

        # Update the parameters
        optimizer.step()

    # Update the learning rate once per epoch, based on the accumulated loss
    lr_scheduler.step(train_loss)

    t_training: float = time.perf_counter()

    # Switch the network into evaluation mode
    network.eval()

    with torch.no_grad():
        for image, target in test_data_load:
            target = target.to(device)

            # Run data through the network
            output = network(image.to(device))

            # Count correct classifications
            test_correct += int((output.argmax(dim=1) == target).sum().item())
            test_number += target.shape[0]

    t_testing = time.perf_counter()

    performance_test_correct: float = 100.0 * test_correct / test_number
    performance_train_correct: float = 100.0 * train_correct / train_number

    tb.add_scalar("Train Loss", train_loss, epoch_id)
    tb.add_scalar("Train Number Correct", train_correct, epoch_id)
    tb.add_scalar("Test Number Correct", test_correct, epoch_id)
    tb.add_scalar("Error Test", 100.0 - performance_test_correct, epoch_id)
    tb.add_scalar("Error Train", 100.0 - performance_train_correct, epoch_id)
    tb.add_scalar("Learning Rate", optimizer.param_groups[-1]["lr"], epoch_id)
    tb.flush()

    print(
        f"Training: Loss={train_loss:.5f} "
        f"Correct={performance_train_correct:.2f}% "
        f"LR:{optimizer.param_groups[-1]['lr']}"
    )
    print(f"Testing: Correct={performance_test_correct:.2f}%")
    print(
        f"Time: Training={(t_training - t_start):.1f}sec, "
        f"Testing={(t_testing - t_training):.1f}sec"
    )

    # The dataset is CIFAR-10, so name the checkpoints accordingly
    torch.save(network, os.path.join(ModelsPath, f"Model_CIFAR10_A_{epoch_id}.pt"))

    print()

    # Stop once ReduceLROnPlateau has pushed the learning rate below the limit
    if optimizer.param_groups[-1]["lr"] < lr_limit:
        print("Done (lr_limit)")
        break

tb.close()
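
# The scalars logged above can be viewed with the TensorBoard CLI, pointed
# at the SummaryWriter directory created near the top of this script
# (assuming the script is launched from the same working directory):
#
#   tensorboard --logdir=run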
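
# Optional sketch, not part of the training flow above: reloading one of the
# saved checkpoints for inference. The file name assumes at least epoch 0
# finished; on newer PyTorch releases torch.load defaults to
# weights_only=True, so weights_only=False is needed to unpickle a full
# nn.Module. Flip the guard to try it.
reload_example: bool = False
if reload_example:
    restored = torch.load(
        os.path.join(ModelsPath, "Model_CIFAR10_A_0.pt"),
        map_location=device,
        weights_only=False,
    )
    restored.eval()  # use BatchNorm running statistics during inference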