Add files via upload
This commit is contained in:
parent
4426f05e61
commit
81ec320f5c
77 changed files with 8266 additions and 0 deletions
161
DATA_FASHION_MNIST/convert.py
Normal file
161
DATA_FASHION_MNIST/convert.py
Normal file
|
@ -0,0 +1,161 @@
|
|||
# MIT License
|
||||
# Copyright 2022 University of Bremen
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
#
|
||||
# David Rotermund ( davrot@uni-bremen.de )
|
||||
#
|
||||
#
|
||||
# Release history:
|
||||
# ================
|
||||
# 1.0.0 -- 01.05.2022: first release
|
||||
#
|
||||
#
|
||||
|
||||
import numpy as np
|
||||
|
||||
# [offset] [type] [value] [description]
|
||||
# 0000 32 bit integer 0x00000801(2049) magic number (MSB first)
|
||||
# 0004 32 bit integer 60000 number of items
|
||||
# 0008 unsigned byte ?? label
|
||||
# 0009 unsigned byte ?? label
|
||||
# ........
|
||||
# xxxx unsigned byte ?? label
|
||||
# The labels values are 0 to 9.
|
||||
|
||||
|
||||
class ReadLabel:
|
||||
"""Class for reading the labels from an MNIST label file"""
|
||||
|
||||
def __init__(self, filename):
|
||||
self.filename: str = filename
|
||||
self.data = self.read_from_file(filename)
|
||||
|
||||
def read_from_file(self, filename):
|
||||
int32_data = np.dtype(np.uint32)
|
||||
int32_data = int32_data.newbyteorder(">")
|
||||
file = open(filename, "rb")
|
||||
|
||||
magic_flag = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if magic_flag != 2049:
|
||||
data = np.zeros(0)
|
||||
number_of_elements = 0
|
||||
else:
|
||||
number_of_elements = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if number_of_elements < 1:
|
||||
data = np.zeros(0)
|
||||
else:
|
||||
data = np.frombuffer(file.read(number_of_elements), dtype=np.uint8)
|
||||
|
||||
file.close()
|
||||
|
||||
return data
|
||||
|
||||
|
||||
# [offset] [type] [value] [description]
|
||||
# 0000 32 bit integer 0x00000803(2051) magic number
|
||||
# 0004 32 bit integer 60000 number of images
|
||||
# 0008 32 bit integer 28 number of rows
|
||||
# 0012 32 bit integer 28 number of columns
|
||||
# 0016 unsigned byte ?? pixel
|
||||
# 0017 unsigned byte ?? pixel
|
||||
# ........
|
||||
# xxxx unsigned byte ?? pixel
|
||||
# Pixels are organized row-wise.
|
||||
# Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).
|
||||
|
||||
|
||||
class ReadPicture:
|
||||
"""Class for reading the images from an MNIST image file"""
|
||||
|
||||
def __init__(self, filename):
|
||||
self.filename: str = filename
|
||||
self.data = self.read_from_file(filename)
|
||||
|
||||
def read_from_file(self, filename):
|
||||
int32_data = np.dtype(np.uint32)
|
||||
int32_data = int32_data.newbyteorder(">")
|
||||
file = open(filename, "rb")
|
||||
|
||||
magic_flag = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if magic_flag != 2051:
|
||||
data = np.zeros(0)
|
||||
number_of_elements = 0
|
||||
else:
|
||||
number_of_elements = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if number_of_elements < 1:
|
||||
data = np.zeros(0)
|
||||
number_of_rows = 0
|
||||
else:
|
||||
number_of_rows = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if number_of_rows != 28:
|
||||
data = np.zeros(0)
|
||||
number_of_columns = 0
|
||||
else:
|
||||
number_of_columns = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if number_of_columns != 28:
|
||||
data = np.zeros(0)
|
||||
else:
|
||||
data = np.frombuffer(
|
||||
file.read(number_of_elements * number_of_rows * number_of_columns),
|
||||
dtype=np.uint8,
|
||||
)
|
||||
data = data.reshape(number_of_elements, number_of_columns, number_of_rows)
|
||||
|
||||
file.close()
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def proprocess_data_set(test_mode):
|
||||
|
||||
if test_mode is True:
|
||||
filename_out_pattern: str = "TestPatternStorage.npy"
|
||||
filename_out_label: str = "TestLabelStorage.npy"
|
||||
filename_in_image: str = "t10k-images-idx3-ubyte"
|
||||
filename_in_label = "t10k-labels-idx1-ubyte"
|
||||
else:
|
||||
filename_out_pattern = "TrainPatternStorage.npy"
|
||||
filename_out_label = "TrainLabelStorage.npy"
|
||||
filename_in_image = "train-images-idx3-ubyte"
|
||||
filename_in_label = "train-labels-idx1-ubyte"
|
||||
|
||||
pictures = ReadPicture(filename_in_image)
|
||||
labels = ReadLabel(filename_in_label)
|
||||
|
||||
# Down to 0 ... 1.0
|
||||
max_value = np.max(pictures.data.astype(np.float32))
|
||||
d = np.float32(pictures.data.astype(np.float32) / max_value)
|
||||
|
||||
label_storage = np.uint64(labels.data)
|
||||
pattern_storage = d.astype(np.float32)
|
||||
|
||||
np.save(filename_out_pattern, pattern_storage)
|
||||
np.save(filename_out_label, label_storage)
|
||||
|
||||
|
||||
proprocess_data_set(True)
|
||||
proprocess_data_set(False)
|
8
DATA_FASHION_MNIST/data_url.txt
Normal file
8
DATA_FASHION_MNIST/data_url.txt
Normal file
|
@ -0,0 +1,8 @@
|
|||
https://github.com/zalandoresearch/fashion-mnist
|
||||
|
||||
We need:
|
||||
t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz
|
||||
|
||||
Then
|
||||
gzip -d *.gz
|
||||
python convert.py
|
10
clean.sh
Normal file
10
clean.sh
Normal file
|
@ -0,0 +1,10 @@
|
|||
read -p "Are you sure? " -n 1 -r
|
||||
echo # (optional) move to a new line
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]
|
||||
then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
rm -rf Log
|
||||
rm -rf Parameters
|
||||
rm *.txt
|
4
dataset.json
Normal file
4
dataset.json
Normal file
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"data_path": "./DATA_FASHION_MNIST/",
|
||||
"data_mode": "MNIST_FASHION"
|
||||
}
|
24
def.json
Normal file
24
def.json
Normal file
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"epoch_id_max": 200,
|
||||
"number_of_spikes": [
|
||||
1600
|
||||
],
|
||||
"batch_size": 24,
|
||||
"forgetting_offset": 0.0,
|
||||
"stage_id": 0,
|
||||
"simulation_id": 0,
|
||||
"learning_parameters": {
|
||||
"learning_rate_threshold_w": 0.001,
|
||||
"eps_xy_intitial": 1.0,
|
||||
"number_of_batches_for_one_update": 20,
|
||||
"learning_rate_gamma_w": 0.001,
|
||||
"lr_scheduler_patience_w": 50,
|
||||
"adapt_learning_rate_after_minibatch": true,
|
||||
"w_trainable": [
|
||||
true
|
||||
]
|
||||
},
|
||||
"augmentation": {},
|
||||
"image_statistics": {},
|
||||
"approximation_setting": {}
|
||||
}
|
24
def_sbs_L0.json
Normal file
24
def_sbs_L0.json
Normal file
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"epoch_id_max": 200,
|
||||
"number_of_spikes": [
|
||||
1600
|
||||
],
|
||||
"batch_size": 24,
|
||||
"forgetting_offset": 0.0,
|
||||
"stage_id": 0,
|
||||
"simulation_id": 0,
|
||||
"learning_parameters": {
|
||||
"learning_rate_threshold_w": 0.001,
|
||||
"eps_xy_intitial": 1.0,
|
||||
"number_of_batches_for_one_update": 20,
|
||||
"learning_rate_gamma_w": 1.0,
|
||||
"lr_scheduler_patience_w": 50,
|
||||
"adapt_learning_rate_after_minibatch": true,
|
||||
"w_trainable": [
|
||||
true
|
||||
]
|
||||
},
|
||||
"augmentation": {},
|
||||
"image_statistics": {},
|
||||
"approximation_setting": {}
|
||||
}
|
47
get_perf.py
Normal file
47
get_perf.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
import os
|
||||
|
||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from tensorboard.backend.event_processing import event_accumulator
|
||||
import numpy as np
|
||||
import json
|
||||
from jsmin import jsmin
|
||||
import glob
|
||||
|
||||
# -------------------------------
|
||||
|
||||
filename:str = "def.json"
|
||||
with open(filename) as json_file:
|
||||
minified = jsmin(json_file.read())
|
||||
data = json.loads(minified)
|
||||
number_of_spikes = data["number_of_spikes"]
|
||||
|
||||
|
||||
# -------------------------------
|
||||
|
||||
|
||||
path_runs: str = "./Log/*"
|
||||
|
||||
temp = glob.glob(path_runs)
|
||||
assert len(temp) == 1
|
||||
path = temp[0]
|
||||
|
||||
|
||||
acc = event_accumulator.EventAccumulator(path)
|
||||
acc.Reload()
|
||||
|
||||
available_scalar = acc.Tags()["scalars"]
|
||||
available_histograms = acc.Tags()["histograms"]
|
||||
|
||||
which_scalar = "Test Error"
|
||||
te = acc.Scalars(which_scalar)
|
||||
|
||||
temp = []
|
||||
for te_item in te:
|
||||
temp.append((te_item[1], te_item[2]))
|
||||
temp = np.array(temp)
|
||||
|
||||
print(temp)
|
||||
np.save(f"test_error_{number_of_spikes}.npy", temp)
|
||||
|
201
learn_it.py
Normal file
201
learn_it.py
Normal file
|
@ -0,0 +1,201 @@
|
|||
# %%
|
||||
import os
|
||||
|
||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
||||
|
||||
import sys
|
||||
import torch
|
||||
import dataconf
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
from network.Parameter import Config
|
||||
|
||||
from network.build_network import build_network
|
||||
from network.build_optimizer import build_optimizer
|
||||
from network.build_lr_scheduler import build_lr_scheduler
|
||||
from network.build_datasets import build_datasets
|
||||
from network.load_previous_weights import load_previous_weights
|
||||
|
||||
from network.loop_train_test import loop_test, loop_train, run_lr_scheduler
|
||||
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
|
||||
# ######################################################################
|
||||
# We want to log what is going on into a file and screen
|
||||
# ######################################################################
|
||||
|
||||
now = datetime.now()
|
||||
dt_string_filename = now.strftime("%Y_%m_%d_%H_%M_%S")
|
||||
logging.basicConfig(
|
||||
filename="log_" + dt_string_filename + ".txt",
|
||||
filemode="w",
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s %(message)s",
|
||||
)
|
||||
logging.getLogger().addHandler(logging.StreamHandler())
|
||||
|
||||
# ######################################################################
|
||||
# Load the config data from the json file
|
||||
# ######################################################################
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
raise Exception("Argument: Config file name is missing")
|
||||
|
||||
filename: str = sys.argv[1]
|
||||
|
||||
if os.path.exists(filename) is False:
|
||||
raise Exception(f"Config file not found! {filename}")
|
||||
|
||||
if os.path.exists("network.json") is False:
|
||||
raise Exception("Config file not found! network.json")
|
||||
|
||||
if os.path.exists("dataset.json") is False:
|
||||
raise Exception("Config file not found! dataset.json")
|
||||
|
||||
|
||||
cfg = dataconf.multi.file("network.json").file("dataset.json").file(filename).on(Config)
|
||||
logging.info(cfg)
|
||||
|
||||
logging.info(f"Using configuration file: {filename}")
|
||||
|
||||
logging.info(f"Number of spikes: {cfg.number_of_spikes}")
|
||||
logging.info(f"Cooldown after spikes: {cfg.cooldown_after_number_of_spikes}")
|
||||
logging.info(f"Reduction cooldown: {cfg.reduction_cooldown}")
|
||||
logging.info("")
|
||||
logging.info(f"Epsilon 0: {cfg.epsilon_0}")
|
||||
logging.info(f"Batch size: {cfg.batch_size}")
|
||||
logging.info(f"Data mode: {cfg.data_mode}")
|
||||
logging.info("")
|
||||
logging.info("*** Config loaded.")
|
||||
logging.info("")
|
||||
|
||||
tb = SummaryWriter(log_dir=cfg.log_path)
|
||||
|
||||
# ###########################################
|
||||
# GPU Yes / NO ?
|
||||
# ###########################################
|
||||
default_dtype = torch.float32
|
||||
torch.set_default_dtype(default_dtype)
|
||||
torch_device: str = "cuda:0" if torch.cuda.is_available() else "cpu"
|
||||
use_gpu: bool = True if torch.cuda.is_available() else False
|
||||
print(f"Using {torch_device} device")
|
||||
device = torch.device(torch_device)
|
||||
|
||||
# ######################################################################
|
||||
# Prepare the test and training data
|
||||
# ######################################################################
|
||||
|
||||
the_dataset_train, the_dataset_test, my_loader_test, my_loader_train = build_datasets(
|
||||
cfg
|
||||
)
|
||||
|
||||
logging.info("*** Data loaded.")
|
||||
|
||||
# ######################################################################
|
||||
# Build the network, Optimizer, and LR Scheduler #
|
||||
# ######################################################################
|
||||
|
||||
network = build_network(
|
||||
cfg=cfg, device=device, default_dtype=default_dtype, logging=logging
|
||||
)
|
||||
logging.info("")
|
||||
|
||||
optimizer = build_optimizer(network=network, cfg=cfg, logging=logging)
|
||||
|
||||
lr_scheduler = build_lr_scheduler(optimizer=optimizer, cfg=cfg, logging=logging)
|
||||
|
||||
logging.info("*** Network generated.")
|
||||
|
||||
load_previous_weights(
|
||||
network=network,
|
||||
overload_path=cfg.learning_parameters.overload_path,
|
||||
logging=logging,
|
||||
device=device,
|
||||
default_dtype=default_dtype,
|
||||
)
|
||||
|
||||
logging.info("")
|
||||
|
||||
last_test_performance: float = -1.0
|
||||
with torch.no_grad():
|
||||
if cfg.learning_parameters.learning_active is True:
|
||||
while cfg.epoch_id < cfg.epoch_id_max:
|
||||
|
||||
# ##############################################
|
||||
# Run a training data epoch
|
||||
# ##############################################
|
||||
network.train()
|
||||
(
|
||||
my_loss_for_batch,
|
||||
performance_for_batch,
|
||||
full_loss,
|
||||
full_correct,
|
||||
) = loop_train(
|
||||
cfg=cfg,
|
||||
network=network,
|
||||
my_loader_train=my_loader_train,
|
||||
the_dataset_train=the_dataset_train,
|
||||
optimizer=optimizer,
|
||||
device=device,
|
||||
default_dtype=default_dtype,
|
||||
logging=logging,
|
||||
tb=tb,
|
||||
adapt_learning_rate=cfg.learning_parameters.adapt_learning_rate_after_minibatch,
|
||||
lr_scheduler=lr_scheduler,
|
||||
last_test_performance=last_test_performance,
|
||||
)
|
||||
|
||||
# Let the torch learning rate scheduler update the
|
||||
# learning rates of the optimiers
|
||||
if cfg.learning_parameters.adapt_learning_rate_after_minibatch is False:
|
||||
run_lr_scheduler(
|
||||
cfg=cfg,
|
||||
lr_scheduler=lr_scheduler,
|
||||
optimizer=optimizer,
|
||||
performance_for_batch=performance_for_batch,
|
||||
my_loss_for_batch=my_loss_for_batch,
|
||||
tb=tb,
|
||||
logging=logging,
|
||||
)
|
||||
|
||||
# ##############################################
|
||||
# Run test data
|
||||
# ##############################################
|
||||
network.eval()
|
||||
last_test_performance = loop_test(
|
||||
epoch_id=cfg.epoch_id,
|
||||
cfg=cfg,
|
||||
network=network,
|
||||
my_loader_test=my_loader_test,
|
||||
the_dataset_test=the_dataset_test,
|
||||
device=device,
|
||||
default_dtype=default_dtype,
|
||||
logging=logging,
|
||||
tb=tb,
|
||||
)
|
||||
|
||||
# Next epoch
|
||||
cfg.epoch_id += 1
|
||||
else:
|
||||
# ##############################################
|
||||
# Run test data
|
||||
# ##############################################
|
||||
network.eval()
|
||||
last_test_performance = loop_test(
|
||||
epoch_id=cfg.epoch_id,
|
||||
cfg=cfg,
|
||||
network=network,
|
||||
my_loader_test=my_loader_test,
|
||||
the_dataset_test=the_dataset_test,
|
||||
device=device,
|
||||
default_dtype=default_dtype,
|
||||
logging=logging,
|
||||
tb=tb,
|
||||
)
|
||||
|
||||
tb.close()
|
||||
|
||||
|
||||
# %%
|
5
make_new_previous.sh
Normal file
5
make_new_previous.sh
Normal file
|
@ -0,0 +1,5 @@
|
|||
MAX_NUMBER=199
|
||||
|
||||
MyPATH="Previous++"
|
||||
mkdir $MyPATH
|
||||
cp Parameters/*_S$MAX_NUMBER.npy $MyPATH
|
91
network.json
Normal file
91
network.json
Normal file
|
@ -0,0 +1,91 @@
|
|||
{
|
||||
"network_structure": {
|
||||
"number_of_output_neurons": 10,
|
||||
"layer_type": [
|
||||
"SbS",
|
||||
"MAX POOLING",
|
||||
"SbS",
|
||||
"MAX POOLING",
|
||||
"SbS",
|
||||
"SbS"
|
||||
],
|
||||
"strides": [
|
||||
[
|
||||
1,
|
||||
1
|
||||
], // "SbS"
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // POOLING
|
||||
[
|
||||
1,
|
||||
1
|
||||
], // "SbS"
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // POOLING
|
||||
[
|
||||
1,
|
||||
1
|
||||
], // "SbS"
|
||||
[
|
||||
1,
|
||||
1
|
||||
] // "SbS"
|
||||
],
|
||||
"forward_neuron_numbers": [
|
||||
[
|
||||
1,
|
||||
32
|
||||
], // "SbS"
|
||||
[
|
||||
32,
|
||||
32
|
||||
], // POOLING
|
||||
[
|
||||
32,
|
||||
64
|
||||
], // "SbS"
|
||||
[
|
||||
64,
|
||||
64
|
||||
], // POOLING
|
||||
[
|
||||
64,
|
||||
96
|
||||
], // "SbS"
|
||||
[
|
||||
96,
|
||||
10
|
||||
] // "SbS"
|
||||
],
|
||||
"forward_kernel_size": [
|
||||
[
|
||||
5,
|
||||
5
|
||||
], // "SbS"
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // POOLING
|
||||
[
|
||||
5,
|
||||
5
|
||||
], // "SbS"
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // POOLING
|
||||
[
|
||||
3,
|
||||
3
|
||||
], // "SbS"
|
||||
[
|
||||
1,
|
||||
1
|
||||
] // "SbS"
|
||||
]
|
||||
}
|
||||
}
|
154
network/Adam.py
Normal file
154
network/Adam.py
Normal file
|
@ -0,0 +1,154 @@
|
|||
import torch
|
||||
import math
|
||||
|
||||
|
||||
class Adam(torch.optim.Optimizer):
|
||||
|
||||
sbs_setting: list[bool]
|
||||
lr: float
|
||||
beta1: float
|
||||
beta2: float
|
||||
eps: float
|
||||
maximize: bool
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
params,
|
||||
sbs_setting: list[bool],
|
||||
lr: float = 1e-3,
|
||||
beta1: float = 0.9,
|
||||
beta2: float = 0.999,
|
||||
eps: float = 1e-8,
|
||||
maximize: bool = False,
|
||||
) -> None:
|
||||
|
||||
assert lr > 0.0
|
||||
|
||||
assert eps > 0.0
|
||||
|
||||
assert beta1 > 0.0
|
||||
assert beta1 < 1.0
|
||||
|
||||
assert beta2 > 0.0
|
||||
assert beta2 < 1.0
|
||||
|
||||
assert len(sbs_setting) == len(params)
|
||||
|
||||
self.sbs_setting = sbs_setting
|
||||
self.params = params
|
||||
self.lr = lr
|
||||
self.beta1 = beta1
|
||||
self.beta2 = beta2
|
||||
self.eps = eps
|
||||
self.maximize = maximize
|
||||
|
||||
defaults = dict(
|
||||
lr=lr,
|
||||
beta1=beta1,
|
||||
beta2=beta2,
|
||||
eps=eps,
|
||||
maximize=maximize,
|
||||
)
|
||||
super().__init__(params, defaults)
|
||||
|
||||
def step(self):
|
||||
|
||||
params_with_grad = []
|
||||
grads = []
|
||||
exp_avgs = []
|
||||
exp_avg_sqs = []
|
||||
state_steps = []
|
||||
sbs_setting = []
|
||||
|
||||
for id, p in enumerate(self.params):
|
||||
if p.grad is not None:
|
||||
params_with_grad.append(p)
|
||||
grads.append(p.grad)
|
||||
sbs_setting.append(self.sbs_setting[id])
|
||||
|
||||
state = self.state[p]
|
||||
|
||||
# Lazy state initialization
|
||||
if len(state) == 0:
|
||||
|
||||
state["step"] = torch.tensor(0.0)
|
||||
|
||||
# Exponential moving average of gradient values
|
||||
state["exp_avg"] = torch.zeros_like(
|
||||
p, memory_format=torch.preserve_format
|
||||
)
|
||||
# Exponential moving average of squared gradient values
|
||||
state["exp_avg_sq"] = torch.zeros_like(
|
||||
p, memory_format=torch.preserve_format
|
||||
)
|
||||
|
||||
exp_avgs.append(state["exp_avg"])
|
||||
exp_avg_sqs.append(state["exp_avg_sq"])
|
||||
state_steps.append(state["step"])
|
||||
|
||||
self.adam(
|
||||
params_with_grad,
|
||||
grads,
|
||||
sbs_setting,
|
||||
exp_avgs,
|
||||
exp_avg_sqs,
|
||||
state_steps,
|
||||
beta1=self.beta1,
|
||||
beta2=self.beta2,
|
||||
lr=self.lr,
|
||||
eps=self.eps,
|
||||
maximize=self.maximize,
|
||||
)
|
||||
|
||||
def adam(
|
||||
self,
|
||||
params: list[torch.Tensor],
|
||||
grads: list[torch.Tensor],
|
||||
sbs_setting: list[bool],
|
||||
exp_avgs: list[torch.Tensor],
|
||||
exp_avg_sqs: list[torch.Tensor],
|
||||
state_steps: list[torch.Tensor],
|
||||
beta1: float,
|
||||
beta2: float,
|
||||
lr: float,
|
||||
eps: float,
|
||||
maximize: bool,
|
||||
) -> None:
|
||||
|
||||
with torch.no_grad():
|
||||
|
||||
for i, param in enumerate(params):
|
||||
|
||||
if maximize is False:
|
||||
grad = grads[i]
|
||||
else:
|
||||
grad = -grads[i]
|
||||
|
||||
exp_avg = exp_avgs[i]
|
||||
exp_avg_sq = exp_avg_sqs[i]
|
||||
step_t = state_steps[i]
|
||||
|
||||
# increase step
|
||||
step_t += 1
|
||||
|
||||
# Decay the first and second moment running average coefficient
|
||||
exp_avg *= beta1
|
||||
exp_avg += (1.0 - beta1) * grad
|
||||
|
||||
exp_avg_sq *= beta2
|
||||
exp_avg_sq += (1.0 - beta2) * grad**2
|
||||
|
||||
step_size: float = lr / (1.0 - beta1 ** float(step_t))
|
||||
|
||||
denom = (
|
||||
exp_avg_sq.sqrt() / math.sqrt(1.0 - beta2 ** float(step_t))
|
||||
) + eps
|
||||
|
||||
if sbs_setting[i] is False:
|
||||
param -= step_size * (exp_avg / denom)
|
||||
else:
|
||||
delta = torch.exp(-step_size * (exp_avg / denom))
|
||||
print(
|
||||
f"{float(delta.min()) - 1.0:.4e} {float(delta.max()) - 1.0:.4e}"
|
||||
)
|
||||
param *= delta
|
714
network/CPP_Cuda/HDynamicCNNManyIP.cu
Normal file
714
network/CPP_Cuda/HDynamicCNNManyIP.cu
Normal file
|
@ -0,0 +1,714 @@
|
|||
#include "HDynamicCNNManyIP.h"
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
HDynamicCNNManyIP::HDynamicCNNManyIP()
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
HDynamicCNNManyIP::~HDynamicCNNManyIP()
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
bool HDynamicCNNManyIP::update_entrypoint(
|
||||
int64_t h_pointer_addr,
|
||||
int64_t h_dim_0,
|
||||
int64_t h_dim_1,
|
||||
int64_t h_dim_2,
|
||||
int64_t h_dim_3,
|
||||
int64_t epsilon_xy_pointer_addr,
|
||||
int64_t epsilon_xy_dim_0,
|
||||
int64_t epsilon_xy_dim_1,
|
||||
int64_t epsilon_xy_dim_2,
|
||||
int64_t epsilon_t_pointer_addr,
|
||||
int64_t epsilon_t_dim_0,
|
||||
int64_t weights_pointer_addr,
|
||||
int64_t weights_dim_0,
|
||||
int64_t weights_dim_1,
|
||||
int64_t input_pointer_addr,
|
||||
int64_t input_dim_0,
|
||||
int64_t input_dim_1,
|
||||
int64_t input_dim_2,
|
||||
int64_t input_dim_3,
|
||||
int64_t init_vector_pointer_addr,
|
||||
int64_t init_vector_dim_0,
|
||||
int64_t number_of_processes,
|
||||
float forgetting_offset,
|
||||
int64_t gpu_tuning_factor)
|
||||
{
|
||||
|
||||
size_t number_of_pattern = input_dim_0;
|
||||
|
||||
size_t h_dim = init_vector_dim_0;
|
||||
float* h_init_ptr = (float*)init_vector_pointer_addr;
|
||||
assert((h_init_ptr != nullptr));
|
||||
assert((h_dim > 0));
|
||||
|
||||
float* h_pointer = (float*)h_pointer_addr;
|
||||
assert((h_pointer != nullptr));
|
||||
assert((h_dim_0 > 0));
|
||||
assert((h_dim_1 > 0));
|
||||
assert((h_dim_2 > 0));
|
||||
assert((h_dim_3 > 0));
|
||||
|
||||
size_t h_dim_c0 = h_dim_1 * h_dim_2 * h_dim_3;
|
||||
size_t h_dim_c1 = h_dim_2 * h_dim_3;
|
||||
size_t h_dim_c2 = h_dim_3;
|
||||
|
||||
float* epsilon_xy_pointer = (float*)epsilon_xy_pointer_addr;
|
||||
assert((epsilon_xy_pointer != nullptr));
|
||||
assert((epsilon_xy_dim_0 > 0));
|
||||
assert((epsilon_xy_dim_1 > 0));
|
||||
|
||||
size_t epsilon_xy_dim_c0 = epsilon_xy_dim_2 * epsilon_xy_dim_1;
|
||||
size_t epsilon_xy_dim_c1 = epsilon_xy_dim_2;
|
||||
|
||||
float* epsilon_t_pointer = (float*)epsilon_t_pointer_addr;
|
||||
assert((epsilon_t_pointer != nullptr));
|
||||
assert((epsilon_t_dim_0 > 0));
|
||||
|
||||
float* weights_pointer = (float*)weights_pointer_addr;
|
||||
assert((weights_pointer != nullptr));
|
||||
assert((weights_dim_0 > 0));
|
||||
assert((weights_dim_1 > 0));
|
||||
|
||||
size_t weights_dim_c0 = weights_dim_1;
|
||||
|
||||
int64_t* input_pointer = (int64_t*)input_pointer_addr;
|
||||
assert((input_pointer != nullptr));
|
||||
assert((input_dim_0 > 0));
|
||||
assert((input_dim_1 > 0));
|
||||
assert((input_dim_2 > 0));
|
||||
assert((input_dim_3 > 0));
|
||||
|
||||
size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3;
|
||||
size_t input_dim_c1 = input_dim_2 * input_dim_3;
|
||||
size_t input_dim_c2 = input_dim_3;
|
||||
|
||||
assert((h_dim == weights_dim_1));
|
||||
size_t number_of_spikes = input_dim_1;
|
||||
size_t dim_x = input_dim_2;
|
||||
size_t dim_y = input_dim_3;
|
||||
|
||||
float forgetting_offset_local = forgetting_offset / static_cast<float>(h_dim);
|
||||
|
||||
|
||||
// --------------------
|
||||
if (number_of_processes > 0)
|
||||
{
|
||||
omp_set_num_threads(number_of_processes);
|
||||
|
||||
size_t pattern_id;
|
||||
#pragma omp parallel for
|
||||
for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++)
|
||||
{
|
||||
update(
|
||||
h_init_ptr,
|
||||
h_pointer,
|
||||
h_dim_c0,
|
||||
h_dim_c1,
|
||||
h_dim_c2,
|
||||
h_dim,
|
||||
epsilon_xy_pointer,
|
||||
epsilon_xy_dim_c0,
|
||||
epsilon_xy_dim_c1,
|
||||
epsilon_t_pointer,
|
||||
weights_pointer,
|
||||
weights_dim_c0,
|
||||
input_pointer,
|
||||
input_dim_c0,
|
||||
input_dim_c1,
|
||||
input_dim_c2,
|
||||
number_of_spikes,
|
||||
dim_x,
|
||||
dim_y,
|
||||
forgetting_offset,
|
||||
forgetting_offset_local,
|
||||
pattern_id);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
gpu_update(
|
||||
h_init_ptr,
|
||||
h_pointer,
|
||||
h_dim_c0,
|
||||
h_dim_c1,
|
||||
h_dim_c2,
|
||||
h_dim,
|
||||
epsilon_xy_pointer,
|
||||
epsilon_xy_dim_c0,
|
||||
epsilon_xy_dim_c1,
|
||||
epsilon_t_pointer,
|
||||
weights_pointer,
|
||||
weights_dim_c0,
|
||||
input_pointer,
|
||||
input_dim_c0,
|
||||
input_dim_c1,
|
||||
input_dim_c2,
|
||||
number_of_spikes,
|
||||
dim_x,
|
||||
dim_y,
|
||||
forgetting_offset,
|
||||
forgetting_offset_local,
|
||||
number_of_pattern,
|
||||
gpu_tuning_factor);
|
||||
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
|
||||
bool HDynamicCNNManyIP::update(
|
||||
float* h_init_ptr,
|
||||
float* h_pointer,
|
||||
size_t h_dim_c0,
|
||||
size_t h_dim_c1,
|
||||
size_t h_dim_c2,
|
||||
size_t h_dim,
|
||||
float* epsilon_xy_pointer,
|
||||
size_t epsilon_xy_dim_c0,
|
||||
size_t epsilon_xy_dim_c1,
|
||||
float* epsilon_t_pointer,
|
||||
float* weights_pointer,
|
||||
size_t weights_dim_c0,
|
||||
int64_t* input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
size_t number_of_spikes,
|
||||
size_t dim_x,
|
||||
size_t dim_y,
|
||||
float forgetting_offset,
|
||||
float forgetting_offset_local,
|
||||
size_t pattern_id)
|
||||
{
|
||||
|
||||
float* h_ptr;
|
||||
float* epsilon_xy_ptr;
|
||||
int64_t* input_ptr;
|
||||
|
||||
size_t counter_x;
|
||||
size_t counter_y;
|
||||
|
||||
for (counter_x = 0; counter_x < dim_x; counter_x++)
|
||||
{
|
||||
for (counter_y = 0; counter_y < dim_y; counter_y++)
|
||||
{
|
||||
epsilon_xy_ptr = epsilon_xy_pointer +
|
||||
counter_x * epsilon_xy_dim_c1 + counter_y;
|
||||
|
||||
h_ptr = h_pointer +
|
||||
pattern_id * h_dim_c0 + counter_x * h_dim_c2 + counter_y;
|
||||
|
||||
input_ptr = input_pointer +
|
||||
pattern_id * input_dim_c0 + counter_x * input_dim_c2 + counter_y;
|
||||
|
||||
update_one_ip(
|
||||
h_init_ptr,
|
||||
h_ptr,
|
||||
h_dim_c1,
|
||||
h_dim,
|
||||
weights_pointer,
|
||||
weights_dim_c0,
|
||||
input_ptr,
|
||||
input_dim_c1,
|
||||
epsilon_xy_ptr,
|
||||
epsilon_xy_dim_c0,
|
||||
epsilon_t_pointer,
|
||||
number_of_spikes,
|
||||
forgetting_offset,
|
||||
forgetting_offset_local);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
void HDynamicCNNManyIP::update_one_ip(
|
||||
float* h_init_ptr,
|
||||
float* h_pointer,
|
||||
size_t h_dim_c1,
|
||||
size_t h_dim,
|
||||
float* weights_pointer,
|
||||
size_t weights_dim_c0,
|
||||
int64_t* input_pointer,
|
||||
size_t input_dim_c1,
|
||||
float* epsilon_xy_pointer,
|
||||
size_t epsilon_xy_dim_c0,
|
||||
float* epsilon_t_pointer,
|
||||
size_t number_of_spikes,
|
||||
float forgetting_offset,
|
||||
float forgetting_offset_local)
|
||||
{
|
||||
|
||||
float* h_temp = new float[h_dim];
|
||||
float* h_subsegment = new float[h_dim];
|
||||
|
||||
memcpy(h_subsegment, h_init_ptr, sizeof(float) * h_dim);
|
||||
|
||||
size_t counter_spike;
|
||||
size_t counter;
|
||||
|
||||
float h_temp_sum;
|
||||
float temp_value;
|
||||
|
||||
float epsilon_subsegment;
|
||||
float epsilon_scale = 1.0;
|
||||
|
||||
int64_t* spike;
|
||||
float* w_ptr;
|
||||
|
||||
for (counter_spike = 0; counter_spike < number_of_spikes; counter_spike++)
|
||||
{
|
||||
if (epsilon_scale > 1E10)
|
||||
{
|
||||
temp_value = 1.0 / epsilon_scale;
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_subsegment[counter] *= temp_value;
|
||||
}
|
||||
|
||||
epsilon_scale = 1.0;
|
||||
}
|
||||
|
||||
spike = input_pointer + counter_spike * input_dim_c1;
|
||||
|
||||
if (*spike >= 0)
|
||||
{
|
||||
epsilon_subsegment =
|
||||
epsilon_xy_pointer[*spike *epsilon_xy_dim_c0] * epsilon_t_pointer[counter_spike];
|
||||
|
||||
w_ptr = weights_pointer + *spike * weights_dim_c0;
|
||||
|
||||
memcpy(h_temp, h_subsegment, sizeof(float) * h_dim);
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_temp[counter] *= w_ptr[counter];
|
||||
}
|
||||
|
||||
h_temp_sum = 0.0;
|
||||
#pragma omp simd reduction(+ : h_temp_sum)
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_temp_sum += h_temp[counter];
|
||||
}
|
||||
|
||||
if (h_temp_sum > 1E-10)
|
||||
{
|
||||
temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum;
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_temp[counter] *= temp_value;
|
||||
}
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_subsegment[counter] += h_temp[counter];
|
||||
}
|
||||
|
||||
if (forgetting_offset_local > 0.0)
|
||||
{
|
||||
temp_value =
|
||||
epsilon_scale * epsilon_subsegment * forgetting_offset_local;
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_subsegment[counter] += temp_value;
|
||||
}
|
||||
|
||||
epsilon_scale *=
|
||||
1.0 + epsilon_subsegment * (1.0 + forgetting_offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
epsilon_scale *= 1.0 + epsilon_subsegment * 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
temp_value = 1.0 / epsilon_scale;
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_pointer[counter * h_dim_c1] =
|
||||
h_subsegment[counter] * temp_value;
|
||||
}
|
||||
|
||||
delete[] h_temp;
|
||||
delete[] h_subsegment;
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
__device__ void gpu_update_one_ip(
|
||||
float* __restrict__ h_init_ptr,
|
||||
float* __restrict__ h_pointer,
|
||||
size_t h_dim_c1,
|
||||
size_t h_dim,
|
||||
float* __restrict__ weights_pointer,
|
||||
size_t weights_dim_c0,
|
||||
int64_t* input_pointer,
|
||||
size_t input_dim_c1,
|
||||
float* __restrict__ epsilon_xy_pointer,
|
||||
size_t epsilon_xy_dim_c0,
|
||||
float* __restrict__ epsilon_t_pointer,
|
||||
size_t number_of_spikes,
|
||||
float forgetting_offset,
|
||||
float forgetting_offset_local,
|
||||
float* __restrict__ h_temp,
|
||||
float* __restrict__ h_subsegment
|
||||
)
|
||||
{
|
||||
|
||||
size_t counter_spike;
|
||||
size_t counter;
|
||||
|
||||
float h_temp_sum;
|
||||
float temp_value;
|
||||
|
||||
float epsilon_subsegment;
|
||||
float epsilon_scale = 1.0;
|
||||
|
||||
int64_t* spike;
|
||||
float* w_ptr;
|
||||
|
||||
// float* h_temp = new float[h_dim];
|
||||
// float* h_subsegment = new float[h_dim];
|
||||
|
||||
// Initialize the sub-segement
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_subsegment[counter] = h_init_ptr[counter];
|
||||
}
|
||||
|
||||
for (counter_spike = 0; counter_spike < number_of_spikes; counter_spike++)
|
||||
{
|
||||
if (epsilon_scale > 1E10)
|
||||
{
|
||||
temp_value = 1.0 / epsilon_scale;
|
||||
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_subsegment[counter] *= temp_value;
|
||||
}
|
||||
|
||||
epsilon_scale = 1.0;
|
||||
}
|
||||
|
||||
spike = input_pointer + counter_spike * input_dim_c1;
|
||||
|
||||
if (*spike >= 0)
|
||||
{
|
||||
epsilon_subsegment =
|
||||
epsilon_xy_pointer[*spike *epsilon_xy_dim_c0] * epsilon_t_pointer[counter_spike];
|
||||
|
||||
w_ptr = weights_pointer + *spike * weights_dim_c0;
|
||||
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_temp[counter] = h_subsegment[counter] * w_ptr[counter];
|
||||
}
|
||||
|
||||
h_temp_sum = 0.0;
|
||||
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_temp_sum += h_temp[counter];
|
||||
}
|
||||
|
||||
if (h_temp_sum > 1E-10)
|
||||
{
|
||||
temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum;
|
||||
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_temp[counter] *= temp_value;
|
||||
}
|
||||
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_subsegment[counter] += h_temp[counter];
|
||||
}
|
||||
|
||||
if (forgetting_offset_local > 0.0)
|
||||
{
|
||||
temp_value =
|
||||
epsilon_scale * epsilon_subsegment * forgetting_offset_local;
|
||||
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_subsegment[counter] += temp_value;
|
||||
}
|
||||
|
||||
epsilon_scale *=
|
||||
1.0 + epsilon_subsegment * (1.0 + forgetting_offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
epsilon_scale *= 1.0 + epsilon_subsegment * 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
temp_value = 1.0 / epsilon_scale;
|
||||
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_pointer[counter * h_dim_c1] =
|
||||
h_subsegment[counter] * temp_value;
|
||||
}
|
||||
|
||||
// delete[] h_temp;
|
||||
// delete[] h_subsegment;
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
__global__ void kernel_spike_generation(
|
||||
float* __restrict__ h_init_ptr,
|
||||
float* __restrict__ h_pointer,
|
||||
size_t h_dim_c0,
|
||||
size_t h_dim_c1,
|
||||
size_t h_dim_c2,
|
||||
size_t h_dim,
|
||||
float* __restrict__ weights_pointer,
|
||||
size_t weights_dim_c0,
|
||||
int64_t* __restrict__ input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
float* __restrict__ epsilon_xy_pointer,
|
||||
size_t epsilon_xy_dim_c0,
|
||||
size_t epsilon_xy_dim_c1,
|
||||
float* __restrict__ epsilon_t_pointer,
|
||||
size_t number_of_spikes,
|
||||
float forgetting_offset,
|
||||
float forgetting_offset_local,
|
||||
size_t dim_x,
|
||||
size_t dim_y,
|
||||
size_t dim_xy,
|
||||
size_t max_threadable_tasks,
|
||||
float* __restrict__ temp_memory_a,
|
||||
float* __restrict__ temp_memory_b
|
||||
)
|
||||
{
|
||||
|
||||
int idx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
|
||||
if (idx < max_threadable_tasks)
|
||||
{
|
||||
float* h_ptr;
|
||||
float* epsilon_xy_ptr;
|
||||
int64_t* input_ptr;
|
||||
|
||||
float* temp_memory_ptr_a = temp_memory_a + idx * h_dim;
|
||||
float* temp_memory_ptr_b = temp_memory_b + idx * h_dim;
|
||||
|
||||
// int pattern_id = idx;
|
||||
int pattern_id = idx / dim_xy;
|
||||
int position_xy = idx - (pattern_id * dim_xy);
|
||||
|
||||
// size_t position_x = blockIdx.y;
|
||||
// size_t position_y = blockIdx.z;
|
||||
size_t position_x = position_xy / dim_y;
|
||||
size_t position_y = position_xy - (position_x * dim_y);
|
||||
|
||||
epsilon_xy_ptr = epsilon_xy_pointer +
|
||||
position_x * epsilon_xy_dim_c1 + position_y;
|
||||
|
||||
h_ptr = h_pointer +
|
||||
pattern_id * h_dim_c0 + position_x * h_dim_c2 + position_y;
|
||||
|
||||
input_ptr = input_pointer +
|
||||
pattern_id * input_dim_c0 + position_x * input_dim_c2 + position_y;
|
||||
|
||||
gpu_update_one_ip(
|
||||
h_init_ptr,
|
||||
h_ptr,
|
||||
h_dim_c1,
|
||||
h_dim,
|
||||
weights_pointer,
|
||||
weights_dim_c0,
|
||||
input_ptr,
|
||||
input_dim_c1,
|
||||
epsilon_xy_ptr,
|
||||
epsilon_xy_dim_c0,
|
||||
epsilon_t_pointer,
|
||||
number_of_spikes,
|
||||
forgetting_offset,
|
||||
forgetting_offset_local,
|
||||
temp_memory_ptr_a,
|
||||
temp_memory_ptr_b
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// Let's face it... We need a better way to paralelize it...
|
||||
bool HDynamicCNNManyIP::gpu_update(
|
||||
float* h_init_ptr,
|
||||
float* h_pointer,
|
||||
size_t h_dim_c0,
|
||||
size_t h_dim_c1,
|
||||
size_t h_dim_c2,
|
||||
size_t h_dim,
|
||||
float* epsilon_xy_pointer,
|
||||
size_t epsilon_xy_dim_c0,
|
||||
size_t epsilon_xy_dim_c1,
|
||||
float* epsilon_t_pointer,
|
||||
float* weights_pointer,
|
||||
size_t weights_dim_c0,
|
||||
int64_t* input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
size_t number_of_spikes,
|
||||
size_t dim_x,
|
||||
size_t dim_y,
|
||||
float forgetting_offset,
|
||||
float forgetting_offset_local,
|
||||
size_t number_of_pattern,
|
||||
size_t gpu_tuning_factor)
|
||||
{
|
||||
|
||||
cudaError_t status;
|
||||
assert((dim_x < 65535));
|
||||
assert((dim_y < 65535));
|
||||
|
||||
// // //////////////////////////////////////
|
||||
// // Get infos about the device
|
||||
// // //////////////////////////////////////
|
||||
|
||||
// int device;
|
||||
// cudaDeviceProp prop;
|
||||
|
||||
// status = cudaGetDevice(&device);
|
||||
// assert((status == cudaSuccess));
|
||||
// // std::cout << "Device ID: " << device << std::endl;
|
||||
|
||||
// status = cudaGetDeviceProperties(&prop, device);
|
||||
// assert((status == cudaSuccess));
|
||||
// // std::cout << "Device name: " << prop.name << std::endl;
|
||||
|
||||
// int _cuda_heap_size_in_mb = 16;
|
||||
// status = cudaDeviceSetLimit(cudaLimitMallocHeapSize, _cuda_heap_size_in_mb * (1 << 20));
|
||||
// assert((status == cudaSuccess));
|
||||
|
||||
// size_t pValue;
|
||||
// cudaDeviceGetLimit(&pValue, cudaLimitMallocHeapSize);
|
||||
// std::cout << pValue << " " << (pValue/(2*4*h_dim)) << std::endl;
|
||||
// exit(1);
|
||||
|
||||
|
||||
// //////////////////////////////////////
|
||||
// Calculate the distribution on the GPU
|
||||
// //////////////////////////////////////
|
||||
|
||||
int min_grid_size;
|
||||
int block_size;
|
||||
int grid_size;
|
||||
|
||||
size_t dynamic_s_mem_size = 0;
|
||||
size_t max_threadable_tasks = number_of_pattern * dim_x * dim_y;
|
||||
|
||||
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html?highlight=blocksize#occupancy-calculator
|
||||
status = cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size,
|
||||
(void*)kernel_spike_generation,
|
||||
dynamic_s_mem_size, max_threadable_tasks);
|
||||
assert((status == cudaSuccess));
|
||||
|
||||
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications
|
||||
// Maximum dimensionality of grid of thread blocks: 3
|
||||
// Maximum x -dimension of a grid of thread blocks: (2^31)-1
|
||||
// Maximum y- or z-dimension of a grid of thread blocks: 65535
|
||||
|
||||
// Reduce the automatic block size with our guess
|
||||
if ((gpu_tuning_factor > 0) && (gpu_tuning_factor < block_size))
|
||||
{
|
||||
block_size = int(gpu_tuning_factor);
|
||||
}
|
||||
// Round up according to array size
|
||||
// (I will separate x and y into other grid dimentsions soon)
|
||||
// grid_size = (number_of_pattern + block_size - 1) / block_size;
|
||||
grid_size = (max_threadable_tasks + block_size - 1) / block_size;
|
||||
|
||||
// std::cout << min_grid_size << std::endl;
|
||||
// std::cout << grid_size << std::endl;
|
||||
// std::cout << block_size << std::endl;
|
||||
// std::cout << max_threadable_tasks << std::endl;
|
||||
|
||||
//dim3 grid(grid_size, dim_x, dim_y);
|
||||
|
||||
float* temp_memory_a = nullptr;
|
||||
status = cudaMalloc((void**)&temp_memory_a, h_dim * max_threadable_tasks * sizeof(float));
|
||||
assert((status == cudaSuccess));
|
||||
|
||||
float* temp_memory_b = nullptr;
|
||||
status = cudaMalloc((void**)&temp_memory_b, h_dim * max_threadable_tasks * sizeof(float));
|
||||
assert((status == cudaSuccess));
|
||||
|
||||
|
||||
//kernel_spike_generation<<<grid, block_size >>>(
|
||||
kernel_spike_generation<<<grid_size, block_size >>>(
|
||||
h_init_ptr,
|
||||
h_pointer,
|
||||
h_dim_c0,
|
||||
h_dim_c1,
|
||||
h_dim_c2,
|
||||
h_dim,
|
||||
weights_pointer,
|
||||
weights_dim_c0,
|
||||
input_pointer,
|
||||
input_dim_c0,
|
||||
input_dim_c1,
|
||||
input_dim_c2,
|
||||
epsilon_xy_pointer,
|
||||
epsilon_xy_dim_c0,
|
||||
epsilon_xy_dim_c1,
|
||||
epsilon_t_pointer,
|
||||
number_of_spikes,
|
||||
forgetting_offset,
|
||||
forgetting_offset_local,
|
||||
dim_x,
|
||||
dim_y,
|
||||
(dim_x * dim_y),
|
||||
//number_of_pattern
|
||||
max_threadable_tasks,
|
||||
temp_memory_a,
|
||||
temp_memory_b
|
||||
);
|
||||
|
||||
status = cudaDeviceSynchronize();
|
||||
assert((status == cudaSuccess));
|
||||
|
||||
status = cudaFree(temp_memory_a);
|
||||
assert((status == cudaSuccess));
|
||||
|
||||
status = cudaFree(temp_memory_b);
|
||||
assert((status == cudaSuccess));
|
||||
|
||||
|
||||
return true;
|
||||
};
|
111
network/CPP_Cuda/HDynamicCNNManyIP.h
Normal file
111
network/CPP_Cuda/HDynamicCNNManyIP.h
Normal file
|
@ -0,0 +1,111 @@
|
|||
#ifndef SRC_HDYNAMICCNNMANYIP_H_
|
||||
#define SRC_HDYNAMICCNNMANYIP_H_
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cctype>
|
||||
#include <iostream>
|
||||
|
||||
class HDynamicCNNManyIP
|
||||
{
|
||||
public:
|
||||
HDynamicCNNManyIP();
|
||||
~HDynamicCNNManyIP();
|
||||
|
||||
bool update_entrypoint(
|
||||
int64_t h_pointer_addr,
|
||||
int64_t h_dim_0,
|
||||
int64_t h_dim_1,
|
||||
int64_t h_dim_2,
|
||||
int64_t h_dim_3,
|
||||
int64_t epsilon_xy_pointer_addr,
|
||||
int64_t epsilon_xy_dim_0,
|
||||
int64_t epsilon_xy_dim_1,
|
||||
int64_t epsilon_xy_dim_2,
|
||||
int64_t epsilon_t_pointer_addr,
|
||||
int64_t epsilon_t_dim_0,
|
||||
int64_t weights_pointer_addr,
|
||||
int64_t weights_dim_0,
|
||||
int64_t weights_dim_1,
|
||||
int64_t input_pointer_addr,
|
||||
int64_t input_dim_0,
|
||||
int64_t input_dim_1,
|
||||
int64_t input_dim_2,
|
||||
int64_t input_dim_3,
|
||||
int64_t init_vector_pointer_addr,
|
||||
int64_t init_vector_dim_0,
|
||||
int64_t number_of_processes,
|
||||
float forgetting_offset,
|
||||
int64_t gpu_tuning_factor);
|
||||
|
||||
private:
|
||||
|
||||
bool update(
|
||||
float* h_init_ptr,
|
||||
float* h_pointer,
|
||||
size_t h_dim_c0,
|
||||
size_t h_dim_c1,
|
||||
size_t h_dim_c2,
|
||||
size_t h_dim,
|
||||
float* epsilon_xy_pointer,
|
||||
size_t epsilon_xy_dim_c0,
|
||||
size_t epsilon_xy_dim_c1,
|
||||
float* epsilon_t_pointer,
|
||||
float* weights_pointer,
|
||||
size_t weights_dim_c0,
|
||||
int64_t* input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
size_t number_of_spikes,
|
||||
size_t dim_x,
|
||||
size_t dim_y,
|
||||
float forgetting_offset,
|
||||
float forgetting_offset_local,
|
||||
size_t pattern_id);
|
||||
|
||||
void update_one_ip(
|
||||
float* h_init_ptr,
|
||||
float* h_pointer,
|
||||
size_t h_dim_c1,
|
||||
size_t h_dim,
|
||||
float* weights_pointer,
|
||||
size_t weights_dim_c0,
|
||||
int64_t* input_pointer,
|
||||
size_t input_dim_c1,
|
||||
float* epsilon_xy_pointer,
|
||||
size_t epsilon_xy_dim_c0,
|
||||
float* epsilon_t_pointer,
|
||||
size_t number_of_spikes,
|
||||
float forgetting_offset,
|
||||
float forgetting_offset_local);
|
||||
|
||||
bool gpu_update(
|
||||
float* h_init_ptr,
|
||||
float* h_pointer,
|
||||
size_t h_dim_c0,
|
||||
size_t h_dim_c1,
|
||||
size_t h_dim_c2,
|
||||
size_t h_dim,
|
||||
float* epsilon_xy_pointer,
|
||||
size_t epsilon_xy_dim_c0,
|
||||
size_t epsilon_xy_dim_c1,
|
||||
float* epsilon_t_pointer,
|
||||
float* weights_pointer,
|
||||
size_t weights_dim_c0,
|
||||
int64_t* input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
size_t number_of_spikes,
|
||||
size_t dim_x,
|
||||
size_t dim_y,
|
||||
float forgetting_offset,
|
||||
float forgetting_offset_local,
|
||||
size_t number_of_pattern,
|
||||
size_t gpu_tuning_factor);
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif /* SRC_HDYNAMICCNNMANYIP_H_ */
|
67
network/CPP_Cuda/Makefile
Normal file
67
network/CPP_Cuda/Makefile
Normal file
|
@ -0,0 +1,67 @@
|
|||
# Change to your python bin directory (tested with Python 3.10.4)
|
||||
PYBIN=~/P3.10GPU/bin/
|
||||
NVCC=/usr/local/cuda-12/bin/nvcc -allow-unsupported-compiler
|
||||
CC=/usr/lib64/ccache/clang++
|
||||
|
||||
PYBIND11INCLUDE=`$(PYBIN)python3 -m pybind11 --includes`
|
||||
PARAMETERS_O= -O3 -std=c++14 $(PYBIND11INCLUDE) -ccbin=$(CC) \
|
||||
-Xcompiler "-fPIC -Wall -fopenmp=libomp"
|
||||
|
||||
PARAMETERS_Linker=-Xcompiler "-shared -lm -lomp -lstdc++ -Wall"
|
||||
|
||||
PYPOSTFIX=`$(PYBIN)python3-config --extension-suffix`
|
||||
|
||||
|
||||
all: PyHDynamicCNNManyIP \
|
||||
PySpikeGeneration2DManyIP \
|
||||
PyMultiApp
|
||||
|
||||
#######################
|
||||
|
||||
HDynamicCNNManyIP.o: HDynamicCNNManyIP.h HDynamicCNNManyIP.cu
|
||||
$(NVCC) $(PARAMETERS_O) -c HDynamicCNNManyIP.cu -o HDynamicCNNManyIP.o
|
||||
|
||||
PyHDynamicCNNManyIP.o: HDynamicCNNManyIP.h PyHDynamicCNNManyIP.cpp
|
||||
$(NVCC) $(PARAMETERS_O) -c PyHDynamicCNNManyIP.cpp -o PyHDynamicCNNManyIP.o
|
||||
|
||||
PyHDynamicCNNManyIP: HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o
|
||||
$(NVCC) $(PARAMETERS_Linker) -o PyHDynamicCNNManyIP HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o
|
||||
cp PyHDynamicCNNManyIP PyHDynamicCNNManyIP$(PYPOSTFIX)
|
||||
$(PYBIN)python3 pybind11_auto_pyi.py
|
||||
|
||||
#######################
|
||||
|
||||
SpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h SpikeGeneration2DManyIP.cu
|
||||
$(NVCC) $(PARAMETERS_O) -c SpikeGeneration2DManyIP.cu -o SpikeGeneration2DManyIP.o
|
||||
|
||||
PySpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h PySpikeGeneration2DManyIP.cpp
|
||||
$(NVCC) $(PARAMETERS_O) -c PySpikeGeneration2DManyIP.cpp -o PySpikeGeneration2DManyIP.o
|
||||
|
||||
PySpikeGeneration2DManyIP: SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o
|
||||
$(NVCC) $(PARAMETERS_Linker) -o PySpikeGeneration2DManyIP SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o
|
||||
cp PySpikeGeneration2DManyIP PySpikeGeneration2DManyIP$(PYPOSTFIX)
|
||||
$(PYBIN)python3 pybind11_auto_pyi.py
|
||||
|
||||
|
||||
#######################
|
||||
|
||||
MultiApp.o: MultiApp.h MultiApp.cu approximation_multiplication_function.cpp \
|
||||
gpu_approximation_multiplication_function.cu error_term.cpp gpu_error_term.cu
|
||||
$(NVCC) $(PARAMETERS_O) -c MultiApp.cu -o MultiApp.o
|
||||
|
||||
PyMultiApp.o: MultiApp.h PyMultiApp.cpp
|
||||
$(NVCC) $(PARAMETERS_O) -c PyMultiApp.cpp -o PyMultiApp.o
|
||||
|
||||
PyMultiApp: MultiApp.o PyMultiApp.o
|
||||
$(NVCC) $(PARAMETERS_Linker) -o PyMultiApp MultiApp.o PyMultiApp.o
|
||||
cp PyMultiApp PyMultiApp$(PYPOSTFIX)
|
||||
$(PYBIN)python3 pybind11_auto_pyi.py
|
||||
|
||||
#######################
|
||||
clean:
|
||||
rm -f PyHDynamicCNNManyIP
|
||||
rm -f PySpikeGeneration2DManyIP
|
||||
rm -f PyMultiApp
|
||||
rm -f *.o
|
||||
rm -f *.so
|
||||
|
313
network/CPP_Cuda/MultiApp.cu
Normal file
313
network/CPP_Cuda/MultiApp.cu
Normal file
|
@ -0,0 +1,313 @@
|
|||
#include "MultiApp.h"
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "approximation_multiplication_function.cpp"
|
||||
#include "gpu_approximation_multiplication_function.cu"
|
||||
|
||||
MultiApp::MultiApp()
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
MultiApp::~MultiApp()
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
bool MultiApp::update(float* np_input_pointer,
|
||||
float* np_weight_pointer,
|
||||
float* np_output_pointer, int64_t pattern_dim,
|
||||
int64_t feature_dim, int64_t x_dim, int64_t y_dim,
|
||||
int64_t input_channel_dim, int64_t id_pattern,
|
||||
bool approximation_enable, int64_t number_of_trunc_bits,
|
||||
int64_t number_of_frac_bits)
|
||||
{
|
||||
|
||||
assert((id_pattern >= 0));
|
||||
assert((id_pattern < pattern_dim));
|
||||
|
||||
float* np_input_pointer_pattern;
|
||||
float* np_output_pointer_pattern;
|
||||
|
||||
float* input_ptr;
|
||||
float* output_ptr;
|
||||
float* w_ptr;
|
||||
|
||||
uint64_t pattern_size = input_channel_dim;
|
||||
|
||||
std::vector<float> ap_h_vector;
|
||||
ap_h_vector.resize(pattern_size);
|
||||
float* ap_h_ptr = ap_h_vector.data();
|
||||
|
||||
std::vector<uint32_t> ap_x_vector;
|
||||
ap_x_vector.resize(pattern_size);
|
||||
uint32_t* ap_x_ptr = ap_x_vector.data();
|
||||
|
||||
std::vector<uint32_t> ap_y_vector;
|
||||
ap_y_vector.resize(pattern_size);
|
||||
uint32_t* ap_y_ptr = ap_y_vector.data();
|
||||
|
||||
std::vector<uint32_t> ap_x_exponent_vector;
|
||||
ap_x_exponent_vector.resize(pattern_size);
|
||||
uint32_t* ap_x_exponent_ptr = ap_x_exponent_vector.data();
|
||||
|
||||
std::vector<uint32_t> ap_y_exponent_vector;
|
||||
ap_y_exponent_vector.resize(pattern_size);
|
||||
uint32_t* ap_y_exponent_ptr = ap_y_exponent_vector.data();
|
||||
|
||||
std::vector<uint32_t> ap_h_exponent_vector;
|
||||
ap_h_exponent_vector.resize(pattern_size);
|
||||
uint32_t* ap_h_exponent_ptr = ap_h_exponent_vector.data();
|
||||
|
||||
std::vector<uint64_t> ap_res_vector;
|
||||
ap_res_vector.resize(pattern_size);
|
||||
uint64_t* ap_res_ptr = ap_res_vector.data();
|
||||
|
||||
uint32_t ap_mask = static_cast<uint64_t>(pow(2, number_of_trunc_bits)) - 1;
|
||||
|
||||
std::vector<uint32_t> sign_temp_vector;
|
||||
sign_temp_vector.resize(pattern_size);
|
||||
uint32_t* sign_temp_ptr = sign_temp_vector.data();
|
||||
|
||||
uint64_t input_pattern_size = input_channel_dim * x_dim * y_dim;
|
||||
uint64_t output_pattern_size = feature_dim * x_dim * y_dim;
|
||||
|
||||
np_input_pointer_pattern = np_input_pointer + id_pattern * input_pattern_size;
|
||||
np_output_pointer_pattern =
|
||||
np_output_pointer + id_pattern * output_pattern_size;
|
||||
|
||||
uint64_t counter;
|
||||
|
||||
uint64_t counter_x;
|
||||
uint64_t counter_y;
|
||||
uint64_t counter_feature;
|
||||
uint64_t pos_xy;
|
||||
uint64_t pos_xy_if;
|
||||
|
||||
float temp_sum;
|
||||
|
||||
uint64_t pattern_c_2 = x_dim * y_dim;
|
||||
|
||||
for (counter_x = 0; counter_x < x_dim; counter_x++)
|
||||
{
|
||||
for (counter_y = 0; counter_y < y_dim; counter_y++)
|
||||
{
|
||||
pos_xy = counter_y + counter_x * y_dim;
|
||||
for (counter_feature = 0; counter_feature < feature_dim;
|
||||
counter_feature++)
|
||||
{
|
||||
pos_xy_if = counter_feature * pattern_c_2 + pos_xy;
|
||||
|
||||
input_ptr = np_input_pointer_pattern + pos_xy;
|
||||
output_ptr = np_output_pointer_pattern + pos_xy_if;
|
||||
w_ptr = np_weight_pointer + counter_feature * input_channel_dim;
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_size; counter++)
|
||||
{
|
||||
ap_h_ptr[counter] = input_ptr[counter * pattern_c_2];
|
||||
}
|
||||
|
||||
approximation_multiplication_function(
|
||||
ap_h_ptr, w_ptr, pattern_size, number_of_trunc_bits,
|
||||
number_of_frac_bits, ap_x_ptr, ap_y_ptr, ap_x_exponent_ptr,
|
||||
ap_y_exponent_ptr, ap_h_exponent_ptr, ap_mask, ap_res_ptr,
|
||||
sign_temp_ptr, approximation_enable);
|
||||
|
||||
temp_sum = 0.0;
|
||||
#pragma omp simd reduction(+ \
|
||||
: temp_sum)
|
||||
for (counter = 0; counter < pattern_size; counter++)
|
||||
{
|
||||
temp_sum += ap_h_ptr[counter];
|
||||
}
|
||||
|
||||
output_ptr[0] = temp_sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
bool MultiApp::update_with_init_vector_multi_pattern(
|
||||
int64_t np_input_pointer_addr, int64_t np_weight_pointer_addr,
|
||||
int64_t np_output_pointer_addr, int64_t pattern_dim, int64_t feature_dim,
|
||||
int64_t x_dim, int64_t y_dim, int64_t input_channel_dim,
|
||||
int64_t number_of_processes, bool approximation_enable,
|
||||
int64_t number_of_trunc_bits, int64_t number_of_frac)
|
||||
{
|
||||
|
||||
|
||||
|
||||
int64_t number_of_pattern = pattern_dim;
|
||||
int64_t pattern_id;
|
||||
|
||||
float* np_input_pointer = (float*)np_input_pointer_addr;
|
||||
float* np_weight_pointer = (float*)np_weight_pointer_addr;
|
||||
float* np_output_pointer = (float*)np_output_pointer_addr;
|
||||
|
||||
assert((np_input_pointer != nullptr));
|
||||
assert((np_output_pointer != nullptr));
|
||||
assert((np_weight_pointer != nullptr));
|
||||
|
||||
assert((pattern_dim > 0));
|
||||
assert((feature_dim > 0));
|
||||
assert((x_dim > 0));
|
||||
assert((y_dim > 0));
|
||||
assert((input_channel_dim > 0));
|
||||
|
||||
if (number_of_processes > 0)
|
||||
{
|
||||
omp_set_num_threads(number_of_processes);
|
||||
// For debugging: Only one thread
|
||||
// omp_set_num_threads(1);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++)
|
||||
{
|
||||
|
||||
update(np_input_pointer, np_weight_pointer,
|
||||
np_output_pointer, pattern_dim, feature_dim, x_dim, y_dim,
|
||||
input_channel_dim, pattern_id, approximation_enable,
|
||||
number_of_trunc_bits, number_of_frac);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
update_gpu(np_input_pointer, np_weight_pointer,
|
||||
np_output_pointer, pattern_dim, feature_dim, x_dim, y_dim,
|
||||
input_channel_dim, approximation_enable,
|
||||
number_of_trunc_bits, number_of_frac);
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
__global__ void kernel_approx_multiplication(float* __restrict__ input_pointer, float* __restrict__ weight_pointer,
|
||||
float* __restrict__ output_pointer, uint64_t pattern_dim,
|
||||
uint64_t feature_dim, uint64_t x_dim, uint64_t y_dim,
|
||||
uint64_t input_channel_dim, size_t max_threadable_tasks,
|
||||
uint64_t input_index_scale, uint64_t number_of_frac_bits,
|
||||
bool approximation_enable, uint64_t number_of_trunc_bits,
|
||||
uint32_t ap_mask)
|
||||
{
|
||||
int idx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
|
||||
if (idx < max_threadable_tasks)
|
||||
{
|
||||
int pattern_id = idx / feature_dim;
|
||||
int feature_id = idx - (pattern_id * feature_dim);
|
||||
int x_id = blockIdx.y;
|
||||
int y_id = blockIdx.z;
|
||||
|
||||
float* weight_pointer_sub = weight_pointer + feature_id * input_channel_dim;
|
||||
float* input_pointer_sub = input_pointer + pattern_id * input_channel_dim * x_dim * y_dim + x_id * y_dim + y_id;
|
||||
float* output_pointer_sub = output_pointer +
|
||||
pattern_id * feature_dim * x_dim * y_dim +
|
||||
feature_id * x_dim * y_dim + x_id * y_dim + y_id;
|
||||
*output_pointer_sub = 0.0;
|
||||
|
||||
size_t counter;
|
||||
for (counter = 0; counter < input_channel_dim; counter++)
|
||||
{
|
||||
*output_pointer_sub += gpu_approximation_multiplication_function(
|
||||
weight_pointer_sub[counter],
|
||||
input_pointer_sub[counter * input_index_scale],
|
||||
number_of_frac_bits, approximation_enable,
|
||||
number_of_trunc_bits, ap_mask);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
bool MultiApp::update_gpu(float* np_input_pointer,
|
||||
float* np_weight_pointer,
|
||||
float* np_output_pointer, uint64_t pattern_dim,
|
||||
uint64_t feature_dim, uint64_t x_dim, uint64_t y_dim,
|
||||
uint64_t input_channel_dim,
|
||||
bool approximation_enable, uint64_t number_of_trunc_bits,
|
||||
uint64_t number_of_frac_bits)
|
||||
{
|
||||
|
||||
uint32_t ap_mask = static_cast<uint64_t>(pow(2, number_of_trunc_bits)) - 1;
|
||||
// std::cout << approximation_enable << std::endl;
|
||||
// std::cout << number_of_trunc_bits << std::endl;
|
||||
// std::cout << number_of_frac_bits << std::endl;
|
||||
|
||||
cudaError_t status;
|
||||
assert((x_dim < 65535));
|
||||
assert((y_dim < 65535));
|
||||
|
||||
// //////////////////////////////////////
|
||||
// Get infos about the device
|
||||
// //////////////////////////////////////
|
||||
|
||||
int device;
|
||||
cudaDeviceProp prop;
|
||||
|
||||
status = cudaGetDevice(&device);
|
||||
assert((status == cudaSuccess));
|
||||
// std::cout << "Device ID: " << device << std::endl;
|
||||
|
||||
status = cudaGetDeviceProperties(&prop, device);
|
||||
assert((status == cudaSuccess));
|
||||
// std::cout << "Device name: " << prop.name << std::endl;
|
||||
|
||||
// //////////////////////////////////////
|
||||
// Calculate the distribution on the GPU
|
||||
// //////////////////////////////////////
|
||||
|
||||
int min_grid_size;
|
||||
int block_size;
|
||||
int grid_size;
|
||||
|
||||
size_t dynamic_s_mem_size = 0;
|
||||
size_t max_threadable_tasks = pattern_dim * feature_dim * x_dim * y_dim;
|
||||
|
||||
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html?highlight=blocksize#occupancy-calculator
|
||||
status = cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size,
|
||||
(void*)kernel_approx_multiplication,
|
||||
dynamic_s_mem_size, max_threadable_tasks);
|
||||
assert((status == cudaSuccess));
|
||||
|
||||
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications
|
||||
// Maximum dimensionality of grid of thread blocks: 3
|
||||
// Maximum x -dimension of a grid of thread blocks: (2^31)-1
|
||||
// Maximum y- or z-dimension of a grid of thread blocks: 65535
|
||||
|
||||
// Round up according to array size
|
||||
grid_size = ((pattern_dim * feature_dim) + block_size - 1) / block_size;
|
||||
|
||||
// std::cout << min_grid_size << std::endl;
|
||||
// std::cout << grid_size << std::endl;
|
||||
// std::cout << block_size << std::endl;
|
||||
// std::cout << max_threadable_tasks << std::endl;
|
||||
|
||||
dim3 grid(grid_size, x_dim, y_dim);
|
||||
|
||||
kernel_approx_multiplication<<<grid, block_size>>>(np_input_pointer,
|
||||
np_weight_pointer,
|
||||
np_output_pointer,
|
||||
pattern_dim,
|
||||
feature_dim,
|
||||
x_dim,
|
||||
y_dim,
|
||||
input_channel_dim,
|
||||
(pattern_dim * feature_dim),
|
||||
(x_dim * y_dim),
|
||||
number_of_frac_bits,
|
||||
approximation_enable,
|
||||
number_of_trunc_bits,
|
||||
ap_mask);
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
return true;
|
||||
};
|
39
network/CPP_Cuda/MultiApp.h
Normal file
39
network/CPP_Cuda/MultiApp.h
Normal file
|
@ -0,0 +1,39 @@
|
|||
#ifndef SRC_MultiApp_H_
|
||||
#define SRC_MultiApp_H_
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cctype>
|
||||
#include <iostream>
|
||||
|
||||
class MultiApp
|
||||
{
|
||||
public:
|
||||
MultiApp();
|
||||
~MultiApp();
|
||||
|
||||
bool update(float *np_input_pointer, float *np_weight_pointer,
|
||||
float *np_output_pointer, int64_t pattern_dim,
|
||||
int64_t feature_dim, int64_t x_dim, int64_t y_dim,
|
||||
int64_t input_channel_dim, int64_t id_pattern,
|
||||
bool approximation_enable, int64_t number_of_trunc_bits,
|
||||
int64_t number_of_frac);
|
||||
|
||||
bool update_gpu(float *input_pointer, float *weight_pointer,
|
||||
float *output_pointer, uint64_t pattern_dim,
|
||||
uint64_t feature_dim, uint64_t x_dim, uint64_t y_dim,
|
||||
uint64_t input_channel_dim,
|
||||
bool approximation_enable, uint64_t number_of_trunc_bits,
|
||||
uint64_t number_of_frac);
|
||||
|
||||
bool update_with_init_vector_multi_pattern(
|
||||
int64_t np_input_pointer_addr, int64_t np_weight_pointer_addr,
|
||||
int64_t np_output_pointer_addr, int64_t pattern_dim, int64_t feature_dim,
|
||||
int64_t x_dim, int64_t y_dim, int64_t input_channel_dim,
|
||||
int64_t number_of_processes, bool approximation_enable,
|
||||
int64_t number_of_trunc_bits, int64_t number_of_frac);
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
#endif /* SRC_MultiApp_H_ */
|
14
network/CPP_Cuda/PyHDynamicCNNManyIP.cpp
Normal file
14
network/CPP_Cuda/PyHDynamicCNNManyIP.cpp
Normal file
|
@ -0,0 +1,14 @@
|
|||
#include <pybind11/pybind11.h>
|
||||
|
||||
#include "HDynamicCNNManyIP.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
PYBIND11_MODULE(PyHDynamicCNNManyIP, m)
|
||||
{
|
||||
m.doc() = "HDynamicCNNManyIP Module";
|
||||
py::class_<HDynamicCNNManyIP>(m, "HDynamicCNNManyIP")
|
||||
.def(py::init<>())
|
||||
.def("update",
|
||||
&HDynamicCNNManyIP::update_entrypoint);
|
||||
}
|
18
network/CPP_Cuda/PyHDynamicCNNManyIP.pyi
Normal file
18
network/CPP_Cuda/PyHDynamicCNNManyIP.pyi
Normal file
|
@ -0,0 +1,18 @@
|
|||
#
|
||||
# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!
|
||||
#
|
||||
|
||||
"""HDynamicCNNManyIP Module"""
|
||||
from __future__ import annotations
|
||||
import PyHDynamicCNNManyIP
|
||||
import typing
|
||||
|
||||
__all__ = [
|
||||
"HDynamicCNNManyIP"
|
||||
]
|
||||
|
||||
|
||||
class HDynamicCNNManyIP():
|
||||
def __init__(self) -> None: ...
|
||||
def update(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int, arg16: int, arg17: int, arg18: int, arg19: int, arg20: int, arg21: int, arg22: float, arg23: int) -> bool: ...
|
||||
pass
|
14
network/CPP_Cuda/PyMultiApp.cpp
Normal file
14
network/CPP_Cuda/PyMultiApp.cpp
Normal file
|
@ -0,0 +1,14 @@
|
|||
|
||||
#include <pybind11/pybind11.h>
|
||||
|
||||
#include "MultiApp.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
PYBIND11_MODULE(PyMultiApp, m) {
|
||||
m.doc() = "MultiApp Module";
|
||||
py::class_<MultiApp>(m, "MultiApp")
|
||||
.def(py::init<>())
|
||||
.def("update_with_init_vector_multi_pattern",
|
||||
&MultiApp::update_with_init_vector_multi_pattern);
|
||||
}
|
18
network/CPP_Cuda/PyMultiApp.pyi
Normal file
18
network/CPP_Cuda/PyMultiApp.pyi
Normal file
|
@ -0,0 +1,18 @@
|
|||
#
|
||||
# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!
|
||||
#
|
||||
|
||||
"""MultiApp Module"""
|
||||
from __future__ import annotations
|
||||
import PyMultiApp
|
||||
import typing
|
||||
|
||||
__all__ = [
|
||||
"MultiApp"
|
||||
]
|
||||
|
||||
|
||||
class MultiApp():
|
||||
def __init__(self) -> None: ...
|
||||
def update_with_init_vector_multi_pattern(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: bool, arg10: int, arg11: int) -> bool: ...
|
||||
pass
|
15
network/CPP_Cuda/PySpikeGeneration2DManyIP.cpp
Normal file
15
network/CPP_Cuda/PySpikeGeneration2DManyIP.cpp
Normal file
|
@ -0,0 +1,15 @@
|
|||
|
||||
#include <pybind11/pybind11.h>
|
||||
|
||||
#include "SpikeGeneration2DManyIP.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
PYBIND11_MODULE(PySpikeGeneration2DManyIP, m)
|
||||
{
|
||||
m.doc() = "SpikeGeneration2DManyIP Module";
|
||||
py::class_<SpikeGeneration2DManyIP>(m, "SpikeGeneration2DManyIP")
|
||||
.def(py::init<>())
|
||||
.def("spike_generation",
|
||||
&SpikeGeneration2DManyIP::spike_generation_entrypoint);
|
||||
}
|
18
network/CPP_Cuda/PySpikeGeneration2DManyIP.pyi
Normal file
18
network/CPP_Cuda/PySpikeGeneration2DManyIP.pyi
Normal file
|
@ -0,0 +1,18 @@
|
|||
#
|
||||
# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!
|
||||
#
|
||||
|
||||
"""SpikeGeneration2DManyIP Module"""
|
||||
from __future__ import annotations
|
||||
import PySpikeGeneration2DManyIP
|
||||
import typing
|
||||
|
||||
__all__ = [
|
||||
"SpikeGeneration2DManyIP"
|
||||
]
|
||||
|
||||
|
||||
class SpikeGeneration2DManyIP():
|
||||
def __init__(self) -> None: ...
|
||||
def spike_generation(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int) -> bool: ...
|
||||
pass
|
390
network/CPP_Cuda/SpikeGeneration2DManyIP.cu
Normal file
390
network/CPP_Cuda/SpikeGeneration2DManyIP.cu
Normal file
|
@ -0,0 +1,390 @@
|
|||
#include "SpikeGeneration2DManyIP.h"
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
SpikeGeneration2DManyIP::SpikeGeneration2DManyIP()
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
SpikeGeneration2DManyIP::~SpikeGeneration2DManyIP()
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
bool SpikeGeneration2DManyIP::spike_generation_entrypoint(
|
||||
int64_t input_pointer_addr, int64_t input_dim_0,
|
||||
int64_t input_dim_1, int64_t input_dim_2, int64_t input_dim_3,
|
||||
int64_t random_values_pointer_addr, int64_t random_values_dim_0,
|
||||
int64_t random_values_dim_1, int64_t random_values_dim_2,
|
||||
int64_t random_values_dim_3, int64_t output_pointer_addr,
|
||||
int64_t output_dim_0, int64_t output_dim_1, int64_t output_dim_2,
|
||||
int64_t output_dim_3, int64_t number_of_cpu_processes)
|
||||
{
|
||||
|
||||
float* input_pointer = (float*)input_pointer_addr;
|
||||
float* random_values_pointer = (float*)random_values_pointer_addr;
|
||||
int64_t* output_pointer = (int64_t*)output_pointer_addr;
|
||||
|
||||
// Input
|
||||
assert((input_pointer != nullptr));
|
||||
assert((input_dim_0 > 0));
|
||||
assert((input_dim_1 > 0));
|
||||
assert((input_dim_2 > 0));
|
||||
assert((input_dim_3 > 0));
|
||||
|
||||
// Random
|
||||
assert((random_values_pointer != nullptr));
|
||||
assert((random_values_dim_0 > 0));
|
||||
assert((random_values_dim_1 > 0));
|
||||
assert((random_values_dim_2 > 0));
|
||||
assert((random_values_dim_3 > 0));
|
||||
|
||||
// Output
|
||||
assert((output_pointer != nullptr));
|
||||
assert((output_dim_0 > 0));
|
||||
assert((output_dim_1 > 0));
|
||||
assert((output_dim_2 > 0));
|
||||
assert((output_dim_3 > 0));
|
||||
|
||||
// Input
|
||||
size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3;
|
||||
size_t input_dim_c1 = input_dim_2 * input_dim_3;
|
||||
size_t input_dim_c2 = input_dim_3;
|
||||
|
||||
// Random
|
||||
size_t random_values_dim_c0 =
|
||||
random_values_dim_1 * random_values_dim_2 * random_values_dim_3;
|
||||
size_t random_values_dim_c1 =
|
||||
random_values_dim_2 * random_values_dim_3;
|
||||
size_t random_values_dim_c2 = random_values_dim_3;
|
||||
|
||||
// Output
|
||||
size_t output_dim_c0 =
|
||||
output_dim_1 * output_dim_2 * output_dim_3;
|
||||
size_t output_dim_c1 = output_dim_2 * output_dim_3;
|
||||
size_t output_dim_c2 = output_dim_3;
|
||||
|
||||
size_t number_of_pattern = input_dim_0;
|
||||
size_t h_dim = input_dim_1;
|
||||
size_t spike_dim = output_dim_1;
|
||||
size_t x_dim = output_dim_2;
|
||||
size_t y_dim = output_dim_2;
|
||||
|
||||
if (number_of_cpu_processes > 0)
|
||||
{
|
||||
|
||||
omp_set_num_threads(number_of_cpu_processes);
|
||||
// DEBUG:
|
||||
// omp_set_num_threads(1);
|
||||
|
||||
size_t pattern_id;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++)
|
||||
{
|
||||
spike_generation(
|
||||
input_pointer,
|
||||
input_dim_c0,
|
||||
input_dim_c1,
|
||||
input_dim_c2,
|
||||
random_values_pointer,
|
||||
random_values_dim_c0,
|
||||
random_values_dim_c1,
|
||||
random_values_dim_c2,
|
||||
output_pointer,
|
||||
output_dim_c0,
|
||||
output_dim_c1,
|
||||
output_dim_c2,
|
||||
x_dim,
|
||||
y_dim,
|
||||
spike_dim,
|
||||
h_dim,
|
||||
pattern_id);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
gpu_spike_generation(
|
||||
input_pointer,
|
||||
input_dim_c0,
|
||||
input_dim_c1,
|
||||
input_dim_c2,
|
||||
random_values_pointer,
|
||||
random_values_dim_c0,
|
||||
random_values_dim_c1,
|
||||
random_values_dim_c2,
|
||||
output_pointer,
|
||||
output_dim_c0,
|
||||
output_dim_c1,
|
||||
output_dim_c2,
|
||||
x_dim,
|
||||
y_dim,
|
||||
spike_dim,
|
||||
h_dim,
|
||||
number_of_pattern);
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
bool SpikeGeneration2DManyIP::spike_generation(
|
||||
float* input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
float* random_values_pointer,
|
||||
size_t random_values_dim_c0,
|
||||
size_t random_values_dim_c1,
|
||||
size_t random_values_dim_c2,
|
||||
int64_t* output_pointer,
|
||||
size_t output_dim_c0,
|
||||
size_t output_dim_c1,
|
||||
size_t output_dim_c2,
|
||||
size_t x_dim,
|
||||
size_t y_dim,
|
||||
size_t spike_dim,
|
||||
size_t h_dim,
|
||||
size_t pattern_id)
|
||||
{
|
||||
|
||||
size_t counter;
|
||||
size_t counter_x = 0;
|
||||
size_t counter_y = 0;
|
||||
|
||||
float* p_ptr = nullptr;
|
||||
int64_t* out_ptr = nullptr;
|
||||
float* rand_ptr = nullptr;
|
||||
|
||||
for (counter_x = 0; counter_x < x_dim; counter_x++)
|
||||
{
|
||||
for (counter_y = 0; counter_y < y_dim; counter_y++)
|
||||
{
|
||||
p_ptr = input_pointer + pattern_id * input_dim_c0 +
|
||||
counter_x * input_dim_c2 + counter_y;
|
||||
// + counter * input_dim_c1
|
||||
|
||||
out_ptr = output_pointer + pattern_id * output_dim_c0 +
|
||||
counter_x * output_dim_c2 + counter_y;
|
||||
// + counter * output_dim_c1
|
||||
|
||||
rand_ptr = random_values_pointer +
|
||||
pattern_id * random_values_dim_c0 +
|
||||
counter_x * random_values_dim_c2 + counter_y;
|
||||
// + counter * random_values_dim_c1
|
||||
|
||||
for (counter = 0; counter < spike_dim; counter++)
|
||||
{
|
||||
out_ptr[counter * output_dim_c1] = lower_bound(p_ptr,
|
||||
h_dim,
|
||||
input_dim_c1,
|
||||
rand_ptr[counter * random_values_dim_c1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
// algorithmic idea stolen from libc++
|
||||
size_t SpikeGeneration2DManyIP::lower_bound(float* data_ptr,
|
||||
size_t data_length,
|
||||
size_t data_ptr_stride,
|
||||
float compare_to_value)
|
||||
{
|
||||
|
||||
size_t start_of_range = 0;
|
||||
size_t length_of_range = data_length;
|
||||
|
||||
while (length_of_range != 0)
|
||||
{
|
||||
size_t half_length = length_of_range >> 1;
|
||||
size_t actual_position = start_of_range + half_length;
|
||||
|
||||
if (data_ptr[actual_position * data_ptr_stride] < compare_to_value)
|
||||
{
|
||||
start_of_range = ++actual_position;
|
||||
length_of_range -= half_length + 1;
|
||||
}
|
||||
else
|
||||
length_of_range = half_length;
|
||||
}
|
||||
return start_of_range;
|
||||
};
|
||||
|
||||
__device__ size_t gpu_lower_bound(float* __restrict__ data_ptr,
|
||||
size_t data_length,
|
||||
size_t data_ptr_stride,
|
||||
float compare_to_value)
|
||||
{
|
||||
|
||||
size_t start_of_range = 0;
|
||||
size_t length_of_range = data_length;
|
||||
|
||||
while (length_of_range != 0)
|
||||
{
|
||||
size_t half_length = length_of_range >> 1;
|
||||
size_t actual_position = start_of_range + half_length;
|
||||
|
||||
if (data_ptr[actual_position * data_ptr_stride] < compare_to_value)
|
||||
{
|
||||
start_of_range = ++actual_position;
|
||||
length_of_range -= half_length + 1;
|
||||
}
|
||||
else
|
||||
length_of_range = half_length;
|
||||
}
|
||||
return start_of_range;
|
||||
};
|
||||
|
||||
__global__ void kernel_spike_generation(
|
||||
float* __restrict__ input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
float* __restrict__ random_values_pointer,
|
||||
size_t random_values_dim_c0,
|
||||
size_t random_values_dim_c1,
|
||||
size_t random_values_dim_c2,
|
||||
int64_t* __restrict__ output_pointer,
|
||||
size_t output_dim_c0,
|
||||
size_t output_dim_c1,
|
||||
size_t output_dim_c2,
|
||||
size_t x_dim,
|
||||
size_t y_dim,
|
||||
size_t spike_dim,
|
||||
size_t h_dim,
|
||||
size_t max_threadable_tasks)
|
||||
{
|
||||
int idx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
|
||||
if (idx < max_threadable_tasks)
|
||||
{
|
||||
|
||||
size_t pattern_id = idx / spike_dim;
|
||||
size_t position_spike = idx - (pattern_id * spike_dim);
|
||||
|
||||
size_t position_x = blockIdx.y;
|
||||
size_t position_y = blockIdx.z;
|
||||
|
||||
float* p_ptr = input_pointer + pattern_id * input_dim_c0 +
|
||||
position_x * input_dim_c2 + position_y;
|
||||
|
||||
int64_t* out_ptr = output_pointer + pattern_id * output_dim_c0 +
|
||||
position_x * output_dim_c2 + position_y
|
||||
+ position_spike * output_dim_c1;
|
||||
|
||||
float* rand_ptr = random_values_pointer +
|
||||
pattern_id * random_values_dim_c0 +
|
||||
position_x * random_values_dim_c2 + position_y
|
||||
+ position_spike * random_values_dim_c1;
|
||||
|
||||
*out_ptr = gpu_lower_bound(p_ptr,
|
||||
h_dim,
|
||||
input_dim_c1,
|
||||
*rand_ptr);
|
||||
}
|
||||
};
|
||||
|
||||
bool SpikeGeneration2DManyIP::gpu_spike_generation(
|
||||
float* input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
float* random_values_pointer,
|
||||
size_t random_values_dim_c0,
|
||||
size_t random_values_dim_c1,
|
||||
size_t random_values_dim_c2,
|
||||
int64_t* output_pointer,
|
||||
size_t output_dim_c0,
|
||||
size_t output_dim_c1,
|
||||
size_t output_dim_c2,
|
||||
size_t x_dim,
|
||||
size_t y_dim,
|
||||
size_t spike_dim,
|
||||
size_t h_dim,
|
||||
size_t number_of_pattern)
|
||||
{
|
||||
cudaError_t status;
|
||||
assert((x_dim < 65535));
|
||||
assert((y_dim < 65535));
|
||||
|
||||
// // //////////////////////////////////////
|
||||
// // Get infos about the device
|
||||
// // //////////////////////////////////////
|
||||
|
||||
// int device;
|
||||
// cudaDeviceProp prop;
|
||||
|
||||
// status = cudaGetDevice(&device);
|
||||
// assert((status == cudaSuccess));
|
||||
// // std::cout << "Device ID: " << device << std::endl;
|
||||
|
||||
// status = cudaGetDeviceProperties(&prop, device);
|
||||
// assert((status == cudaSuccess));
|
||||
// // std::cout << "Device name: " << prop.name << std::endl;
|
||||
|
||||
// //////////////////////////////////////
|
||||
// Calculate the distribution on the GPU
|
||||
// //////////////////////////////////////
|
||||
|
||||
int min_grid_size;
|
||||
int block_size;
|
||||
int grid_size;
|
||||
|
||||
size_t dynamic_s_mem_size = 0;
|
||||
size_t max_threadable_tasks = number_of_pattern * spike_dim * x_dim * y_dim;
|
||||
|
||||
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html?highlight=blocksize#occupancy-calculator
|
||||
status = cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size,
|
||||
(void*)kernel_spike_generation,
|
||||
dynamic_s_mem_size, max_threadable_tasks);
|
||||
assert((status == cudaSuccess));
|
||||
|
||||
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications
|
||||
// Maximum dimensionality of grid of thread blocks: 3
|
||||
// Maximum x -dimension of a grid of thread blocks: (2^31)-1
|
||||
// Maximum y- or z-dimension of a grid of thread blocks: 65535
|
||||
|
||||
// Round up according to array size
|
||||
// (I will separate x and y into other grid dimentsions soon)
|
||||
grid_size = ((number_of_pattern * spike_dim) + block_size - 1) / block_size;
|
||||
|
||||
// std::cout << min_grid_size << std::endl;
|
||||
// std::cout << grid_size << std::endl;
|
||||
// std::cout << block_size << std::endl;
|
||||
// std::cout << max_threadable_tasks << std::endl;
|
||||
|
||||
dim3 grid(grid_size, x_dim, y_dim);
|
||||
|
||||
|
||||
kernel_spike_generation<<<grid, block_size >>>(
|
||||
input_pointer,
|
||||
input_dim_c0,
|
||||
input_dim_c1,
|
||||
input_dim_c2,
|
||||
random_values_pointer,
|
||||
random_values_dim_c0,
|
||||
random_values_dim_c1,
|
||||
random_values_dim_c2,
|
||||
output_pointer,
|
||||
output_dim_c0,
|
||||
output_dim_c1,
|
||||
output_dim_c2,
|
||||
x_dim,
|
||||
y_dim,
|
||||
spike_dim,
|
||||
h_dim,
|
||||
(number_of_pattern * spike_dim));
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
return true;
|
||||
};
|
68
network/CPP_Cuda/SpikeGeneration2DManyIP.h
Normal file
68
network/CPP_Cuda/SpikeGeneration2DManyIP.h
Normal file
|
@ -0,0 +1,68 @@
|
|||
#ifndef SRC_SPIKEGENERATION2DMANYIP_H_
|
||||
#define SRC_SPIKEGENERATION2DMANYIP_H_
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cctype>
|
||||
#include <iostream>
|
||||
|
||||
class SpikeGeneration2DManyIP
|
||||
{
|
||||
public:
|
||||
SpikeGeneration2DManyIP();
|
||||
~SpikeGeneration2DManyIP();
|
||||
|
||||
bool spike_generation_entrypoint(
|
||||
int64_t input_pointer_addr, int64_t input_dim_0,
|
||||
int64_t input_dim_1, int64_t input_dim_2, int64_t input_dim_3,
|
||||
int64_t random_values_pointer_addr, int64_t random_values_dim_0,
|
||||
int64_t random_values_dim_1, int64_t random_values_dim_2,
|
||||
int64_t random_values_dim_3, int64_t output_pointer_addr,
|
||||
int64_t output_dim_0, int64_t output_dim_1, int64_t output_dim_2,
|
||||
int64_t output_dim_3, int64_t number_of_cpu_processes);
|
||||
|
||||
bool spike_generation(
|
||||
float* input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
float* random_values_pointer,
|
||||
size_t random_values_dim_c0,
|
||||
size_t random_values_dim_c1,
|
||||
size_t random_values_dim_c2,
|
||||
int64_t* output_pointer,
|
||||
size_t output_dim_c0,
|
||||
size_t output_dim_c1,
|
||||
size_t output_dim_c2,
|
||||
size_t x_dim,
|
||||
size_t y_dim,
|
||||
size_t spike_dim,
|
||||
size_t h_dim,
|
||||
size_t pattern_id);
|
||||
|
||||
bool gpu_spike_generation(
|
||||
float* input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
float* random_values_pointer,
|
||||
size_t random_values_dim_c0,
|
||||
size_t random_values_dim_c1,
|
||||
size_t random_values_dim_c2,
|
||||
int64_t* output_pointer,
|
||||
size_t output_dim_c0,
|
||||
size_t output_dim_c1,
|
||||
size_t output_dim_c2,
|
||||
size_t x_dim,
|
||||
size_t y_dim,
|
||||
size_t spike_dim,
|
||||
size_t h_dim,
|
||||
size_t number_of_pattern);
|
||||
|
||||
private:
|
||||
size_t lower_bound(float* data_ptr, size_t data_length,
|
||||
size_t data_ptr_stride,
|
||||
float compare_to_value);
|
||||
};
|
||||
|
||||
#endif /* SRC_SPIKEGENERATION2DMANYIP_H_ */
|
138
network/CPP_Cuda/approximation_multiplication_function.cpp
Normal file
138
network/CPP_Cuda/approximation_multiplication_function.cpp
Normal file
|
@ -0,0 +1,138 @@
|
|||
#include <unistd.h>
|
||||
|
||||
#include <bitset>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
|
||||
#include "error_term.cpp"
|
||||
|
||||
// Best way to plot the bits
|
||||
// std::cout << std::bitset<32>(ap_y_ptr[1]) << "\n";
|
||||
|
||||
// The result needs to be written back into h_pointer (which contains h)
|
||||
// Don't write to w_pointer.
|
||||
void approximation_multiplication_function(
|
||||
float *h_pointer, float *w_pointer, int64_t pattern_length,
|
||||
uint64_t number_of_trunc_bits, uint64_t number_of_frac_bits,
|
||||
uint32_t *ap_x_ptr, uint32_t *ap_y_ptr, uint32_t *ap_x_exponent_ptr,
|
||||
uint32_t *ap_y_exponent_ptr, uint32_t *ap_h_exponent_ptr, uint32_t ap_mask,
|
||||
uint64_t *ap_res_ptr, uint32_t *sign_temp_ptr, bool approximation_enable) {
|
||||
uint64_t counter;
|
||||
|
||||
uint32_t *w_pointer_mod = (uint32_t *)w_pointer;
|
||||
uint32_t *h_pointer_mod = (uint32_t *)h_pointer;
|
||||
|
||||
// Calculate the new sign
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
sign_temp_ptr[counter] = (w_pointer_mod[counter] & 0x80000000) ^
|
||||
(h_pointer_mod[counter] & 0x80000000);
|
||||
}
|
||||
|
||||
// Extract the exponent
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_x_exponent_ptr[counter] = (h_pointer_mod[counter] << 1) >> 24;
|
||||
}
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_y_exponent_ptr[counter] = (w_pointer_mod[counter] << 1) >> 24;
|
||||
}
|
||||
|
||||
// Cast and "normalize"
|
||||
uint64_t shift_value = 32 - number_of_frac_bits;
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_x_ptr[counter] =
|
||||
((h_pointer_mod[counter] << 8) | 0x80000000) >> shift_value;
|
||||
}
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_y_ptr[counter] =
|
||||
((w_pointer_mod[counter] << 8) | 0x80000000) >> shift_value;
|
||||
}
|
||||
|
||||
// Make the zero -g-r-e-a-t- correct again
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
if (h_pointer[counter] == 0) {
|
||||
ap_x_ptr[counter] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
if (w_pointer[counter] == 0) {
|
||||
ap_y_ptr[counter] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// res = x*y
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_res_ptr[counter] = static_cast<uint64_t>(ap_x_ptr[counter]) * static_cast<uint64_t>(ap_y_ptr[counter]);
|
||||
}
|
||||
|
||||
uint32_t temp;
|
||||
if (approximation_enable == true){
|
||||
// Go through the vector values
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
temp = error_term(ap_y_ptr[counter], ap_x_ptr[counter], ap_mask,
|
||||
number_of_trunc_bits);
|
||||
if (temp > ap_res_ptr[counter]) {
|
||||
ap_res_ptr[counter] = 0;
|
||||
} else {
|
||||
ap_res_ptr[counter] -= temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Cast from int to float
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
h_pointer[counter] = static_cast<float>(ap_res_ptr[counter]);
|
||||
}
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_h_exponent_ptr[counter] = (h_pointer_mod[counter] << 1) >> 24;
|
||||
}
|
||||
|
||||
// devide by the 2^number_of_frac_bits
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_h_exponent_ptr[counter] -= 2 * number_of_frac_bits;
|
||||
}
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
temp = ap_x_exponent_ptr[counter] + ap_y_exponent_ptr[counter] +
|
||||
ap_h_exponent_ptr[counter];
|
||||
if (temp > 252) {
|
||||
ap_h_exponent_ptr[counter] = temp - 252;
|
||||
} else {
|
||||
// Here I try to catch the case that the new exponent is too small
|
||||
ap_h_exponent_ptr[counter] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the old exponent
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
h_pointer_mod[counter] = (h_pointer_mod[counter] << 9) >> 9;
|
||||
}
|
||||
|
||||
// Install the new exponent
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
h_pointer_mod[counter] += ap_h_exponent_ptr[counter] << 23;
|
||||
}
|
||||
|
||||
// Add the sign back
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
h_pointer_mod[counter] += sign_temp_ptr[counter];
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
28
network/CPP_Cuda/error_term.cpp
Normal file
28
network/CPP_Cuda/error_term.cpp
Normal file
|
@ -0,0 +1,28 @@
|
|||
#include <unistd.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
|
||||
uint32_t error_term(uint32_t a, uint32_t b, uint32_t ap_mask,
|
||||
uint32_t number_of_trunc_bits) {
|
||||
uint32_t error_value = 0;
|
||||
|
||||
uint32_t temp_shift_a = a;
|
||||
uint32_t temp_shift_b = b & ap_mask;
|
||||
|
||||
uint32_t counter_trunc;
|
||||
uint32_t temp;
|
||||
|
||||
// Go through the bits
|
||||
for (counter_trunc = 0; counter_trunc < number_of_trunc_bits;
|
||||
counter_trunc++) {
|
||||
temp = temp_shift_a & 1;
|
||||
if (temp == 1) {
|
||||
error_value += temp_shift_b & ap_mask;
|
||||
}
|
||||
temp_shift_a >>= 1;
|
||||
temp_shift_b <<= 1;
|
||||
}
|
||||
|
||||
return error_value;
|
||||
}
|
102
network/CPP_Cuda/gpu_approximation_multiplication_function.cu
Normal file
102
network/CPP_Cuda/gpu_approximation_multiplication_function.cu
Normal file
|
@ -0,0 +1,102 @@
|
|||
#include "gpu_error_term.cu"
|
||||
|
||||
__device__ float gpu_approximation_multiplication_function(
|
||||
float weight,
|
||||
float input,
|
||||
uint64_t number_of_frac_bits,
|
||||
bool approximation_enable,
|
||||
uint64_t number_of_trunc_bits,
|
||||
uint32_t ap_mask)
|
||||
{
|
||||
|
||||
float weight_copy = weight;
|
||||
float input_copy = input;
|
||||
|
||||
uint32_t *weight_pointer_mod = (uint32_t *)&weight_copy;
|
||||
uint32_t *input_pointer_mod = (uint32_t *)&input_copy;
|
||||
|
||||
// Calculate the new sign
|
||||
uint32_t sign_temp = (*weight_pointer_mod & 0x80000000) ^
|
||||
(*input_pointer_mod & 0x80000000);
|
||||
|
||||
// Extract the exponent
|
||||
uint32_t ap_input_exponent = (*input_pointer_mod << 1) >> 24;
|
||||
uint32_t ap_weight_exponent = (*weight_pointer_mod << 1) >> 24;
|
||||
|
||||
// Cast and "normalize"
|
||||
uint64_t shift_value = 32 - number_of_frac_bits;
|
||||
|
||||
uint32_t ap_input_mantissa =
|
||||
((*input_pointer_mod << 8) | 0x80000000) >> shift_value;
|
||||
|
||||
uint32_t ap_weight_mantissa =
|
||||
((*weight_pointer_mod << 8) | 0x80000000) >> shift_value;
|
||||
|
||||
// Make the zero -g-r-e-a-t- correct again
|
||||
if (input == 0)
|
||||
{
|
||||
ap_input_mantissa = 0;
|
||||
}
|
||||
|
||||
if (weight == 0)
|
||||
{
|
||||
ap_weight_mantissa = 0;
|
||||
}
|
||||
|
||||
// res = x*y
|
||||
uint64_t ap_result = static_cast<uint64_t>(ap_input_mantissa) * static_cast<uint64_t>(ap_weight_mantissa);
|
||||
|
||||
uint32_t temp;
|
||||
// --------------------------------------------
|
||||
// Approx
|
||||
// --------------------------------------------
|
||||
|
||||
if (approximation_enable == true)
|
||||
{
|
||||
// Go through the vector values
|
||||
temp = gpu_error_term(ap_weight_mantissa, ap_input_mantissa, ap_mask,
|
||||
number_of_trunc_bits);
|
||||
if (temp > ap_result)
|
||||
{
|
||||
ap_result = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
ap_result -= temp;
|
||||
}
|
||||
}
|
||||
|
||||
// Cast from int to float
|
||||
float output = static_cast<float>(ap_result);
|
||||
if (ap_result == 0)
|
||||
{
|
||||
output = 0.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t *output_pointer_mod = (uint32_t *)&output;
|
||||
|
||||
uint32_t ap_output_exponent = (*output_pointer_mod << 1) >> 24;
|
||||
ap_output_exponent -= 2 * number_of_frac_bits;
|
||||
temp = ap_input_exponent + ap_weight_exponent + ap_output_exponent;
|
||||
if (temp > 252)
|
||||
{
|
||||
ap_output_exponent = temp - 252;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Here I try to catch the case that the new exponent is too small
|
||||
ap_output_exponent = 0;
|
||||
}
|
||||
|
||||
// Remove the old exponent
|
||||
*output_pointer_mod = (*output_pointer_mod << 9) >> 9;
|
||||
|
||||
// Install the new exponent
|
||||
*output_pointer_mod += ap_output_exponent << 23;
|
||||
|
||||
// Add the sign back
|
||||
*output_pointer_mod += sign_temp;
|
||||
}
|
||||
return output;
|
||||
};
|
28
network/CPP_Cuda/gpu_error_term.cu
Normal file
28
network/CPP_Cuda/gpu_error_term.cu
Normal file
|
@ -0,0 +1,28 @@
|
|||
__device__ uint32_t gpu_error_term(uint32_t ap_weight_mantissa,
|
||||
uint32_t ap_input_mantissa,
|
||||
uint32_t ap_mask,
|
||||
uint32_t number_of_trunc_bits)
|
||||
{
|
||||
uint32_t error_value = 0;
|
||||
|
||||
uint32_t temp_shift_a = ap_weight_mantissa;
|
||||
uint32_t temp_shift_b = ap_input_mantissa & ap_mask;
|
||||
|
||||
uint32_t counter_trunc;
|
||||
uint32_t temp;
|
||||
|
||||
// Go through the bits
|
||||
for (counter_trunc = 0; counter_trunc < number_of_trunc_bits;
|
||||
counter_trunc++)
|
||||
{
|
||||
temp = temp_shift_a & 1;
|
||||
if (temp == 1)
|
||||
{
|
||||
error_value += temp_shift_b & ap_mask;
|
||||
}
|
||||
temp_shift_a >>= 1;
|
||||
temp_shift_b <<= 1;
|
||||
}
|
||||
|
||||
return error_value;
|
||||
}
|
23
network/CPP_Cuda/pybind11_auto_pyi.py
Normal file
23
network/CPP_Cuda/pybind11_auto_pyi.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
# %%
|
||||
# pip install pybind11-stubgen
|
||||
from pybind11_stubgen import ModuleStubsGenerator # type: ignore
|
||||
import glob
|
||||
|
||||
|
||||
def process(module_name: str) -> None:
|
||||
module = ModuleStubsGenerator(module_name)
|
||||
module.parse()
|
||||
module.write_setup_py = False
|
||||
|
||||
with open(module_name + ".pyi", "w") as fp:
|
||||
fp.write("#\n# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!\n#\n\n")
|
||||
fp.write("\n".join(module.to_lines()))
|
||||
|
||||
|
||||
Files = glob.glob("*.so")
|
||||
|
||||
for fid in Files:
|
||||
Idx: int = fid.find(".")
|
||||
module_name: str = fid[:Idx]
|
||||
print("Processing: " + module_name)
|
||||
process(module_name)
|
339
network/CPP_NoCuda/HDynamicCNNManyIP.cpp
Normal file
339
network/CPP_NoCuda/HDynamicCNNManyIP.cpp
Normal file
|
@ -0,0 +1,339 @@
|
|||
#include "HDynamicCNNManyIP.h"
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
HDynamicCNNManyIP::HDynamicCNNManyIP()
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
HDynamicCNNManyIP::~HDynamicCNNManyIP()
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
bool HDynamicCNNManyIP::update_entrypoint(
|
||||
int64_t h_pointer_addr,
|
||||
int64_t h_dim_0,
|
||||
int64_t h_dim_1,
|
||||
int64_t h_dim_2,
|
||||
int64_t h_dim_3,
|
||||
int64_t epsilon_xy_pointer_addr,
|
||||
int64_t epsilon_xy_dim_0,
|
||||
int64_t epsilon_xy_dim_1,
|
||||
int64_t epsilon_xy_dim_2,
|
||||
int64_t epsilon_t_pointer_addr,
|
||||
int64_t epsilon_t_dim_0,
|
||||
int64_t weights_pointer_addr,
|
||||
int64_t weights_dim_0,
|
||||
int64_t weights_dim_1,
|
||||
int64_t input_pointer_addr,
|
||||
int64_t input_dim_0,
|
||||
int64_t input_dim_1,
|
||||
int64_t input_dim_2,
|
||||
int64_t input_dim_3,
|
||||
int64_t init_vector_pointer_addr,
|
||||
int64_t init_vector_dim_0,
|
||||
int64_t number_of_processes,
|
||||
float forgetting_offset,
|
||||
int64_t gpu_tuning_factor)
|
||||
{
|
||||
|
||||
size_t number_of_pattern = input_dim_0;
|
||||
|
||||
size_t h_dim = init_vector_dim_0;
|
||||
float* h_init_ptr = (float*)init_vector_pointer_addr;
|
||||
assert((h_init_ptr != nullptr));
|
||||
assert((h_dim > 0));
|
||||
|
||||
float* h_pointer = (float*)h_pointer_addr;
|
||||
assert((h_pointer != nullptr));
|
||||
assert((h_dim_0 > 0));
|
||||
assert((h_dim_1 > 0));
|
||||
assert((h_dim_2 > 0));
|
||||
assert((h_dim_3 > 0));
|
||||
|
||||
size_t h_dim_c0 = h_dim_1 * h_dim_2 * h_dim_3;
|
||||
size_t h_dim_c1 = h_dim_2 * h_dim_3;
|
||||
size_t h_dim_c2 = h_dim_3;
|
||||
|
||||
float* epsilon_xy_pointer = (float*)epsilon_xy_pointer_addr;
|
||||
assert((epsilon_xy_pointer != nullptr));
|
||||
assert((epsilon_xy_dim_0 > 0));
|
||||
assert((epsilon_xy_dim_1 > 0));
|
||||
|
||||
size_t epsilon_xy_dim_c0 = epsilon_xy_dim_2 * epsilon_xy_dim_1;
|
||||
size_t epsilon_xy_dim_c1 = epsilon_xy_dim_2;
|
||||
|
||||
float* epsilon_t_pointer = (float*)epsilon_t_pointer_addr;
|
||||
assert((epsilon_t_pointer != nullptr));
|
||||
assert((epsilon_t_dim_0 > 0));
|
||||
|
||||
float* weights_pointer = (float*)weights_pointer_addr;
|
||||
assert((weights_pointer != nullptr));
|
||||
assert((weights_dim_0 > 0));
|
||||
assert((weights_dim_1 > 0));
|
||||
|
||||
size_t weights_dim_c0 = weights_dim_1;
|
||||
|
||||
int64_t* input_pointer = (int64_t*)input_pointer_addr;
|
||||
assert((input_pointer != nullptr));
|
||||
assert((input_dim_0 > 0));
|
||||
assert((input_dim_1 > 0));
|
||||
assert((input_dim_2 > 0));
|
||||
assert((input_dim_3 > 0));
|
||||
|
||||
size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3;
|
||||
size_t input_dim_c1 = input_dim_2 * input_dim_3;
|
||||
size_t input_dim_c2 = input_dim_3;
|
||||
|
||||
assert((h_dim == weights_dim_1));
|
||||
size_t number_of_spikes = input_dim_1;
|
||||
size_t dim_x = input_dim_2;
|
||||
size_t dim_y = input_dim_3;
|
||||
|
||||
float forgetting_offset_local = forgetting_offset / static_cast<float>(h_dim);
|
||||
|
||||
|
||||
// --------------------
|
||||
if (number_of_processes > 0)
|
||||
{
|
||||
omp_set_num_threads(number_of_processes);
|
||||
|
||||
size_t pattern_id;
|
||||
#pragma omp parallel for
|
||||
for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++)
|
||||
{
|
||||
update(
|
||||
h_init_ptr,
|
||||
h_pointer,
|
||||
h_dim_c0,
|
||||
h_dim_c1,
|
||||
h_dim_c2,
|
||||
h_dim,
|
||||
epsilon_xy_pointer,
|
||||
epsilon_xy_dim_c0,
|
||||
epsilon_xy_dim_c1,
|
||||
epsilon_t_pointer,
|
||||
weights_pointer,
|
||||
weights_dim_c0,
|
||||
input_pointer,
|
||||
input_dim_c0,
|
||||
input_dim_c1,
|
||||
input_dim_c2,
|
||||
number_of_spikes,
|
||||
dim_x,
|
||||
dim_y,
|
||||
forgetting_offset,
|
||||
forgetting_offset_local,
|
||||
pattern_id);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Error: number_of_processes <= 0" << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
|
||||
bool HDynamicCNNManyIP::update(
|
||||
float* h_init_ptr,
|
||||
float* h_pointer,
|
||||
size_t h_dim_c0,
|
||||
size_t h_dim_c1,
|
||||
size_t h_dim_c2,
|
||||
size_t h_dim,
|
||||
float* epsilon_xy_pointer,
|
||||
size_t epsilon_xy_dim_c0,
|
||||
size_t epsilon_xy_dim_c1,
|
||||
float* epsilon_t_pointer,
|
||||
float* weights_pointer,
|
||||
size_t weights_dim_c0,
|
||||
int64_t* input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
size_t number_of_spikes,
|
||||
size_t dim_x,
|
||||
size_t dim_y,
|
||||
float forgetting_offset,
|
||||
float forgetting_offset_local,
|
||||
size_t pattern_id)
|
||||
{
|
||||
|
||||
float* h_ptr;
|
||||
float* epsilon_xy_ptr;
|
||||
int64_t* input_ptr;
|
||||
|
||||
size_t counter_x;
|
||||
size_t counter_y;
|
||||
|
||||
for (counter_x = 0; counter_x < dim_x; counter_x++)
|
||||
{
|
||||
for (counter_y = 0; counter_y < dim_y; counter_y++)
|
||||
{
|
||||
epsilon_xy_ptr = epsilon_xy_pointer +
|
||||
counter_x * epsilon_xy_dim_c1 + counter_y;
|
||||
|
||||
h_ptr = h_pointer +
|
||||
pattern_id * h_dim_c0 + counter_x * h_dim_c2 + counter_y;
|
||||
|
||||
input_ptr = input_pointer +
|
||||
pattern_id * input_dim_c0 + counter_x * input_dim_c2 + counter_y;
|
||||
|
||||
update_one_ip(
|
||||
h_init_ptr,
|
||||
h_ptr,
|
||||
h_dim_c1,
|
||||
h_dim,
|
||||
weights_pointer,
|
||||
weights_dim_c0,
|
||||
input_ptr,
|
||||
input_dim_c1,
|
||||
epsilon_xy_ptr,
|
||||
epsilon_xy_dim_c0,
|
||||
epsilon_t_pointer,
|
||||
number_of_spikes,
|
||||
forgetting_offset,
|
||||
forgetting_offset_local);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
void HDynamicCNNManyIP::update_one_ip(
|
||||
float* h_init_ptr,
|
||||
float* h_pointer,
|
||||
size_t h_dim_c1,
|
||||
size_t h_dim,
|
||||
float* weights_pointer,
|
||||
size_t weights_dim_c0,
|
||||
int64_t* input_pointer,
|
||||
size_t input_dim_c1,
|
||||
float* epsilon_xy_pointer,
|
||||
size_t epsilon_xy_dim_c0,
|
||||
float* epsilon_t_pointer,
|
||||
size_t number_of_spikes,
|
||||
float forgetting_offset,
|
||||
float forgetting_offset_local)
|
||||
{
|
||||
|
||||
float* h_temp = new float[h_dim];
|
||||
float* h_subsegment = new float[h_dim];
|
||||
|
||||
memcpy(h_subsegment, h_init_ptr, sizeof(float) * h_dim);
|
||||
|
||||
size_t counter_spike;
|
||||
size_t counter;
|
||||
|
||||
float h_temp_sum;
|
||||
float temp_value;
|
||||
|
||||
float epsilon_subsegment;
|
||||
float epsilon_scale = 1.0;
|
||||
|
||||
int64_t* spike;
|
||||
float* w_ptr;
|
||||
|
||||
for (counter_spike = 0; counter_spike < number_of_spikes; counter_spike++)
|
||||
{
|
||||
if (epsilon_scale > 1E10)
|
||||
{
|
||||
temp_value = 1.0 / epsilon_scale;
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_subsegment[counter] *= temp_value;
|
||||
}
|
||||
|
||||
epsilon_scale = 1.0;
|
||||
}
|
||||
|
||||
spike = input_pointer + counter_spike * input_dim_c1;
|
||||
|
||||
if (*spike >= 0)
|
||||
{
|
||||
epsilon_subsegment =
|
||||
epsilon_xy_pointer[*spike *epsilon_xy_dim_c0] * epsilon_t_pointer[counter_spike];
|
||||
|
||||
w_ptr = weights_pointer + *spike * weights_dim_c0;
|
||||
|
||||
memcpy(h_temp, h_subsegment, sizeof(float) * h_dim);
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_temp[counter] *= w_ptr[counter];
|
||||
}
|
||||
|
||||
h_temp_sum = 0.0;
|
||||
#pragma omp simd reduction(+ : h_temp_sum)
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_temp_sum += h_temp[counter];
|
||||
}
|
||||
|
||||
if (h_temp_sum > 1E-10)
|
||||
{
|
||||
temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum;
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_temp[counter] *= temp_value;
|
||||
}
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_subsegment[counter] += h_temp[counter];
|
||||
}
|
||||
|
||||
if (forgetting_offset_local > 0.0)
|
||||
{
|
||||
temp_value =
|
||||
epsilon_scale * epsilon_subsegment * forgetting_offset_local;
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_subsegment[counter] += temp_value;
|
||||
}
|
||||
|
||||
epsilon_scale *=
|
||||
1.0 + epsilon_subsegment * (1.0 + forgetting_offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
epsilon_scale *= 1.0 + epsilon_subsegment * 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
temp_value = 1.0 / epsilon_scale;
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < h_dim; counter++)
|
||||
{
|
||||
h_pointer[counter * h_dim_c1] =
|
||||
h_subsegment[counter] * temp_value;
|
||||
}
|
||||
|
||||
delete[] h_temp;
|
||||
delete[] h_subsegment;
|
||||
|
||||
return;
|
||||
};
|
||||
|
85
network/CPP_NoCuda/HDynamicCNNManyIP.h
Normal file
85
network/CPP_NoCuda/HDynamicCNNManyIP.h
Normal file
|
@ -0,0 +1,85 @@
|
|||
#ifndef SRC_HDYNAMICCNNMANYIP_H_
|
||||
#define SRC_HDYNAMICCNNMANYIP_H_
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cctype>
|
||||
#include <iostream>
|
||||
|
||||
class HDynamicCNNManyIP
|
||||
{
|
||||
public:
|
||||
HDynamicCNNManyIP();
|
||||
~HDynamicCNNManyIP();
|
||||
|
||||
bool update_entrypoint(
|
||||
int64_t h_pointer_addr,
|
||||
int64_t h_dim_0,
|
||||
int64_t h_dim_1,
|
||||
int64_t h_dim_2,
|
||||
int64_t h_dim_3,
|
||||
int64_t epsilon_xy_pointer_addr,
|
||||
int64_t epsilon_xy_dim_0,
|
||||
int64_t epsilon_xy_dim_1,
|
||||
int64_t epsilon_xy_dim_2,
|
||||
int64_t epsilon_t_pointer_addr,
|
||||
int64_t epsilon_t_dim_0,
|
||||
int64_t weights_pointer_addr,
|
||||
int64_t weights_dim_0,
|
||||
int64_t weights_dim_1,
|
||||
int64_t input_pointer_addr,
|
||||
int64_t input_dim_0,
|
||||
int64_t input_dim_1,
|
||||
int64_t input_dim_2,
|
||||
int64_t input_dim_3,
|
||||
int64_t init_vector_pointer_addr,
|
||||
int64_t init_vector_dim_0,
|
||||
int64_t number_of_processes,
|
||||
float forgetting_offset,
|
||||
int64_t gpu_tuning_factor);
|
||||
|
||||
private:
|
||||
|
||||
bool update(
|
||||
float* h_init_ptr,
|
||||
float* h_pointer,
|
||||
size_t h_dim_c0,
|
||||
size_t h_dim_c1,
|
||||
size_t h_dim_c2,
|
||||
size_t h_dim,
|
||||
float* epsilon_xy_pointer,
|
||||
size_t epsilon_xy_dim_c0,
|
||||
size_t epsilon_xy_dim_c1,
|
||||
float* epsilon_t_pointer,
|
||||
float* weights_pointer,
|
||||
size_t weights_dim_c0,
|
||||
int64_t* input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
size_t number_of_spikes,
|
||||
size_t dim_x,
|
||||
size_t dim_y,
|
||||
float forgetting_offset,
|
||||
float forgetting_offset_local,
|
||||
size_t pattern_id);
|
||||
|
||||
void update_one_ip(
|
||||
float* h_init_ptr,
|
||||
float* h_pointer,
|
||||
size_t h_dim_c1,
|
||||
size_t h_dim,
|
||||
float* weights_pointer,
|
||||
size_t weights_dim_c0,
|
||||
int64_t* input_pointer,
|
||||
size_t input_dim_c1,
|
||||
float* epsilon_xy_pointer,
|
||||
size_t epsilon_xy_dim_c0,
|
||||
float* epsilon_t_pointer,
|
||||
size_t number_of_spikes,
|
||||
float forgetting_offset,
|
||||
float forgetting_offset_local);
|
||||
|
||||
};
|
||||
|
||||
#endif /* SRC_HDYNAMICCNNMANYIP_H_ */
|
64
network/CPP_NoCuda/Makefile
Normal file
64
network/CPP_NoCuda/Makefile
Normal file
|
@ -0,0 +1,64 @@
|
|||
# Change to your python bin directory (tested with Python 3.10.4)
|
||||
PYBIN=~/P3.10GPU/bin/
|
||||
CC=/usr/lib64/ccache/clang++
|
||||
|
||||
PYBIND11INCLUDE=`$(PYBIN)python3 -m pybind11 --includes`
|
||||
PARAMETERS_O= -O3 -std=c++14 $(PYBIND11INCLUDE) -fPIC -Wall -fopenmp=libomp
|
||||
|
||||
PARAMETERS_Linker=-shared -lm -lomp -lstdc++ -Wall
|
||||
|
||||
PYPOSTFIX=`$(PYBIN)python3-config --extension-suffix`
|
||||
|
||||
|
||||
all: PyHDynamicCNNManyIP \
|
||||
PySpikeGeneration2DManyIP \
|
||||
PyMultiApp
|
||||
|
||||
#######################
|
||||
|
||||
HDynamicCNNManyIP.o: HDynamicCNNManyIP.h HDynamicCNNManyIP.cpp
|
||||
$(CC) $(PARAMETERS_O) -c HDynamicCNNManyIP.cpp -o HDynamicCNNManyIP.o
|
||||
|
||||
PyHDynamicCNNManyIP.o: HDynamicCNNManyIP.h PyHDynamicCNNManyIP.cpp
|
||||
$(CC) $(PARAMETERS_O) -c PyHDynamicCNNManyIP.cpp -o PyHDynamicCNNManyIP.o
|
||||
|
||||
PyHDynamicCNNManyIP: HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o
|
||||
$(CC) $(PARAMETERS_Linker) -o PyHDynamicCNNManyIP HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o
|
||||
cp PyHDynamicCNNManyIP PyHDynamicCNNManyIP$(PYPOSTFIX)
|
||||
$(PYBIN)python3 pybind11_auto_pyi.py
|
||||
|
||||
#######################
|
||||
|
||||
SpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h SpikeGeneration2DManyIP.cpp
|
||||
$(CC) $(PARAMETERS_O) -c SpikeGeneration2DManyIP.cpp -o SpikeGeneration2DManyIP.o
|
||||
|
||||
PySpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h PySpikeGeneration2DManyIP.cpp
|
||||
$(CC) $(PARAMETERS_O) -c PySpikeGeneration2DManyIP.cpp -o PySpikeGeneration2DManyIP.o
|
||||
|
||||
PySpikeGeneration2DManyIP: SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o
|
||||
$(CC) $(PARAMETERS_Linker) -o PySpikeGeneration2DManyIP SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o
|
||||
cp PySpikeGeneration2DManyIP PySpikeGeneration2DManyIP$(PYPOSTFIX)
|
||||
$(PYBIN)python3 pybind11_auto_pyi.py
|
||||
|
||||
#######################
|
||||
|
||||
MultiApp.o: MultiApp.h MultiApp.cpp approximation_multiplication_function.cpp \
|
||||
error_term.cpp
|
||||
$(CC) $(PARAMETERS_O) -c MultiApp.cpp -o MultiApp.o
|
||||
|
||||
PyMultiApp.o: MultiApp.h PyMultiApp.cpp
|
||||
$(CC) $(PARAMETERS_O) -c PyMultiApp.cpp -o PyMultiApp.o
|
||||
|
||||
PyMultiApp: MultiApp.o PyMultiApp.o
|
||||
$(CC) $(PARAMETERS_Linker) -o PyMultiApp MultiApp.o PyMultiApp.o
|
||||
cp PyMultiApp PyMultiApp$(PYPOSTFIX)
|
||||
$(PYBIN)python3 pybind11_auto_pyi.py
|
||||
|
||||
#######################
|
||||
clean:
|
||||
rm -f PyHDynamicCNNManyIP
|
||||
rm -f PySpikeGeneration2DManyIP
|
||||
rm -f PyMultiApp
|
||||
rm -f *.o
|
||||
rm -f *.so
|
||||
|
187
network/CPP_NoCuda/MultiApp.cpp
Normal file
187
network/CPP_NoCuda/MultiApp.cpp
Normal file
|
@ -0,0 +1,187 @@
|
|||
#include "MultiApp.h"
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "approximation_multiplication_function.cpp"
|
||||
|
||||
MultiApp::MultiApp()
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
MultiApp::~MultiApp()
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
bool MultiApp::update(float* np_input_pointer,
|
||||
float* np_weight_pointer,
|
||||
float* np_output_pointer, int64_t pattern_dim,
|
||||
int64_t feature_dim, int64_t x_dim, int64_t y_dim,
|
||||
int64_t input_channel_dim, int64_t id_pattern,
|
||||
bool approximation_enable, int64_t number_of_trunc_bits,
|
||||
int64_t number_of_frac_bits)
|
||||
{
|
||||
|
||||
assert((id_pattern >= 0));
|
||||
assert((id_pattern < pattern_dim));
|
||||
|
||||
float* np_input_pointer_pattern;
|
||||
float* np_output_pointer_pattern;
|
||||
|
||||
float* input_ptr;
|
||||
float* output_ptr;
|
||||
float* w_ptr;
|
||||
|
||||
uint64_t pattern_size = input_channel_dim;
|
||||
|
||||
std::vector<float> ap_h_vector;
|
||||
ap_h_vector.resize(pattern_size);
|
||||
float* ap_h_ptr = ap_h_vector.data();
|
||||
|
||||
std::vector<uint32_t> ap_x_vector;
|
||||
ap_x_vector.resize(pattern_size);
|
||||
uint32_t* ap_x_ptr = ap_x_vector.data();
|
||||
|
||||
std::vector<uint32_t> ap_y_vector;
|
||||
ap_y_vector.resize(pattern_size);
|
||||
uint32_t* ap_y_ptr = ap_y_vector.data();
|
||||
|
||||
std::vector<uint32_t> ap_x_exponent_vector;
|
||||
ap_x_exponent_vector.resize(pattern_size);
|
||||
uint32_t* ap_x_exponent_ptr = ap_x_exponent_vector.data();
|
||||
|
||||
std::vector<uint32_t> ap_y_exponent_vector;
|
||||
ap_y_exponent_vector.resize(pattern_size);
|
||||
uint32_t* ap_y_exponent_ptr = ap_y_exponent_vector.data();
|
||||
|
||||
std::vector<uint32_t> ap_h_exponent_vector;
|
||||
ap_h_exponent_vector.resize(pattern_size);
|
||||
uint32_t* ap_h_exponent_ptr = ap_h_exponent_vector.data();
|
||||
|
||||
std::vector<uint64_t> ap_res_vector;
|
||||
ap_res_vector.resize(pattern_size);
|
||||
uint64_t* ap_res_ptr = ap_res_vector.data();
|
||||
|
||||
uint32_t ap_mask = static_cast<uint64_t>(pow(2, number_of_trunc_bits)) - 1;
|
||||
|
||||
std::vector<uint32_t> sign_temp_vector;
|
||||
sign_temp_vector.resize(pattern_size);
|
||||
uint32_t* sign_temp_ptr = sign_temp_vector.data();
|
||||
|
||||
uint64_t input_pattern_size = input_channel_dim * x_dim * y_dim;
|
||||
uint64_t output_pattern_size = feature_dim * x_dim * y_dim;
|
||||
|
||||
np_input_pointer_pattern = np_input_pointer + id_pattern * input_pattern_size;
|
||||
np_output_pointer_pattern =
|
||||
np_output_pointer + id_pattern * output_pattern_size;
|
||||
|
||||
uint64_t counter;
|
||||
|
||||
uint64_t counter_x;
|
||||
uint64_t counter_y;
|
||||
uint64_t counter_feature;
|
||||
uint64_t pos_xy;
|
||||
uint64_t pos_xy_if;
|
||||
|
||||
float temp_sum;
|
||||
|
||||
uint64_t pattern_c_2 = x_dim * y_dim;
|
||||
|
||||
for (counter_x = 0; counter_x < x_dim; counter_x++)
|
||||
{
|
||||
for (counter_y = 0; counter_y < y_dim; counter_y++)
|
||||
{
|
||||
pos_xy = counter_y + counter_x * y_dim;
|
||||
for (counter_feature = 0; counter_feature < feature_dim;
|
||||
counter_feature++)
|
||||
{
|
||||
pos_xy_if = counter_feature * pattern_c_2 + pos_xy;
|
||||
|
||||
input_ptr = np_input_pointer_pattern + pos_xy;
|
||||
output_ptr = np_output_pointer_pattern + pos_xy_if;
|
||||
w_ptr = np_weight_pointer + counter_feature * input_channel_dim;
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_size; counter++)
|
||||
{
|
||||
ap_h_ptr[counter] = input_ptr[counter * pattern_c_2];
|
||||
}
|
||||
|
||||
approximation_multiplication_function(
|
||||
ap_h_ptr, w_ptr, pattern_size, number_of_trunc_bits,
|
||||
number_of_frac_bits, ap_x_ptr, ap_y_ptr, ap_x_exponent_ptr,
|
||||
ap_y_exponent_ptr, ap_h_exponent_ptr, ap_mask, ap_res_ptr,
|
||||
sign_temp_ptr, approximation_enable);
|
||||
|
||||
temp_sum = 0.0;
|
||||
#pragma omp simd reduction(+ \
|
||||
: temp_sum)
|
||||
for (counter = 0; counter < pattern_size; counter++)
|
||||
{
|
||||
temp_sum += ap_h_ptr[counter];
|
||||
}
|
||||
|
||||
output_ptr[0] = temp_sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
bool MultiApp::update_with_init_vector_multi_pattern(
|
||||
int64_t np_input_pointer_addr, int64_t np_weight_pointer_addr,
|
||||
int64_t np_output_pointer_addr, int64_t pattern_dim, int64_t feature_dim,
|
||||
int64_t x_dim, int64_t y_dim, int64_t input_channel_dim,
|
||||
int64_t number_of_processes, bool approximation_enable,
|
||||
int64_t number_of_trunc_bits, int64_t number_of_frac)
|
||||
{
|
||||
int64_t number_of_pattern = pattern_dim;
|
||||
int64_t pattern_id;
|
||||
|
||||
float* np_input_pointer = (float*)np_input_pointer_addr;
|
||||
float* np_weight_pointer = (float*)np_weight_pointer_addr;
|
||||
float* np_output_pointer = (float*)np_output_pointer_addr;
|
||||
|
||||
assert((np_input_pointer != nullptr));
|
||||
assert((np_output_pointer != nullptr));
|
||||
assert((np_weight_pointer != nullptr));
|
||||
|
||||
assert((pattern_dim > 0));
|
||||
assert((feature_dim > 0));
|
||||
assert((x_dim > 0));
|
||||
assert((y_dim > 0));
|
||||
assert((input_channel_dim > 0));
|
||||
|
||||
if (number_of_processes > 0)
|
||||
{
|
||||
omp_set_num_threads(number_of_processes);
|
||||
// For debugging: Only one thread
|
||||
// omp_set_num_threads(1);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++)
|
||||
{
|
||||
update(np_input_pointer, np_weight_pointer,
|
||||
np_output_pointer, pattern_dim, feature_dim, x_dim, y_dim,
|
||||
input_channel_dim, pattern_id, approximation_enable,
|
||||
number_of_trunc_bits, number_of_frac);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Error: number_of_processes <= 0" << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
32
network/CPP_NoCuda/MultiApp.h
Normal file
32
network/CPP_NoCuda/MultiApp.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
#ifndef SRC_MultiApp_H_
|
||||
#define SRC_MultiApp_H_
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cctype>
|
||||
#include <iostream>
|
||||
|
||||
class MultiApp
|
||||
{
|
||||
public:
|
||||
MultiApp();
|
||||
~MultiApp();
|
||||
|
||||
bool update(float* np_input_pointer, float* np_weight_pointer,
|
||||
float* np_output_pointer, int64_t pattern_dim,
|
||||
int64_t feature_dim, int64_t x_dim, int64_t y_dim,
|
||||
int64_t input_channel_dim, int64_t id_pattern,
|
||||
bool approximation_enable, int64_t number_of_trunc_bits,
|
||||
int64_t number_of_frac);
|
||||
|
||||
bool update_with_init_vector_multi_pattern(
|
||||
int64_t np_input_pointer_addr, int64_t np_weight_pointer_addr,
|
||||
int64_t np_output_pointer_addr, int64_t pattern_dim, int64_t feature_dim,
|
||||
int64_t x_dim, int64_t y_dim, int64_t input_channel_dim,
|
||||
int64_t number_of_processes, bool approximation_enable,
|
||||
int64_t number_of_trunc_bits, int64_t number_of_frac);
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
#endif /* SRC_MultiApp_H_ */
|
14
network/CPP_NoCuda/PyHDynamicCNNManyIP.cpp
Normal file
14
network/CPP_NoCuda/PyHDynamicCNNManyIP.cpp
Normal file
|
@ -0,0 +1,14 @@
|
|||
#include <pybind11/pybind11.h>
|
||||
|
||||
#include "HDynamicCNNManyIP.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
PYBIND11_MODULE(PyHDynamicCNNManyIP, m)
|
||||
{
|
||||
m.doc() = "HDynamicCNNManyIP Module";
|
||||
py::class_<HDynamicCNNManyIP>(m, "HDynamicCNNManyIP")
|
||||
.def(py::init<>())
|
||||
.def("update",
|
||||
&HDynamicCNNManyIP::update_entrypoint);
|
||||
}
|
18
network/CPP_NoCuda/PyHDynamicCNNManyIP.pyi
Normal file
18
network/CPP_NoCuda/PyHDynamicCNNManyIP.pyi
Normal file
|
@ -0,0 +1,18 @@
|
|||
#
|
||||
# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!
|
||||
#
|
||||
|
||||
"""HDynamicCNNManyIP Module"""
|
||||
from __future__ import annotations
|
||||
import PyHDynamicCNNManyIP
|
||||
import typing
|
||||
|
||||
__all__ = [
|
||||
"HDynamicCNNManyIP"
|
||||
]
|
||||
|
||||
|
||||
class HDynamicCNNManyIP():
|
||||
def __init__(self) -> None: ...
|
||||
def update(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int, arg16: int, arg17: int, arg18: int, arg19: int, arg20: int, arg21: int, arg22: float, arg23: int) -> bool: ...
|
||||
pass
|
14
network/CPP_NoCuda/PyMultiApp.cpp
Normal file
14
network/CPP_NoCuda/PyMultiApp.cpp
Normal file
|
@ -0,0 +1,14 @@
|
|||
|
||||
#include <pybind11/pybind11.h>
|
||||
|
||||
#include "MultiApp.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
PYBIND11_MODULE(PyMultiApp, m) {
|
||||
m.doc() = "MultiApp Module";
|
||||
py::class_<MultiApp>(m, "MultiApp")
|
||||
.def(py::init<>())
|
||||
.def("update_with_init_vector_multi_pattern",
|
||||
&MultiApp::update_with_init_vector_multi_pattern);
|
||||
}
|
18
network/CPP_NoCuda/PyMultiApp.pyi
Normal file
18
network/CPP_NoCuda/PyMultiApp.pyi
Normal file
|
@ -0,0 +1,18 @@
|
|||
#
|
||||
# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!
|
||||
#
|
||||
|
||||
"""MultiApp Module"""
|
||||
from __future__ import annotations
|
||||
import PyMultiApp
|
||||
import typing
|
||||
|
||||
__all__ = [
|
||||
"MultiApp"
|
||||
]
|
||||
|
||||
|
||||
class MultiApp():
|
||||
def __init__(self) -> None: ...
|
||||
def update_with_init_vector_multi_pattern(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: bool, arg10: int, arg11: int) -> bool: ...
|
||||
pass
|
15
network/CPP_NoCuda/PySpikeGeneration2DManyIP.cpp
Normal file
15
network/CPP_NoCuda/PySpikeGeneration2DManyIP.cpp
Normal file
|
@ -0,0 +1,15 @@
|
|||
|
||||
#include <pybind11/pybind11.h>
|
||||
|
||||
#include "SpikeGeneration2DManyIP.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
PYBIND11_MODULE(PySpikeGeneration2DManyIP, m)
|
||||
{
|
||||
m.doc() = "SpikeGeneration2DManyIP Module";
|
||||
py::class_<SpikeGeneration2DManyIP>(m, "SpikeGeneration2DManyIP")
|
||||
.def(py::init<>())
|
||||
.def("spike_generation",
|
||||
&SpikeGeneration2DManyIP::spike_generation_entrypoint);
|
||||
}
|
18
network/CPP_NoCuda/PySpikeGeneration2DManyIP.pyi
Normal file
18
network/CPP_NoCuda/PySpikeGeneration2DManyIP.pyi
Normal file
|
@ -0,0 +1,18 @@
|
|||
#
|
||||
# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!
|
||||
#
|
||||
|
||||
"""SpikeGeneration2DManyIP Module"""
|
||||
from __future__ import annotations
|
||||
import PySpikeGeneration2DManyIP
|
||||
import typing
|
||||
|
||||
__all__ = [
|
||||
"SpikeGeneration2DManyIP"
|
||||
]
|
||||
|
||||
|
||||
class SpikeGeneration2DManyIP():
|
||||
def __init__(self) -> None: ...
|
||||
def spike_generation(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int) -> bool: ...
|
||||
pass
|
205
network/CPP_NoCuda/SpikeGeneration2DManyIP.cpp
Normal file
205
network/CPP_NoCuda/SpikeGeneration2DManyIP.cpp
Normal file
|
@ -0,0 +1,205 @@
|
|||
#include "SpikeGeneration2DManyIP.h"
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
SpikeGeneration2DManyIP::SpikeGeneration2DManyIP()
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
SpikeGeneration2DManyIP::~SpikeGeneration2DManyIP()
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
bool SpikeGeneration2DManyIP::spike_generation_entrypoint(
|
||||
int64_t input_pointer_addr, int64_t input_dim_0,
|
||||
int64_t input_dim_1, int64_t input_dim_2, int64_t input_dim_3,
|
||||
int64_t random_values_pointer_addr, int64_t random_values_dim_0,
|
||||
int64_t random_values_dim_1, int64_t random_values_dim_2,
|
||||
int64_t random_values_dim_3, int64_t output_pointer_addr,
|
||||
int64_t output_dim_0, int64_t output_dim_1, int64_t output_dim_2,
|
||||
int64_t output_dim_3, int64_t number_of_cpu_processes)
|
||||
{
|
||||
|
||||
float* input_pointer = (float*)input_pointer_addr;
|
||||
float* random_values_pointer = (float*)random_values_pointer_addr;
|
||||
int64_t* output_pointer = (int64_t*)output_pointer_addr;
|
||||
|
||||
// Input
|
||||
assert((input_pointer != nullptr));
|
||||
assert((input_dim_0 > 0));
|
||||
assert((input_dim_1 > 0));
|
||||
assert((input_dim_2 > 0));
|
||||
assert((input_dim_3 > 0));
|
||||
|
||||
// Random
|
||||
assert((random_values_pointer != nullptr));
|
||||
assert((random_values_dim_0 > 0));
|
||||
assert((random_values_dim_1 > 0));
|
||||
assert((random_values_dim_2 > 0));
|
||||
assert((random_values_dim_3 > 0));
|
||||
|
||||
// Output
|
||||
assert((output_pointer != nullptr));
|
||||
assert((output_dim_0 > 0));
|
||||
assert((output_dim_1 > 0));
|
||||
assert((output_dim_2 > 0));
|
||||
assert((output_dim_3 > 0));
|
||||
|
||||
// Input
|
||||
size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3;
|
||||
size_t input_dim_c1 = input_dim_2 * input_dim_3;
|
||||
size_t input_dim_c2 = input_dim_3;
|
||||
|
||||
// Random
|
||||
size_t random_values_dim_c0 =
|
||||
random_values_dim_1 * random_values_dim_2 * random_values_dim_3;
|
||||
size_t random_values_dim_c1 =
|
||||
random_values_dim_2 * random_values_dim_3;
|
||||
size_t random_values_dim_c2 = random_values_dim_3;
|
||||
|
||||
// Output
|
||||
size_t output_dim_c0 =
|
||||
output_dim_1 * output_dim_2 * output_dim_3;
|
||||
size_t output_dim_c1 = output_dim_2 * output_dim_3;
|
||||
size_t output_dim_c2 = output_dim_3;
|
||||
|
||||
size_t number_of_pattern = input_dim_0;
|
||||
size_t h_dim = input_dim_1;
|
||||
size_t spike_dim = output_dim_1;
|
||||
size_t x_dim = output_dim_2;
|
||||
size_t y_dim = output_dim_2;
|
||||
|
||||
if (number_of_cpu_processes > 0)
|
||||
{
|
||||
|
||||
omp_set_num_threads(number_of_cpu_processes);
|
||||
// DEBUG:
|
||||
// omp_set_num_threads(1);
|
||||
|
||||
size_t pattern_id;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++)
|
||||
{
|
||||
spike_generation(
|
||||
input_pointer,
|
||||
input_dim_c0,
|
||||
input_dim_c1,
|
||||
input_dim_c2,
|
||||
random_values_pointer,
|
||||
random_values_dim_c0,
|
||||
random_values_dim_c1,
|
||||
random_values_dim_c2,
|
||||
output_pointer,
|
||||
output_dim_c0,
|
||||
output_dim_c1,
|
||||
output_dim_c2,
|
||||
x_dim,
|
||||
y_dim,
|
||||
spike_dim,
|
||||
h_dim,
|
||||
pattern_id);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Error: number_of_processes <= 0" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
bool SpikeGeneration2DManyIP::spike_generation(
|
||||
float* input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
float* random_values_pointer,
|
||||
size_t random_values_dim_c0,
|
||||
size_t random_values_dim_c1,
|
||||
size_t random_values_dim_c2,
|
||||
int64_t* output_pointer,
|
||||
size_t output_dim_c0,
|
||||
size_t output_dim_c1,
|
||||
size_t output_dim_c2,
|
||||
size_t x_dim,
|
||||
size_t y_dim,
|
||||
size_t spike_dim,
|
||||
size_t h_dim,
|
||||
size_t pattern_id)
|
||||
{
|
||||
|
||||
size_t counter;
|
||||
size_t counter_x = 0;
|
||||
size_t counter_y = 0;
|
||||
|
||||
float* p_ptr = nullptr;
|
||||
int64_t* out_ptr = nullptr;
|
||||
float* rand_ptr = nullptr;
|
||||
|
||||
for (counter_x = 0; counter_x < x_dim; counter_x++)
|
||||
{
|
||||
for (counter_y = 0; counter_y < y_dim; counter_y++)
|
||||
{
|
||||
p_ptr = input_pointer + pattern_id * input_dim_c0 +
|
||||
counter_x * input_dim_c2 + counter_y;
|
||||
// + counter * input_dim_c1
|
||||
|
||||
out_ptr = output_pointer + pattern_id * output_dim_c0 +
|
||||
counter_x * output_dim_c2 + counter_y;
|
||||
// + counter * output_dim_c1
|
||||
|
||||
rand_ptr = random_values_pointer +
|
||||
pattern_id * random_values_dim_c0 +
|
||||
counter_x * random_values_dim_c2 + counter_y;
|
||||
// + counter * random_values_dim_c1
|
||||
|
||||
for (counter = 0; counter < spike_dim; counter++)
|
||||
{
|
||||
out_ptr[counter * output_dim_c1] = lower_bound(p_ptr,
|
||||
h_dim,
|
||||
input_dim_c1,
|
||||
rand_ptr[counter * random_values_dim_c1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
// algorithmic idea stolen from libc++
|
||||
size_t SpikeGeneration2DManyIP::lower_bound(float* data_ptr,
|
||||
size_t data_length,
|
||||
size_t data_ptr_stride,
|
||||
float compare_to_value)
|
||||
{
|
||||
|
||||
size_t start_of_range = 0;
|
||||
size_t length_of_range = data_length;
|
||||
|
||||
while (length_of_range != 0)
|
||||
{
|
||||
size_t half_length = length_of_range >> 1;
|
||||
size_t actual_position = start_of_range + half_length;
|
||||
|
||||
if (data_ptr[actual_position * data_ptr_stride] < compare_to_value)
|
||||
{
|
||||
start_of_range = ++actual_position;
|
||||
length_of_range -= half_length + 1;
|
||||
}
|
||||
else
|
||||
length_of_range = half_length;
|
||||
}
|
||||
return start_of_range;
|
||||
};
|
||||
|
49
network/CPP_NoCuda/SpikeGeneration2DManyIP.h
Normal file
49
network/CPP_NoCuda/SpikeGeneration2DManyIP.h
Normal file
|
@ -0,0 +1,49 @@
|
|||
#ifndef SRC_SPIKEGENERATION2DMANYIP_H_
|
||||
#define SRC_SPIKEGENERATION2DMANYIP_H_
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cctype>
|
||||
#include <iostream>
|
||||
|
||||
class SpikeGeneration2DManyIP
|
||||
{
|
||||
public:
|
||||
SpikeGeneration2DManyIP();
|
||||
~SpikeGeneration2DManyIP();
|
||||
|
||||
bool spike_generation_entrypoint(
|
||||
int64_t input_pointer_addr, int64_t input_dim_0,
|
||||
int64_t input_dim_1, int64_t input_dim_2, int64_t input_dim_3,
|
||||
int64_t random_values_pointer_addr, int64_t random_values_dim_0,
|
||||
int64_t random_values_dim_1, int64_t random_values_dim_2,
|
||||
int64_t random_values_dim_3, int64_t output_pointer_addr,
|
||||
int64_t output_dim_0, int64_t output_dim_1, int64_t output_dim_2,
|
||||
int64_t output_dim_3, int64_t number_of_cpu_processes);
|
||||
|
||||
bool spike_generation(
|
||||
float* input_pointer,
|
||||
size_t input_dim_c0,
|
||||
size_t input_dim_c1,
|
||||
size_t input_dim_c2,
|
||||
float* random_values_pointer,
|
||||
size_t random_values_dim_c0,
|
||||
size_t random_values_dim_c1,
|
||||
size_t random_values_dim_c2,
|
||||
int64_t* output_pointer,
|
||||
size_t output_dim_c0,
|
||||
size_t output_dim_c1,
|
||||
size_t output_dim_c2,
|
||||
size_t x_dim,
|
||||
size_t y_dim,
|
||||
size_t spike_dim,
|
||||
size_t h_dim,
|
||||
size_t pattern_id);
|
||||
|
||||
private:
|
||||
size_t lower_bound(float* data_ptr, size_t data_length,
|
||||
size_t data_ptr_stride,
|
||||
float compare_to_value);
|
||||
};
|
||||
|
||||
#endif /* SRC_SPIKEGENERATION2DMANYIP_H_ */
|
138
network/CPP_NoCuda/approximation_multiplication_function.cpp
Normal file
138
network/CPP_NoCuda/approximation_multiplication_function.cpp
Normal file
|
@ -0,0 +1,138 @@
|
|||
#include <unistd.h>
|
||||
|
||||
#include <bitset>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
|
||||
#include "error_term.cpp"
|
||||
|
||||
// Best way to plot the bits
|
||||
// std::cout << std::bitset<32>(ap_y_ptr[1]) << "\n";
|
||||
|
||||
// The result needs to be written back into h_pointer (which contains h)
|
||||
// Don't write to w_pointer.
|
||||
void approximation_multiplication_function(
|
||||
float *h_pointer, float *w_pointer, int64_t pattern_length,
|
||||
uint64_t number_of_trunc_bits, uint64_t number_of_frac_bits,
|
||||
uint32_t *ap_x_ptr, uint32_t *ap_y_ptr, uint32_t *ap_x_exponent_ptr,
|
||||
uint32_t *ap_y_exponent_ptr, uint32_t *ap_h_exponent_ptr, uint32_t ap_mask,
|
||||
uint64_t *ap_res_ptr, uint32_t *sign_temp_ptr, bool approximation_enable) {
|
||||
uint64_t counter;
|
||||
|
||||
uint32_t *w_pointer_mod = (uint32_t *)w_pointer;
|
||||
uint32_t *h_pointer_mod = (uint32_t *)h_pointer;
|
||||
|
||||
// Calculate the new sign
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
sign_temp_ptr[counter] = (w_pointer_mod[counter] & 0x80000000) ^
|
||||
(h_pointer_mod[counter] & 0x80000000);
|
||||
}
|
||||
|
||||
// Extract the exponent
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_x_exponent_ptr[counter] = (h_pointer_mod[counter] << 1) >> 24;
|
||||
}
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_y_exponent_ptr[counter] = (w_pointer_mod[counter] << 1) >> 24;
|
||||
}
|
||||
|
||||
// Cast and "normalize"
|
||||
uint64_t shift_value = 32 - number_of_frac_bits;
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_x_ptr[counter] =
|
||||
((h_pointer_mod[counter] << 8) | 0x80000000) >> shift_value;
|
||||
}
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_y_ptr[counter] =
|
||||
((w_pointer_mod[counter] << 8) | 0x80000000) >> shift_value;
|
||||
}
|
||||
|
||||
// Make the zero -g-r-e-a-t- correct again
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
if (h_pointer[counter] == 0) {
|
||||
ap_x_ptr[counter] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
if (w_pointer[counter] == 0) {
|
||||
ap_y_ptr[counter] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// res = x*y
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_res_ptr[counter] = static_cast<uint64_t>(ap_x_ptr[counter]) * static_cast<uint64_t>(ap_y_ptr[counter]);
|
||||
}
|
||||
|
||||
uint32_t temp;
|
||||
if (approximation_enable == true){
|
||||
// Go through the vector values
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
temp = error_term(ap_y_ptr[counter], ap_x_ptr[counter], ap_mask,
|
||||
number_of_trunc_bits);
|
||||
if (temp > ap_res_ptr[counter]) {
|
||||
ap_res_ptr[counter] = 0;
|
||||
} else {
|
||||
ap_res_ptr[counter] -= temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Cast from int to float
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
h_pointer[counter] = static_cast<float>(ap_res_ptr[counter]);
|
||||
}
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_h_exponent_ptr[counter] = (h_pointer_mod[counter] << 1) >> 24;
|
||||
}
|
||||
|
||||
// devide by the 2^number_of_frac_bits
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
ap_h_exponent_ptr[counter] -= 2 * number_of_frac_bits;
|
||||
}
|
||||
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
temp = ap_x_exponent_ptr[counter] + ap_y_exponent_ptr[counter] +
|
||||
ap_h_exponent_ptr[counter];
|
||||
if (temp > 252) {
|
||||
ap_h_exponent_ptr[counter] = temp - 252;
|
||||
} else {
|
||||
// Here I try to catch the case that the new exponent is too small
|
||||
ap_h_exponent_ptr[counter] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the old exponent
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
h_pointer_mod[counter] = (h_pointer_mod[counter] << 9) >> 9;
|
||||
}
|
||||
|
||||
// Install the new exponent
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
h_pointer_mod[counter] += ap_h_exponent_ptr[counter] << 23;
|
||||
}
|
||||
|
||||
// Add the sign back
|
||||
#pragma omp simd
|
||||
for (counter = 0; counter < pattern_length; counter++) {
|
||||
h_pointer_mod[counter] += sign_temp_ptr[counter];
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
28
network/CPP_NoCuda/error_term.cpp
Normal file
28
network/CPP_NoCuda/error_term.cpp
Normal file
|
@ -0,0 +1,28 @@
|
|||
#include <unistd.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
|
||||
uint32_t error_term(uint32_t a, uint32_t b, uint32_t ap_mask,
|
||||
uint32_t number_of_trunc_bits) {
|
||||
uint32_t error_value = 0;
|
||||
|
||||
uint32_t temp_shift_a = a;
|
||||
uint32_t temp_shift_b = b & ap_mask;
|
||||
|
||||
uint32_t counter_trunc;
|
||||
uint32_t temp;
|
||||
|
||||
// Go through the bits
|
||||
for (counter_trunc = 0; counter_trunc < number_of_trunc_bits;
|
||||
counter_trunc++) {
|
||||
temp = temp_shift_a & 1;
|
||||
if (temp == 1) {
|
||||
error_value += temp_shift_b & ap_mask;
|
||||
}
|
||||
temp_shift_a >>= 1;
|
||||
temp_shift_b <<= 1;
|
||||
}
|
||||
|
||||
return error_value;
|
||||
}
|
23
network/CPP_NoCuda/pybind11_auto_pyi.py
Normal file
23
network/CPP_NoCuda/pybind11_auto_pyi.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
# %%
|
||||
# pip install pybind11-stubgen
|
||||
from pybind11_stubgen import ModuleStubsGenerator # type: ignore
|
||||
import glob
|
||||
|
||||
|
||||
def process(module_name: str) -> None:
|
||||
module = ModuleStubsGenerator(module_name)
|
||||
module.parse()
|
||||
module.write_setup_py = False
|
||||
|
||||
with open(module_name + ".pyi", "w") as fp:
|
||||
fp.write("#\n# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!\n#\n\n")
|
||||
fp.write("\n".join(module.to_lines()))
|
||||
|
||||
|
||||
Files = glob.glob("*.so")
|
||||
|
||||
for fid in Files:
|
||||
Idx: int = fid.find(".")
|
||||
module_name: str = fid[:Idx]
|
||||
print("Processing: " + module_name)
|
||||
process(module_name)
|
318
network/Conv2dApproximation.py
Normal file
318
network/Conv2dApproximation.py
Normal file
|
@ -0,0 +1,318 @@
|
|||
import torch
|
||||
import math
|
||||
|
||||
from network.CPP.PyMultiApp import MultiApp
|
||||
|
||||
|
||||
class Conv2dApproximation(torch.nn.Module):
|
||||
|
||||
in_channels: int | None = None
|
||||
out_channels: int | None = None
|
||||
kernel_size: list[int] | None = None
|
||||
stride: list[int] = [1, 1]
|
||||
padding: list[int] = [0, 0]
|
||||
dilation: list[int] = [1, 1]
|
||||
use_bias: bool = False
|
||||
|
||||
approximation_enable: bool = False
|
||||
number_of_trunc_bits: int = -1
|
||||
number_of_frac: int = -1
|
||||
|
||||
number_of_processes: int = 1
|
||||
|
||||
weights: torch.nn.parameter.Parameter
|
||||
bias: torch.nn.parameter.Parameter | None
|
||||
|
||||
device: torch.device
|
||||
dtype: torch.dtype
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_channels: int,
|
||||
out_channels: int,
|
||||
kernel_size: list[int],
|
||||
stride: list[int] = [1, 1],
|
||||
padding: list[int] = [0, 0],
|
||||
dilation: list[int] = [1, 1],
|
||||
bias: bool = True,
|
||||
approximation_enable: bool = False,
|
||||
number_of_trunc_bits: int = -1,
|
||||
number_of_frac: int = -1,
|
||||
number_of_processes: int = 1,
|
||||
device: torch.device | None = None,
|
||||
dtype: torch.dtype | None = None,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
|
||||
assert device is not None
|
||||
self.device = device
|
||||
|
||||
assert dtype is not None
|
||||
self.dtype = dtype
|
||||
|
||||
assert len(kernel_size) == 2
|
||||
assert len(stride) == 2
|
||||
assert len(padding) == 2
|
||||
assert len(dilation) == 2
|
||||
|
||||
self.in_channels = in_channels
|
||||
self.out_channels = out_channels
|
||||
self.kernel_size = kernel_size
|
||||
self.stride = stride
|
||||
self.padding = padding
|
||||
self.dilation = dilation
|
||||
self.use_bias = bias
|
||||
self.number_of_processes = number_of_processes
|
||||
|
||||
self.approximation_enable = approximation_enable
|
||||
self.number_of_trunc_bits = number_of_trunc_bits
|
||||
self.number_of_frac = number_of_frac
|
||||
|
||||
if self.use_bias is True:
|
||||
self.bias: torch.nn.parameter.Parameter | None = (
|
||||
torch.nn.parameter.Parameter(
|
||||
torch.empty(
|
||||
(out_channels),
|
||||
dtype=self.dtype,
|
||||
device=self.device,
|
||||
)
|
||||
)
|
||||
)
|
||||
else:
|
||||
self.bias = None
|
||||
|
||||
self.weights: torch.nn.parameter.Parameter = torch.nn.parameter.Parameter(
|
||||
torch.empty(
|
||||
(out_channels, in_channels, *kernel_size),
|
||||
dtype=self.dtype,
|
||||
device=self.device,
|
||||
)
|
||||
)
|
||||
|
||||
self.functional_multi = FunctionalMultiConv2d.apply
|
||||
|
||||
self.reset_parameters()
|
||||
|
||||
def reset_parameters(self) -> None:
|
||||
# Stolen from original torch conv2 code
|
||||
torch.nn.init.kaiming_uniform_(self.weights, a=math.sqrt(5))
|
||||
if self.bias is not None:
|
||||
fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weights)
|
||||
if fan_in != 0:
|
||||
bound = 1 / math.sqrt(fan_in)
|
||||
torch.nn.init.uniform_(self.bias, -bound, bound)
|
||||
|
||||
def calculate_output_size(self, value: torch.Tensor) -> None:
|
||||
|
||||
coordinates_0, coordinates_1 = self._get_coordinates(value)
|
||||
|
||||
self.output_size: torch.Tensor = torch.tensor(
|
||||
[
|
||||
coordinates_0.shape[1],
|
||||
coordinates_1.shape[1],
|
||||
],
|
||||
dtype=torch.int64,
|
||||
)
|
||||
self.output_size.requires_grad_(False)
|
||||
|
||||
def _get_coordinates(
|
||||
self, value: torch.Tensor
|
||||
) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
"""Function converts parameter in coordinates
|
||||
for the convolution window"""
|
||||
|
||||
assert value is not None
|
||||
assert torch.is_tensor(value) is True
|
||||
assert value.dim() == 1
|
||||
assert torch.numel(value) == 2
|
||||
assert value.dtype == torch.int64
|
||||
assert value[0] > 0
|
||||
assert value[1] > 0
|
||||
|
||||
assert self.kernel_size is not None
|
||||
assert len(self.kernel_size) == 2
|
||||
assert len(self.stride) == 2
|
||||
assert len(self.dilation) == 2
|
||||
assert len(self.padding) == 2
|
||||
|
||||
unfold_0: torch.nn.Unfold = torch.nn.Unfold(
|
||||
kernel_size=(int(self.kernel_size[0]), 1),
|
||||
dilation=int(self.dilation[0]),
|
||||
padding=int(self.padding[0]),
|
||||
stride=int(self.stride[0]),
|
||||
)
|
||||
|
||||
unfold_1: torch.nn.Unfold = torch.nn.Unfold(
|
||||
kernel_size=(1, int(self.kernel_size[1])),
|
||||
dilation=int(self.dilation[1]),
|
||||
padding=int(self.padding[1]),
|
||||
stride=int(self.stride[1]),
|
||||
)
|
||||
|
||||
coordinates_0: torch.Tensor = (
|
||||
unfold_0(
|
||||
torch.unsqueeze(
|
||||
torch.unsqueeze(
|
||||
torch.unsqueeze(
|
||||
torch.arange(0, int(value[0]), dtype=torch.float32),
|
||||
1,
|
||||
),
|
||||
0,
|
||||
),
|
||||
0,
|
||||
)
|
||||
)
|
||||
.squeeze(0)
|
||||
.type(torch.int64)
|
||||
)
|
||||
|
||||
coordinates_1: torch.Tensor = (
|
||||
unfold_1(
|
||||
torch.unsqueeze(
|
||||
torch.unsqueeze(
|
||||
torch.unsqueeze(
|
||||
torch.arange(0, int(value[1]), dtype=torch.float32),
|
||||
0,
|
||||
),
|
||||
0,
|
||||
),
|
||||
0,
|
||||
)
|
||||
)
|
||||
.squeeze(0)
|
||||
.type(torch.int64)
|
||||
)
|
||||
|
||||
return coordinates_0, coordinates_1
|
||||
|
||||
def forward(self, input: torch.Tensor) -> torch.Tensor:
|
||||
|
||||
assert input.dim() == 4
|
||||
|
||||
assert self.kernel_size is not None
|
||||
|
||||
input_size = torch.Tensor([int(input.shape[-2]), int(input.shape[-1])]).type(
|
||||
dtype=torch.int64
|
||||
)
|
||||
|
||||
self.calculate_output_size(input_size)
|
||||
|
||||
input_fold = torch.nn.functional.fold(
|
||||
torch.nn.functional.unfold(
|
||||
input.requires_grad_(True),
|
||||
tuple(self.kernel_size),
|
||||
tuple(self.dilation),
|
||||
tuple(self.padding),
|
||||
tuple(self.stride),
|
||||
),
|
||||
output_size=(int(self.output_size[0]), int(self.output_size[1])),
|
||||
kernel_size=(1, 1),
|
||||
dilation=(1, 1),
|
||||
padding=(0, 0),
|
||||
stride=(1, 1),
|
||||
)
|
||||
|
||||
weights_fold = torch.nn.functional.unfold(
|
||||
self.weights.requires_grad_(True),
|
||||
tuple(self.kernel_size),
|
||||
tuple(self.dilation),
|
||||
tuple(self.padding),
|
||||
tuple(self.stride),
|
||||
).squeeze(-1)
|
||||
|
||||
if input.device == torch.device("cpu"):
|
||||
number_of_cpu_processes: int = int(self.number_of_processes)
|
||||
else:
|
||||
number_of_cpu_processes = -1
|
||||
|
||||
# Here...
|
||||
parameter_list = torch.tensor(
|
||||
[
|
||||
int(self.approximation_enable), # 0
|
||||
int(self.number_of_trunc_bits), # 1
|
||||
int(self.number_of_frac), # 2
|
||||
int(number_of_cpu_processes), # 3
|
||||
],
|
||||
dtype=torch.int64,
|
||||
)
|
||||
|
||||
output = self.functional_multi(input_fold, weights_fold, parameter_list)
|
||||
|
||||
if self.bias is not None:
|
||||
output += self.bias.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
class FunctionalMultiConv2d(torch.autograd.Function):
|
||||
@staticmethod
|
||||
def forward( # type: ignore
|
||||
ctx,
|
||||
input: torch.Tensor,
|
||||
weights: torch.Tensor,
|
||||
parameter_list: torch.Tensor,
|
||||
) -> torch.Tensor:
|
||||
|
||||
assert input.ndim == 4
|
||||
assert input.dtype is torch.float32
|
||||
assert input.is_contiguous() is True
|
||||
|
||||
assert weights.ndim == 2
|
||||
assert weights.dtype is torch.float32
|
||||
assert weights.is_contiguous() is True
|
||||
|
||||
assert input.shape[1] == weights.shape[1]
|
||||
|
||||
approximation_enable = bool(parameter_list[0])
|
||||
number_of_trunc_bits = int(parameter_list[1])
|
||||
number_of_frac = int(parameter_list[2])
|
||||
number_of_processes = int(parameter_list[3])
|
||||
|
||||
assert input.device == weights.device
|
||||
|
||||
output = torch.empty(
|
||||
(input.shape[0], weights.shape[0], input.shape[2], input.shape[3]),
|
||||
dtype=weights.dtype,
|
||||
device=weights.device,
|
||||
requires_grad=True,
|
||||
)
|
||||
assert output.is_contiguous() is True
|
||||
|
||||
multiplier: MultiApp = MultiApp()
|
||||
|
||||
multiplier.update_with_init_vector_multi_pattern(
|
||||
input.data_ptr(),
|
||||
weights.data_ptr(),
|
||||
output.data_ptr(),
|
||||
int(output.shape[0]), # pattern
|
||||
int(output.shape[1]), # feature channel
|
||||
int(output.shape[2]), # x
|
||||
int(output.shape[3]), # y
|
||||
int(input.shape[1]), # input channel
|
||||
int(number_of_processes),
|
||||
bool(approximation_enable),
|
||||
int(number_of_trunc_bits),
|
||||
int(number_of_frac),
|
||||
)
|
||||
|
||||
ctx.save_for_backward(
|
||||
input.detach(),
|
||||
weights.detach(),
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@staticmethod
|
||||
def backward(ctx, grad_output):
|
||||
|
||||
(input, weights) = ctx.saved_tensors
|
||||
|
||||
grad_input = (
|
||||
grad_output.unsqueeze(2) * weights.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
|
||||
).sum(1)
|
||||
grad_weights = (
|
||||
(grad_output.unsqueeze(2) * input.unsqueeze(1)).sum(0).sum(-1).sum(-1)
|
||||
)
|
||||
grad_parameter_list = None
|
||||
|
||||
return (grad_input, grad_weights, grad_parameter_list)
|
317
network/Dataset.py
Normal file
317
network/Dataset.py
Normal file
|
@ -0,0 +1,317 @@
|
|||
from abc import ABC, abstractmethod
|
||||
import torch
|
||||
import numpy as np
|
||||
import torchvision as tv # type: ignore
|
||||
from network.Parameter import Config
|
||||
|
||||
|
||||
class DatasetMaster(torch.utils.data.Dataset, ABC):
|
||||
|
||||
path_label: str
|
||||
label_storage: np.ndarray
|
||||
pattern_storage: np.ndarray
|
||||
number_of_pattern: int
|
||||
mean: list[float]
|
||||
|
||||
# Initialize
|
||||
def __init__(
|
||||
self,
|
||||
train: bool = False,
|
||||
path_pattern: str = "./",
|
||||
path_label: str = "./",
|
||||
) -> None:
|
||||
super().__init__()
|
||||
|
||||
if train is True:
|
||||
self.label_storage = np.load(path_label + "/TrainLabelStorage.npy")
|
||||
else:
|
||||
self.label_storage = np.load(path_label + "/TestLabelStorage.npy")
|
||||
|
||||
if train is True:
|
||||
self.pattern_storage = np.load(path_pattern + "/TrainPatternStorage.npy")
|
||||
else:
|
||||
self.pattern_storage = np.load(path_pattern + "/TestPatternStorage.npy")
|
||||
|
||||
self.number_of_pattern = self.label_storage.shape[0]
|
||||
|
||||
self.mean = []
|
||||
|
||||
def __len__(self) -> int:
|
||||
return self.number_of_pattern
|
||||
|
||||
# Get one pattern at position index
|
||||
@abstractmethod
|
||||
def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
|
||||
pass
|
||||
|
||||
|
||||
class DatasetMNIST(DatasetMaster):
|
||||
"""Contstructor"""
|
||||
|
||||
# Initialize
|
||||
def __init__(
|
||||
self,
|
||||
train: bool = False,
|
||||
path_pattern: str = "./",
|
||||
path_label: str = "./",
|
||||
) -> None:
|
||||
super().__init__(train, path_pattern, path_label)
|
||||
|
||||
self.pattern_storage = np.ascontiguousarray(
|
||||
self.pattern_storage[:, np.newaxis, :, :].astype(dtype=np.float32)
|
||||
)
|
||||
|
||||
self.pattern_storage /= np.max(self.pattern_storage)
|
||||
|
||||
mean = self.pattern_storage.mean(3).mean(2).mean(0)
|
||||
self.mean = [*mean]
|
||||
|
||||
def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
|
||||
|
||||
image = self.pattern_storage[index, 0:1, :, :]
|
||||
target = int(self.label_storage[index])
|
||||
return torch.tensor(image), target
|
||||
|
||||
def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
|
||||
"""0. The test image comes in
|
||||
1. is center cropped
|
||||
2. returned.
|
||||
|
||||
This is a 1 channel version (e.g. one gray channel).
|
||||
"""
|
||||
|
||||
assert len(cfg.image_statistics.mean) == 1
|
||||
assert len(cfg.image_statistics.the_size) == 2
|
||||
assert cfg.image_statistics.the_size[0] > 0
|
||||
assert cfg.image_statistics.the_size[1] > 0
|
||||
|
||||
# Transformation chain
|
||||
my_transforms: torch.nn.Sequential = torch.nn.Sequential(
|
||||
tv.transforms.CenterCrop(size=cfg.image_statistics.the_size),
|
||||
)
|
||||
scripted_transforms = torch.jit.script(my_transforms)
|
||||
|
||||
# Preprocess the input data
|
||||
pattern = scripted_transforms(pattern)
|
||||
|
||||
gray = pattern[:, 0:1, :, :] + 1e-20
|
||||
|
||||
return gray
|
||||
|
||||
def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
|
||||
"""0. The training image comes in
|
||||
1. is cropped from a random position
|
||||
2. returned.
|
||||
|
||||
This is a 1 channel version (e.g. one gray channel).
|
||||
"""
|
||||
|
||||
assert len(cfg.image_statistics.mean) == 1
|
||||
assert len(cfg.image_statistics.the_size) == 2
|
||||
assert cfg.image_statistics.the_size[0] > 0
|
||||
assert cfg.image_statistics.the_size[1] > 0
|
||||
|
||||
# Transformation chain
|
||||
my_transforms: torch.nn.Sequential = torch.nn.Sequential(
|
||||
tv.transforms.RandomCrop(size=cfg.image_statistics.the_size),
|
||||
)
|
||||
scripted_transforms = torch.jit.script(my_transforms)
|
||||
|
||||
# Preprocess the input data
|
||||
pattern = scripted_transforms(pattern)
|
||||
|
||||
gray = pattern[:, 0:1, :, :] + 1e-20
|
||||
|
||||
return gray
|
||||
|
||||
|
||||
class DatasetFashionMNIST(DatasetMaster):
|
||||
"""Contstructor"""
|
||||
|
||||
# Initialize
|
||||
def __init__(
|
||||
self,
|
||||
train: bool = False,
|
||||
path_pattern: str = "./",
|
||||
path_label: str = "./",
|
||||
) -> None:
|
||||
super().__init__(train, path_pattern, path_label)
|
||||
|
||||
self.pattern_storage = np.ascontiguousarray(
|
||||
self.pattern_storage[:, np.newaxis, :, :].astype(dtype=np.float32)
|
||||
)
|
||||
|
||||
self.pattern_storage /= np.max(self.pattern_storage)
|
||||
|
||||
mean = self.pattern_storage.mean(3).mean(2).mean(0)
|
||||
self.mean = [*mean]
|
||||
|
||||
def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
|
||||
|
||||
image = self.pattern_storage[index, 0:1, :, :]
|
||||
target = int(self.label_storage[index])
|
||||
return torch.tensor(image), target
|
||||
|
||||
def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
|
||||
"""0. The test image comes in
|
||||
1. is center cropped
|
||||
2. returned.
|
||||
|
||||
This is a 1 channel version (e.g. one gray channel).
|
||||
"""
|
||||
|
||||
assert len(cfg.image_statistics.mean) == 1
|
||||
assert len(cfg.image_statistics.the_size) == 2
|
||||
assert cfg.image_statistics.the_size[0] > 0
|
||||
assert cfg.image_statistics.the_size[1] > 0
|
||||
|
||||
# Transformation chain
|
||||
my_transforms: torch.nn.Sequential = torch.nn.Sequential(
|
||||
tv.transforms.CenterCrop(size=cfg.image_statistics.the_size),
|
||||
)
|
||||
scripted_transforms = torch.jit.script(my_transforms)
|
||||
|
||||
# Preprocess the input data
|
||||
pattern = scripted_transforms(pattern)
|
||||
|
||||
gray = pattern[:, 0:1, :, :] + 1e-20
|
||||
|
||||
return gray
|
||||
|
||||
def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
|
||||
"""0. The training image comes in
|
||||
1. is cropped from a random position
|
||||
2. returned.
|
||||
|
||||
This is a 1 channel version (e.g. one gray channel).
|
||||
"""
|
||||
|
||||
assert len(cfg.image_statistics.mean) == 1
|
||||
assert len(cfg.image_statistics.the_size) == 2
|
||||
assert cfg.image_statistics.the_size[0] > 0
|
||||
assert cfg.image_statistics.the_size[1] > 0
|
||||
|
||||
# Transformation chain
|
||||
my_transforms: torch.nn.Sequential = torch.nn.Sequential(
|
||||
tv.transforms.RandomCrop(size=cfg.image_statistics.the_size),
|
||||
tv.transforms.RandomHorizontalFlip(p=cfg.augmentation.flip_p),
|
||||
tv.transforms.ColorJitter(
|
||||
brightness=cfg.augmentation.jitter_brightness,
|
||||
contrast=cfg.augmentation.jitter_contrast,
|
||||
saturation=cfg.augmentation.jitter_saturation,
|
||||
hue=cfg.augmentation.jitter_hue,
|
||||
),
|
||||
)
|
||||
scripted_transforms = torch.jit.script(my_transforms)
|
||||
|
||||
# Preprocess the input data
|
||||
pattern = scripted_transforms(pattern)
|
||||
|
||||
gray = pattern[:, 0:1, :, :] + 1e-20
|
||||
|
||||
return gray
|
||||
|
||||
|
||||
class DatasetCIFAR(DatasetMaster):
|
||||
"""Contstructor"""
|
||||
|
||||
# Initialize
|
||||
def __init__(
|
||||
self,
|
||||
train: bool = False,
|
||||
path_pattern: str = "./",
|
||||
path_label: str = "./",
|
||||
) -> None:
|
||||
super().__init__(train, path_pattern, path_label)
|
||||
|
||||
self.pattern_storage = np.ascontiguousarray(
|
||||
np.moveaxis(self.pattern_storage.astype(dtype=np.float32), 3, 1)
|
||||
)
|
||||
self.pattern_storage /= np.max(self.pattern_storage)
|
||||
|
||||
mean = self.pattern_storage.mean(3).mean(2).mean(0)
|
||||
self.mean = [*mean]
|
||||
|
||||
def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
|
||||
|
||||
image = self.pattern_storage[index, :, :, :]
|
||||
target = int(self.label_storage[index])
|
||||
return torch.tensor(image), target
|
||||
|
||||
def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
|
||||
"""0. The test image comes in
|
||||
1. is center cropped
|
||||
2. returned.
|
||||
|
||||
This is a 3 channel version (e.g. r,g,b channels).
|
||||
"""
|
||||
|
||||
assert len(cfg.image_statistics.mean) == 3
|
||||
assert len(cfg.image_statistics.the_size) == 2
|
||||
assert cfg.image_statistics.the_size[0] > 0
|
||||
assert cfg.image_statistics.the_size[1] > 0
|
||||
|
||||
# Transformation chain
|
||||
my_transforms: torch.nn.Sequential = torch.nn.Sequential(
|
||||
tv.transforms.CenterCrop(size=cfg.image_statistics.the_size),
|
||||
)
|
||||
scripted_transforms = torch.jit.script(my_transforms)
|
||||
|
||||
# Preprocess the input data
|
||||
pattern = scripted_transforms(pattern)
|
||||
|
||||
r = pattern[:, 0:1, :, :] + 1e-20
|
||||
g = pattern[:, 1:2, :, :] + 1e-20
|
||||
b = pattern[:, 2:3, :, :] + 1e-20
|
||||
|
||||
new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1)
|
||||
return new_tensor
|
||||
|
||||
def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
|
||||
"""0. The training image comes in
|
||||
1. is cropped from a random position
|
||||
2. is randomly horizontally flipped
|
||||
3. is randomly color jitteres
|
||||
4. returned.
|
||||
|
||||
This is a 3 channel version (e.g. r,g,b channels).
|
||||
"""
|
||||
assert len(cfg.image_statistics.mean) == 3
|
||||
assert len(cfg.image_statistics.the_size) == 2
|
||||
assert cfg.image_statistics.the_size[0] > 0
|
||||
assert cfg.image_statistics.the_size[1] > 0
|
||||
|
||||
# Transformation chain
|
||||
my_transforms: torch.nn.Sequential = torch.nn.Sequential(
|
||||
tv.transforms.RandomCrop(size=cfg.image_statistics.the_size),
|
||||
tv.transforms.RandomHorizontalFlip(p=cfg.augmentation.flip_p),
|
||||
tv.transforms.ColorJitter(
|
||||
brightness=cfg.augmentation.jitter_brightness,
|
||||
contrast=cfg.augmentation.jitter_contrast,
|
||||
saturation=cfg.augmentation.jitter_saturation,
|
||||
hue=cfg.augmentation.jitter_hue,
|
||||
),
|
||||
)
|
||||
scripted_transforms = torch.jit.script(my_transforms)
|
||||
|
||||
# Preprocess the input data
|
||||
pattern = scripted_transforms(pattern)
|
||||
|
||||
r = pattern[:, 0:1, :, :] + 1e-20
|
||||
g = pattern[:, 1:2, :, :] + 1e-20
|
||||
b = pattern[:, 2:3, :, :] + 1e-20
|
||||
|
||||
new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1)
|
||||
return new_tensor
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pass
|
185
network/Parameter.py
Normal file
185
network/Parameter.py
Normal file
|
@ -0,0 +1,185 @@
|
|||
# %%
|
||||
from dataclasses import dataclass, field
|
||||
import numpy as np
|
||||
import torch
|
||||
import os
|
||||
|
||||
|
||||
@dataclass
|
||||
class Network:
|
||||
"""Parameters of the network. The details about
|
||||
its layers and the number of output neurons."""
|
||||
|
||||
layer_type: list[str] = field(default_factory=list)
|
||||
forward_neuron_numbers: list[list[int]] = field(default_factory=list)
|
||||
forward_kernel_size: list[list[int]] = field(default_factory=list)
|
||||
strides: list[list[int]] = field(default_factory=list)
|
||||
dilation: list[list[int]] = field(default_factory=list)
|
||||
padding: list[list[int]] = field(default_factory=list)
|
||||
|
||||
number_of_output_neurons: int = field(default=0)
|
||||
|
||||
|
||||
@dataclass
|
||||
class LearningParameters:
|
||||
"""Parameter required for training"""
|
||||
|
||||
learning_active: bool = field(default=True)
|
||||
|
||||
loss_mode: int = field(default=0)
|
||||
loss_coeffs_mse: float = field(default=0.5)
|
||||
loss_coeffs_kldiv: float = field(default=1.0)
|
||||
|
||||
optimizer_name: str = field(default="Adam")
|
||||
|
||||
learning_rate_gamma_w: float = field(default=-1.0)
|
||||
learning_rate_threshold_w: float = field(default=0.00001)
|
||||
|
||||
lr_schedule_name: str = field(default="ReduceLROnPlateau")
|
||||
lr_scheduler_use_performance: bool = field(default=False)
|
||||
lr_scheduler_factor_w: float = field(default=0.75)
|
||||
lr_scheduler_patience_w: int = field(default=-1)
|
||||
lr_scheduler_tau_w: int = field(default=10)
|
||||
|
||||
number_of_batches_for_one_update: int = field(default=1)
|
||||
overload_path: str = field(default="Previous")
|
||||
|
||||
weight_noise_range: list[float] = field(default_factory=list)
|
||||
eps_xy_intitial: float = field(default=0.1)
|
||||
|
||||
# disable_scale_grade: bool = field(default=False)
|
||||
# kepp_last_grad_scale: bool = field(default=True)
|
||||
|
||||
sbs_skip_gradient_calculation: list[bool] = field(default_factory=list)
|
||||
|
||||
adapt_learning_rate_after_minibatch: bool = field(default=True)
|
||||
|
||||
w_trainable: list[bool] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Augmentation:
|
||||
"""Parameters used for data augmentation."""
|
||||
|
||||
crop_width_in_pixel: int = field(default=2)
|
||||
|
||||
flip_p: float = field(default=0.5)
|
||||
|
||||
jitter_brightness: float = field(default=0.5)
|
||||
jitter_contrast: float = field(default=0.1)
|
||||
jitter_saturation: float = field(default=0.1)
|
||||
jitter_hue: float = field(default=0.15)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ImageStatistics:
|
||||
"""(Statistical) information about the input. i.e.
|
||||
mean values and the x and y size of the input"""
|
||||
|
||||
mean: list[float] = field(default_factory=list)
|
||||
the_size: list[int] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ApproximationSetting:
|
||||
# Approximation CONV2D Layer
|
||||
approximation_enable: list[bool] = field(default_factory=list)
|
||||
number_of_trunc_bits: list[int] = field(default_factory=list)
|
||||
number_of_frac_bits: list[int] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Config:
|
||||
"""Master config class."""
|
||||
|
||||
# Sub classes
|
||||
network_structure: Network = field(default_factory=Network)
|
||||
learning_parameters: LearningParameters = field(default_factory=LearningParameters)
|
||||
augmentation: Augmentation = field(default_factory=Augmentation)
|
||||
image_statistics: ImageStatistics = field(default_factory=ImageStatistics)
|
||||
approximation_setting: ApproximationSetting = field(
|
||||
default_factory=ApproximationSetting
|
||||
)
|
||||
|
||||
# For labeling simulations
|
||||
# (not actively used)
|
||||
simulation_id: int = field(default=0)
|
||||
stage_id: int = field(default=-1)
|
||||
|
||||
# Size of one sub-mini-batch
|
||||
# (the number of pattern processed at the same time)
|
||||
batch_size: int = field(default=500)
|
||||
|
||||
# The data set
|
||||
# Identifier for Dataset.oy
|
||||
data_mode: str = field(default="")
|
||||
# The path to the data set
|
||||
data_path: str = field(default="")
|
||||
|
||||
# The epochs identifier
|
||||
epoch_id: int = field(default=0)
|
||||
# Maximum number of epochs
|
||||
epoch_id_max: int = field(default=10000)
|
||||
|
||||
# Number of cpu threads
|
||||
number_of_cpu_processes: int = field(default=-1)
|
||||
# Adjust the number of pattern processed in
|
||||
# one step to the amount of core or with HT threads
|
||||
# of the cpu
|
||||
enable_cpu_thread_balacing: bool = field(default=True)
|
||||
|
||||
# Path for storing information
|
||||
weight_path: str = field(default="Parameters")
|
||||
log_path: str = field(default="Log")
|
||||
|
||||
# Other SbS Settings
|
||||
|
||||
number_of_spikes: list[int] = field(default_factory=list)
|
||||
cooldown_after_number_of_spikes: int = field(default=-1)
|
||||
reduction_cooldown: float = field(default=25.0)
|
||||
|
||||
epsilon_0: float = field(default=1.0)
|
||||
forgetting_offset: float = field(default=-1.0)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
"""Post init determines the number of cores.
|
||||
Creates the required directory and gives us an optimized
|
||||
(for the amount of cores) batch size."""
|
||||
number_of_cpu_processes_temp = os.cpu_count()
|
||||
|
||||
if self.number_of_cpu_processes < 1:
|
||||
if number_of_cpu_processes_temp is None:
|
||||
self.number_of_cpu_processes = 1
|
||||
else:
|
||||
self.number_of_cpu_processes = number_of_cpu_processes_temp
|
||||
|
||||
os.makedirs(self.weight_path, exist_ok=True)
|
||||
|
||||
if self.enable_cpu_thread_balacing is True:
|
||||
self.batch_size = (
|
||||
self.batch_size // self.number_of_cpu_processes
|
||||
) * self.number_of_cpu_processes
|
||||
|
||||
self.batch_size = np.max((self.batch_size, self.number_of_cpu_processes))
|
||||
self.batch_size = int(self.batch_size)
|
||||
|
||||
def get_epsilon_t(self, number_of_spikes: int):
|
||||
"""Generates the time series of the basic epsilon."""
|
||||
t = np.arange(0, number_of_spikes, dtype=np.float32) + 1
|
||||
np_epsilon_t: np.ndarray = t ** (
|
||||
-1.0 / 2.0
|
||||
) # np.ones((number_of_spikes), dtype=np.float32)
|
||||
|
||||
if (self.cooldown_after_number_of_spikes < number_of_spikes) and (
|
||||
self.cooldown_after_number_of_spikes >= 0
|
||||
):
|
||||
np_epsilon_t[
|
||||
self.cooldown_after_number_of_spikes : number_of_spikes
|
||||
] /= self.reduction_cooldown
|
||||
return torch.tensor(np_epsilon_t)
|
||||
|
||||
def get_update_after_x_pattern(self):
|
||||
"""Tells us after how many pattern we need to update the weights."""
|
||||
return (
|
||||
self.batch_size * self.learning_parameters.number_of_batches_for_one_update
|
||||
)
|
714
network/SbS.py
Normal file
714
network/SbS.py
Normal file
|
@ -0,0 +1,714 @@
|
|||
import torch
|
||||
|
||||
from network.CPP.PySpikeGeneration2DManyIP import SpikeGeneration2DManyIP
|
||||
from network.CPP.PyHDynamicCNNManyIP import HDynamicCNNManyIP
|
||||
from network.calculate_output_size import calculate_output_size
|
||||
|
||||
|
||||
class SbS(torch.nn.Module):
|
||||
|
||||
_epsilon_xy: torch.Tensor | None = None
|
||||
_epsilon_0: float
|
||||
_epsilon_t: torch.Tensor | None = None
|
||||
_weights: torch.nn.parameter.Parameter
|
||||
_weights_exists: bool = False
|
||||
_kernel_size: list[int]
|
||||
_stride: list[int]
|
||||
_dilation: list[int]
|
||||
_padding: list[int]
|
||||
_output_size: torch.Tensor
|
||||
_number_of_spikes: int
|
||||
_number_of_cpu_processes: int
|
||||
_number_of_neurons: int
|
||||
_number_of_input_neurons: int
|
||||
_epsilon_xy_intitial: float
|
||||
_h_initial: torch.Tensor | None = None
|
||||
_w_trainable: bool
|
||||
# _last_grad_scale: torch.nn.parameter.Parameter
|
||||
# _keep_last_grad_scale: bool
|
||||
# _disable_scale_grade: bool
|
||||
_forgetting_offset: torch.Tensor | None = None
|
||||
_weight_noise_range: list[float]
|
||||
_skip_gradient_calculation: bool
|
||||
_is_pooling_layer: bool
|
||||
_input_size: list[int]
|
||||
_output_layer: bool = False
|
||||
_local_learning: bool = False
|
||||
|
||||
device: torch.device
|
||||
default_dtype: torch.dtype
|
||||
_gpu_tuning_factor: int
|
||||
|
||||
_max_grad_weights: torch.Tensor | None = None
|
||||
|
||||
_number_of_grad_weight_contributions: float = 0.0
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
number_of_input_neurons: int,
|
||||
number_of_neurons: int,
|
||||
input_size: list[int],
|
||||
forward_kernel_size: list[int],
|
||||
number_of_spikes: int,
|
||||
epsilon_t: torch.Tensor,
|
||||
epsilon_xy_intitial: float = 0.1,
|
||||
epsilon_0: float = 1.0,
|
||||
weight_noise_range: list[float] = [0.0, 1.0],
|
||||
is_pooling_layer: bool = False,
|
||||
strides: list[int] = [1, 1],
|
||||
dilation: list[int] = [0, 0],
|
||||
padding: list[int] = [0, 0],
|
||||
number_of_cpu_processes: int = 1,
|
||||
w_trainable: bool = False,
|
||||
# keep_last_grad_scale: bool = False,
|
||||
# disable_scale_grade: bool = True,
|
||||
forgetting_offset: float = -1.0,
|
||||
skip_gradient_calculation: bool = False,
|
||||
device: torch.device | None = None,
|
||||
default_dtype: torch.dtype | None = None,
|
||||
gpu_tuning_factor: int = 5,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
|
||||
assert device is not None
|
||||
assert default_dtype is not None
|
||||
self.device = device
|
||||
self.default_dtype = default_dtype
|
||||
|
||||
self._w_trainable = bool(w_trainable)
|
||||
# self._keep_last_grad_scale = bool(keep_last_grad_scale)
|
||||
self._skip_gradient_calculation = bool(skip_gradient_calculation)
|
||||
# self._disable_scale_grade = bool(disable_scale_grade)
|
||||
self._epsilon_xy_intitial = float(epsilon_xy_intitial)
|
||||
self._stride = strides
|
||||
self._dilation = dilation
|
||||
self._padding = padding
|
||||
self._kernel_size = forward_kernel_size
|
||||
self._number_of_input_neurons = int(number_of_input_neurons)
|
||||
self._number_of_neurons = int(number_of_neurons)
|
||||
self._epsilon_0 = float(epsilon_0)
|
||||
self._number_of_cpu_processes = int(number_of_cpu_processes)
|
||||
self._number_of_spikes = int(number_of_spikes)
|
||||
self._weight_noise_range = weight_noise_range
|
||||
self._is_pooling_layer = bool(is_pooling_layer)
|
||||
|
||||
assert len(input_size) == 2
|
||||
self._input_size = input_size
|
||||
|
||||
# The GPU hates me...
|
||||
# Too many SbS threads == bad
|
||||
# Thus I need to limit them...
|
||||
# (Reminder: We cannot access the mini-batch size here,
|
||||
# which is part of the GPU thread size calculation...)
|
||||
if (self._input_size[0] * self._input_size[1]) > gpu_tuning_factor:
|
||||
self._gpu_tuning_factor = gpu_tuning_factor
|
||||
else:
|
||||
self._gpu_tuning_factor = 0
|
||||
|
||||
# self._last_grad_scale = torch.nn.parameter.Parameter(
|
||||
# torch.tensor(-1.0, dtype=self.default_dtype),
|
||||
# requires_grad=True,
|
||||
# )
|
||||
|
||||
self._forgetting_offset = torch.tensor(
|
||||
forgetting_offset, dtype=self.default_dtype, device=self.device
|
||||
)
|
||||
|
||||
self.epsilon_t = epsilon_t.type(dtype=self.default_dtype).to(device=self.device)
|
||||
|
||||
self._output_size = calculate_output_size(
|
||||
value=input_size,
|
||||
kernel_size=self._kernel_size,
|
||||
stride=self._stride,
|
||||
dilation=self._dilation,
|
||||
padding=self._padding,
|
||||
)
|
||||
|
||||
self.set_h_init_to_uniform()
|
||||
|
||||
self.functional_sbs = FunctionalSbS.apply
|
||||
|
||||
# ###############################################################
|
||||
# Initialize the weights
|
||||
# ###############################################################
|
||||
|
||||
if self._is_pooling_layer is True:
|
||||
self.weights = self._make_pooling_weights()
|
||||
|
||||
else:
|
||||
assert len(self._weight_noise_range) == 2
|
||||
weights = torch.empty(
|
||||
(
|
||||
int(self._kernel_size[0])
|
||||
* int(self._kernel_size[1])
|
||||
* int(self._number_of_input_neurons),
|
||||
int(self._number_of_neurons),
|
||||
),
|
||||
dtype=self.default_dtype,
|
||||
device=self.device,
|
||||
)
|
||||
|
||||
torch.nn.init.uniform_(
|
||||
weights,
|
||||
a=float(self._weight_noise_range[0]),
|
||||
b=float(self._weight_noise_range[1]),
|
||||
)
|
||||
self.weights = weights
|
||||
|
||||
####################################################################
|
||||
# Variables in and out #
|
||||
####################################################################
|
||||
|
||||
@property
|
||||
def epsilon_t(self) -> torch.Tensor | None:
|
||||
return self._epsilon_t
|
||||
|
||||
@epsilon_t.setter
|
||||
def epsilon_t(self, value: torch.Tensor):
|
||||
assert value is not None
|
||||
assert torch.is_tensor(value) is True
|
||||
assert value.dim() == 1
|
||||
assert value.dtype == self.default_dtype
|
||||
self._epsilon_t = (
|
||||
value.detach()
|
||||
.clone(memory_format=torch.contiguous_format)
|
||||
.type(dtype=self.default_dtype)
|
||||
.to(device=self.device)
|
||||
.requires_grad_(False)
|
||||
)
|
||||
|
||||
@property
|
||||
def weights(self) -> torch.Tensor | None:
|
||||
if self._weights_exists is False:
|
||||
return None
|
||||
else:
|
||||
return self._weights
|
||||
|
||||
@weights.setter
|
||||
def weights(self, value: torch.Tensor):
|
||||
assert value is not None
|
||||
assert torch.is_tensor(value) is True
|
||||
assert value.dim() == 2
|
||||
temp: torch.Tensor = (
|
||||
value.detach()
|
||||
.clone(memory_format=torch.contiguous_format)
|
||||
.type(dtype=self.default_dtype)
|
||||
.to(device=self.device)
|
||||
)
|
||||
temp /= temp.sum(dim=0, keepdim=True, dtype=self.default_dtype)
|
||||
if self._weights_exists is False:
|
||||
self._weights = torch.nn.parameter.Parameter(temp, requires_grad=True)
|
||||
self._weights_exists = True
|
||||
else:
|
||||
self._weights.data = temp
|
||||
|
||||
@property
|
||||
def h_initial(self) -> torch.Tensor | None:
|
||||
return self._h_initial
|
||||
|
||||
@h_initial.setter
|
||||
def h_initial(self, value: torch.Tensor):
|
||||
assert value is not None
|
||||
assert torch.is_tensor(value) is True
|
||||
assert value.dim() == 1
|
||||
assert value.dtype == self.default_dtype
|
||||
self._h_initial = (
|
||||
value.detach()
|
||||
.clone(memory_format=torch.contiguous_format)
|
||||
.type(dtype=self.default_dtype)
|
||||
.to(device=self.device)
|
||||
.requires_grad_(False)
|
||||
)
|
||||
|
||||
def update_pre_care(self):
|
||||
|
||||
if self._weights.grad is not None:
|
||||
assert self._number_of_grad_weight_contributions > 0
|
||||
self._weights.grad /= self._number_of_grad_weight_contributions
|
||||
self._number_of_grad_weight_contributions = 0.0
|
||||
|
||||
def update_after_care(self, threshold_weight: float):
|
||||
|
||||
if self._w_trainable is True:
|
||||
self.norm_weights()
|
||||
self.threshold_weights(threshold_weight)
|
||||
self.norm_weights()
|
||||
|
||||
# def after_batch(self, new_state: bool = False):
|
||||
# if self._keep_last_grad_scale is True:
|
||||
# self._last_grad_scale.data = self._last_grad_scale.grad
|
||||
# self._keep_last_grad_scale = new_state
|
||||
|
||||
# self._last_grad_scale.grad = torch.zeros_like(self._last_grad_scale.grad)
|
||||
|
||||
####################################################################
|
||||
# Helper functions #
|
||||
####################################################################
|
||||
|
||||
def _make_pooling_weights(self) -> torch.Tensor:
|
||||
"""For generating the pooling weights."""
|
||||
|
||||
assert self._number_of_neurons is not None
|
||||
assert self._kernel_size is not None
|
||||
|
||||
weights: torch.Tensor = torch.zeros(
|
||||
(
|
||||
int(self._kernel_size[0]),
|
||||
int(self._kernel_size[1]),
|
||||
int(self._number_of_neurons),
|
||||
int(self._number_of_neurons),
|
||||
),
|
||||
dtype=self.default_dtype,
|
||||
device=self.device,
|
||||
)
|
||||
|
||||
for i in range(0, int(self._number_of_neurons)):
|
||||
weights[:, :, i, i] = 1.0
|
||||
|
||||
weights = weights.moveaxis(-1, 0).moveaxis(-1, 1)
|
||||
|
||||
weights = torch.nn.functional.unfold(
|
||||
input=weights,
|
||||
kernel_size=(int(self._kernel_size[0]), int(self._kernel_size[1])),
|
||||
dilation=(1, 1),
|
||||
padding=(0, 0),
|
||||
stride=(1, 1),
|
||||
).squeeze()
|
||||
|
||||
weights = torch.moveaxis(weights, 0, 1)
|
||||
|
||||
return weights
|
||||
|
||||
def set_h_init_to_uniform(self) -> None:
|
||||
|
||||
assert self._number_of_neurons > 2
|
||||
|
||||
self.h_initial: torch.Tensor = torch.full(
|
||||
(self._number_of_neurons,),
|
||||
(1.0 / float(self._number_of_neurons)),
|
||||
dtype=self.default_dtype,
|
||||
device=self.device,
|
||||
)
|
||||
|
||||
def norm_weights(self) -> None:
|
||||
assert self._weights_exists is True
|
||||
temp: torch.Tensor = (
|
||||
self._weights.data.detach()
|
||||
.clone(memory_format=torch.contiguous_format)
|
||||
.type(dtype=self.default_dtype)
|
||||
.to(device=self.device)
|
||||
)
|
||||
temp /= temp.sum(dim=0, keepdim=True, dtype=self.default_dtype)
|
||||
self._weights.data = temp
|
||||
|
||||
def threshold_weights(self, threshold: float) -> None:
|
||||
assert self._weights_exists is True
|
||||
assert threshold >= 0
|
||||
|
||||
torch.clamp(
|
||||
self._weights.data,
|
||||
min=float(threshold),
|
||||
max=None,
|
||||
out=self._weights.data,
|
||||
)
|
||||
|
||||
####################################################################
|
||||
# Forward #
|
||||
####################################################################
|
||||
|
||||
def forward(
|
||||
self, input: torch.Tensor, labels: torch.Tensor | None = None
|
||||
) -> torch.Tensor:
|
||||
|
||||
# Are we happy with the input?
|
||||
assert input is not None
|
||||
assert torch.is_tensor(input) is True
|
||||
assert input.dim() == 4
|
||||
assert input.dtype == self.default_dtype
|
||||
assert input.shape[1] == self._number_of_input_neurons
|
||||
assert input.shape[2] == self._input_size[0]
|
||||
assert input.shape[3] == self._input_size[1]
|
||||
|
||||
# Are we happy with the rest of the network?
|
||||
assert self._epsilon_0 is not None
|
||||
assert self._epsilon_t is not None
|
||||
|
||||
assert self._h_initial is not None
|
||||
assert self._forgetting_offset is not None
|
||||
|
||||
assert self._weights_exists is True
|
||||
assert self._weights is not None
|
||||
|
||||
input_convolved = torch.nn.functional.fold(
|
||||
torch.nn.functional.unfold(
|
||||
input.requires_grad_(True),
|
||||
kernel_size=(int(self._kernel_size[0]), int(self._kernel_size[1])),
|
||||
dilation=(int(self._dilation[0]), int(self._dilation[1])),
|
||||
padding=(int(self._padding[0]), int(self._padding[1])),
|
||||
stride=(int(self._stride[0]), int(self._stride[1])),
|
||||
),
|
||||
output_size=tuple(self._output_size.tolist()),
|
||||
kernel_size=(1, 1),
|
||||
dilation=(1, 1),
|
||||
padding=(0, 0),
|
||||
stride=(1, 1),
|
||||
)
|
||||
|
||||
epsilon_t_0: torch.Tensor = (
|
||||
(self._epsilon_t * self._epsilon_0).type(input.dtype).to(input.device)
|
||||
)
|
||||
|
||||
parameter_list = torch.tensor(
|
||||
[
|
||||
int(self._w_trainable), # 0
|
||||
int(0), # int(self._disable_scale_grade), # 1
|
||||
int(0), # int(self._keep_last_grad_scale), # 2
|
||||
int(self._skip_gradient_calculation), # 3
|
||||
int(self._number_of_spikes), # 4
|
||||
int(self._number_of_cpu_processes), # 5
|
||||
int(self._output_size[0]), # 6
|
||||
int(self._output_size[1]), # 7
|
||||
int(self._gpu_tuning_factor), # 8
|
||||
int(self._output_layer), # 9
|
||||
int(self._local_learning), # 10
|
||||
],
|
||||
dtype=torch.int64,
|
||||
)
|
||||
|
||||
if self._epsilon_xy is None:
|
||||
self._epsilon_xy = torch.full(
|
||||
(
|
||||
input_convolved.shape[1],
|
||||
input_convolved.shape[2],
|
||||
input_convolved.shape[3],
|
||||
),
|
||||
float(self._epsilon_xy_intitial),
|
||||
dtype=self.default_dtype,
|
||||
device=self.device,
|
||||
)
|
||||
|
||||
assert self._epsilon_xy is not None
|
||||
# In the case somebody tried to replace the matrix with wrong dimensions
|
||||
assert self._epsilon_xy.shape[0] == input_convolved.shape[1]
|
||||
assert self._epsilon_xy.shape[1] == input_convolved.shape[2]
|
||||
assert self._epsilon_xy.shape[2] == input_convolved.shape[3]
|
||||
|
||||
# SbS forward functional
|
||||
output = self.functional_sbs(
|
||||
input_convolved,
|
||||
self._epsilon_xy,
|
||||
epsilon_t_0,
|
||||
self._weights,
|
||||
self._h_initial,
|
||||
parameter_list,
|
||||
# self._last_grad_scale,
|
||||
self._forgetting_offset,
|
||||
)
|
||||
|
||||
self._number_of_grad_weight_contributions += (
|
||||
output.shape[0] * output.shape[-2] * output.shape[-1]
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
class FunctionalSbS(torch.autograd.Function):
|
||||
@staticmethod
|
||||
def forward( # type: ignore
|
||||
ctx,
|
||||
input: torch.Tensor,
|
||||
epsilon_xy: torch.Tensor,
|
||||
epsilon_t_0: torch.Tensor,
|
||||
weights: torch.Tensor,
|
||||
h_initial: torch.Tensor,
|
||||
parameter_list: torch.Tensor,
|
||||
# grad_output_scale: torch.Tensor,
|
||||
forgetting_offset: torch.Tensor,
|
||||
) -> torch.Tensor:
|
||||
|
||||
assert input.dim() == 4
|
||||
|
||||
number_of_spikes: int = int(parameter_list[4])
|
||||
|
||||
if input.device == torch.device("cpu"):
|
||||
spike_number_of_cpu_processes: int = int(parameter_list[5])
|
||||
else:
|
||||
spike_number_of_cpu_processes = -1
|
||||
|
||||
if input.device == torch.device("cpu"):
|
||||
hdyn_number_of_cpu_processes: int = int(parameter_list[5])
|
||||
else:
|
||||
hdyn_number_of_cpu_processes = -1
|
||||
|
||||
output_size_0: int = int(parameter_list[6])
|
||||
output_size_1: int = int(parameter_list[7])
|
||||
gpu_tuning_factor: int = int(parameter_list[8])
|
||||
|
||||
# ###########################################################
|
||||
# Spike generation
|
||||
# ###########################################################
|
||||
|
||||
# ############################################
|
||||
# Normalized cumsum
|
||||
# (beware of the pytorch bug! Thus .clone()!)
|
||||
# ############################################
|
||||
input_cumsum: torch.Tensor = torch.cumsum(input, dim=1, dtype=input.dtype)
|
||||
input_cumsum_last: torch.Tensor = input_cumsum[:, -1, :, :].unsqueeze(1).clone()
|
||||
input_cumsum /= input_cumsum_last
|
||||
|
||||
# ############################################
|
||||
# Get the required random numbers
|
||||
# ############################################
|
||||
random_values = torch.rand(
|
||||
size=[
|
||||
input_cumsum.shape[0],
|
||||
number_of_spikes,
|
||||
input_cumsum.shape[2],
|
||||
input_cumsum.shape[3],
|
||||
],
|
||||
dtype=input.dtype,
|
||||
device=input.device,
|
||||
)
|
||||
|
||||
# ############################################
|
||||
# Make space for the results
|
||||
# ############################################
|
||||
spikes = torch.empty_like(random_values, dtype=torch.int64, device=input.device)
|
||||
|
||||
assert input_cumsum.is_contiguous() is True
|
||||
assert random_values.is_contiguous() is True
|
||||
assert spikes.is_contiguous() is True
|
||||
|
||||
# time_start: float = time.perf_counter()
|
||||
spike_generation: SpikeGeneration2DManyIP = SpikeGeneration2DManyIP()
|
||||
|
||||
spike_generation.spike_generation(
|
||||
input_cumsum.data_ptr(),
|
||||
int(input_cumsum.shape[0]),
|
||||
int(input_cumsum.shape[1]),
|
||||
int(input_cumsum.shape[2]),
|
||||
int(input_cumsum.shape[3]),
|
||||
random_values.data_ptr(),
|
||||
int(random_values.shape[0]),
|
||||
int(random_values.shape[1]),
|
||||
int(random_values.shape[2]),
|
||||
int(random_values.shape[3]),
|
||||
spikes.data_ptr(),
|
||||
int(spikes.shape[0]),
|
||||
int(spikes.shape[1]),
|
||||
int(spikes.shape[2]),
|
||||
int(spikes.shape[3]),
|
||||
int(spike_number_of_cpu_processes),
|
||||
)
|
||||
del random_values
|
||||
del input_cumsum
|
||||
|
||||
# ###########################################################
|
||||
# H dynamic
|
||||
# ###########################################################
|
||||
|
||||
assert epsilon_t_0.ndim == 1
|
||||
assert epsilon_t_0.shape[0] >= number_of_spikes
|
||||
|
||||
# ############################################
|
||||
# Make space for the results
|
||||
# ############################################
|
||||
|
||||
output = torch.empty(
|
||||
(
|
||||
int(input.shape[0]),
|
||||
int(weights.shape[1]),
|
||||
output_size_0,
|
||||
output_size_1,
|
||||
),
|
||||
dtype=input.dtype,
|
||||
device=input.device,
|
||||
)
|
||||
|
||||
assert output.is_contiguous() is True
|
||||
assert epsilon_xy.is_contiguous() is True
|
||||
assert epsilon_t_0.is_contiguous() is True
|
||||
assert weights.is_contiguous() is True
|
||||
assert spikes.is_contiguous() is True
|
||||
assert h_initial.is_contiguous() is True
|
||||
|
||||
assert epsilon_xy.ndim == 3
|
||||
assert weights.ndim == 2
|
||||
assert h_initial.ndim == 1
|
||||
|
||||
h_dynamic: HDynamicCNNManyIP = HDynamicCNNManyIP()
|
||||
|
||||
h_dynamic.update(
|
||||
output.data_ptr(),
|
||||
int(output.shape[0]),
|
||||
int(output.shape[1]),
|
||||
int(output.shape[2]),
|
||||
int(output.shape[3]),
|
||||
epsilon_xy.data_ptr(),
|
||||
int(epsilon_xy.shape[0]),
|
||||
int(epsilon_xy.shape[1]),
|
||||
int(epsilon_xy.shape[2]),
|
||||
epsilon_t_0.data_ptr(),
|
||||
int(epsilon_t_0.shape[0]),
|
||||
weights.data_ptr(),
|
||||
int(weights.shape[0]),
|
||||
int(weights.shape[1]),
|
||||
spikes.data_ptr(),
|
||||
int(spikes.shape[0]),
|
||||
int(spikes.shape[1]),
|
||||
int(spikes.shape[2]),
|
||||
int(spikes.shape[3]),
|
||||
h_initial.data_ptr(),
|
||||
int(h_initial.shape[0]),
|
||||
hdyn_number_of_cpu_processes,
|
||||
float(forgetting_offset.item()),
|
||||
int(gpu_tuning_factor),
|
||||
)
|
||||
del spikes
|
||||
|
||||
# ###########################################################
|
||||
# Save the necessary data for the backward pass
|
||||
# ###########################################################
|
||||
|
||||
ctx.save_for_backward(
|
||||
input,
|
||||
weights,
|
||||
output,
|
||||
parameter_list,
|
||||
# grad_output_scale,
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@staticmethod
|
||||
def backward(ctx, grad_output):
|
||||
# ##############################################
|
||||
# Get the variables back
|
||||
# ##############################################
|
||||
(
|
||||
input,
|
||||
weights,
|
||||
output,
|
||||
parameter_list,
|
||||
# last_grad_scale,
|
||||
) = ctx.saved_tensors
|
||||
|
||||
# ##############################################
|
||||
# Default output
|
||||
# ##############################################
|
||||
grad_input = None
|
||||
grad_eps_xy = None
|
||||
grad_epsilon_t_0 = None
|
||||
grad_weights = None
|
||||
grad_h_initial = None
|
||||
grad_parameter_list = None
|
||||
grad_forgetting_offset = None
|
||||
|
||||
# ##############################################
|
||||
# Parameters
|
||||
# ##############################################
|
||||
parameter_w_trainable: bool = bool(parameter_list[0])
|
||||
# parameter_disable_scale_grade: bool = bool(parameter_list[1])
|
||||
# parameter_keep_last_grad_scale: bool = bool(parameter_list[2])
|
||||
parameter_skip_gradient_calculation: bool = bool(parameter_list[3])
|
||||
parameter_output_layer: bool = bool(parameter_list[9])
|
||||
parameter_local_learning: bool = bool(parameter_list[10])
|
||||
|
||||
# ##############################################
|
||||
# Dealing with overall scale of the gradient
|
||||
# ##############################################
|
||||
# if parameter_disable_scale_grade is False:
|
||||
# if parameter_keep_last_grad_scale is True:
|
||||
# last_grad_scale = torch.tensor(
|
||||
# [torch.abs(grad_output).max(), last_grad_scale]
|
||||
# ).max()
|
||||
# grad_output /= last_grad_scale
|
||||
# grad_output_scale = last_grad_scale.clone()
|
||||
|
||||
input /= input.sum(dim=1, keepdim=True, dtype=weights.dtype)
|
||||
|
||||
# #################################################
|
||||
# User doesn't want us to calculate the gradients
|
||||
# #################################################
|
||||
|
||||
if parameter_skip_gradient_calculation is True:
|
||||
|
||||
return (
|
||||
grad_input,
|
||||
grad_eps_xy,
|
||||
grad_epsilon_t_0,
|
||||
grad_weights,
|
||||
grad_h_initial,
|
||||
grad_parameter_list,
|
||||
# grad_output_scale,
|
||||
grad_forgetting_offset,
|
||||
)
|
||||
|
||||
# #################################################
|
||||
# Calculate backprop error (grad_input)
|
||||
# #################################################
|
||||
|
||||
backprop_r: torch.Tensor = weights.unsqueeze(0).unsqueeze(-1).unsqueeze(
|
||||
-1
|
||||
) * output.unsqueeze(1)
|
||||
|
||||
backprop_bigr: torch.Tensor = backprop_r.sum(dim=2)
|
||||
|
||||
backprop_z: torch.Tensor = backprop_r * (
|
||||
1.0 / (backprop_bigr + 1e-20)
|
||||
).unsqueeze(2)
|
||||
grad_input: torch.Tensor = (backprop_z * grad_output.unsqueeze(1)).sum(2)
|
||||
del backprop_z
|
||||
|
||||
# #################################################
|
||||
# Calculate weight gradient (grad_weights)
|
||||
# #################################################
|
||||
|
||||
if parameter_w_trainable is False:
|
||||
|
||||
# #################################################
|
||||
# We don't train this weight
|
||||
# #################################################
|
||||
grad_weights = None
|
||||
|
||||
elif (parameter_output_layer is False) and (parameter_local_learning is True):
|
||||
# #################################################
|
||||
# Local learning
|
||||
# #################################################
|
||||
grad_weights = (
|
||||
(-2 * (input - backprop_bigr).unsqueeze(2) * output.unsqueeze(1))
|
||||
.sum(0)
|
||||
.sum(-1)
|
||||
.sum(-1)
|
||||
)
|
||||
|
||||
else:
|
||||
# #################################################
|
||||
# Backprop
|
||||
# #################################################
|
||||
backprop_f: torch.Tensor = output.unsqueeze(1) * (
|
||||
input / (backprop_bigr**2 + 1e-20)
|
||||
).unsqueeze(2)
|
||||
|
||||
result_omega: torch.Tensor = backprop_bigr.unsqueeze(
|
||||
2
|
||||
) * grad_output.unsqueeze(1)
|
||||
result_omega -= (backprop_r * grad_output.unsqueeze(1)).sum(2).unsqueeze(2)
|
||||
result_omega *= backprop_f
|
||||
del backprop_f
|
||||
grad_weights = result_omega.sum(0).sum(-1).sum(-1)
|
||||
del result_omega
|
||||
|
||||
del backprop_bigr
|
||||
del backprop_r
|
||||
|
||||
return (
|
||||
grad_input,
|
||||
grad_eps_xy,
|
||||
grad_epsilon_t_0,
|
||||
grad_weights,
|
||||
grad_h_initial,
|
||||
grad_parameter_list,
|
||||
# grad_output_scale,
|
||||
grad_forgetting_offset,
|
||||
)
|
106
network/SbSLRScheduler.py
Normal file
106
network/SbSLRScheduler.py
Normal file
|
@ -0,0 +1,106 @@
|
|||
import torch
|
||||
|
||||
|
||||
class SbSLRScheduler(torch.optim.lr_scheduler.ReduceLROnPlateau):
|
||||
def __init__(
|
||||
self,
|
||||
optimizer,
|
||||
mode: str = "min",
|
||||
factor: float = 0.1,
|
||||
patience: int = 10,
|
||||
threshold: float = 1e-4,
|
||||
threshold_mode: str = "rel",
|
||||
cooldown: int = 0,
|
||||
min_lr: float = 0,
|
||||
eps: float = 1e-8,
|
||||
verbose: bool = False,
|
||||
tau: float = 10,
|
||||
) -> None:
|
||||
|
||||
super().__init__(
|
||||
optimizer=optimizer,
|
||||
mode=mode,
|
||||
factor=factor,
|
||||
patience=patience,
|
||||
threshold=threshold,
|
||||
threshold_mode=threshold_mode,
|
||||
cooldown=cooldown,
|
||||
min_lr=min_lr,
|
||||
eps=eps,
|
||||
verbose=verbose,
|
||||
)
|
||||
self.lowpass_tau: float = tau
|
||||
self.lowpass_decay_value: float = 1.0 - (1.0 / self.lowpass_tau)
|
||||
|
||||
self.lowpass_number_of_steps: int = 0
|
||||
self.loss_maximum_over_time: float | None = None
|
||||
self.lowpass_memory: float = 0.0
|
||||
self.lowpass_learning_rate_minimum_over_time: float | None = None
|
||||
self.lowpass_learning_rate_minimum_over_time_past_step: float | None = None
|
||||
|
||||
self.previous_learning_rate: float | None = None
|
||||
self.loss_normalized_past_step: float | None = None
|
||||
|
||||
def step(self, metrics, epoch=None) -> None:
|
||||
|
||||
loss = float(metrics)
|
||||
|
||||
if self.loss_maximum_over_time is None:
|
||||
self.loss_maximum_over_time = loss
|
||||
|
||||
if self.loss_normalized_past_step is None:
|
||||
self.loss_normalized_past_step = loss / self.loss_maximum_over_time
|
||||
|
||||
if self.previous_learning_rate is None:
|
||||
self.previous_learning_rate = self.optimizer.param_groups[-1]["lr"] # type: ignore
|
||||
|
||||
# The parent lr scheduler controlls the basic learn rate
|
||||
self.previous_learning_rate = self.optimizer.param_groups[-1]["lr"] # type: ignore
|
||||
super().step(metrics=self.loss_normalized_past_step, epoch=epoch)
|
||||
|
||||
# If the parent changes the base learning rate,
|
||||
# then we reset the adaptive part
|
||||
if self.optimizer.param_groups[-1]["lr"] != self.previous_learning_rate: # type: ignore
|
||||
self.previous_learning_rate = self.optimizer.param_groups[-1]["lr"] # type: ignore
|
||||
|
||||
self.lowpass_number_of_steps = 0
|
||||
self.loss_maximum_over_time = None
|
||||
self.lowpass_memory = 0.0
|
||||
self.lowpass_learning_rate_minimum_over_time = None
|
||||
self.lowpass_learning_rate_minimum_over_time_past_step = None
|
||||
|
||||
if self.loss_maximum_over_time is None:
|
||||
self.loss_maximum_over_time = loss
|
||||
else:
|
||||
self.loss_maximum_over_time = max(self.loss_maximum_over_time, loss)
|
||||
|
||||
self.lowpass_number_of_steps += 1
|
||||
|
||||
self.lowpass_memory = self.lowpass_memory * self.lowpass_decay_value + (
|
||||
loss / self.loss_maximum_over_time
|
||||
) * (1.0 / self.lowpass_tau)
|
||||
|
||||
loss_normalized: float = self.lowpass_memory / (
|
||||
1.0 - self.lowpass_decay_value ** float(self.lowpass_number_of_steps)
|
||||
)
|
||||
|
||||
if self.lowpass_learning_rate_minimum_over_time is None:
|
||||
self.lowpass_learning_rate_minimum_over_time = loss_normalized
|
||||
else:
|
||||
self.lowpass_learning_rate_minimum_over_time = min(
|
||||
self.lowpass_learning_rate_minimum_over_time, loss_normalized
|
||||
)
|
||||
|
||||
if self.lowpass_learning_rate_minimum_over_time_past_step is None:
|
||||
self.lowpass_learning_rate_minimum_over_time_past_step = (
|
||||
self.lowpass_learning_rate_minimum_over_time
|
||||
)
|
||||
|
||||
self.optimizer.param_groups[-1]["lr"] *= ( # type: ignore
|
||||
self.lowpass_learning_rate_minimum_over_time
|
||||
/ self.lowpass_learning_rate_minimum_over_time_past_step
|
||||
)
|
||||
self.lowpass_learning_rate_minimum_over_time_past_step = (
|
||||
self.lowpass_learning_rate_minimum_over_time
|
||||
)
|
||||
self.loss_normalized_past_step = loss_normalized
|
54
network/SplitOnOffLayer.py
Normal file
54
network/SplitOnOffLayer.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
import torch
|
||||
|
||||
|
||||
class SplitOnOffLayer(torch.nn.Module):
|
||||
|
||||
device: torch.device
|
||||
default_dtype: torch.dtype
|
||||
|
||||
mean: torch.Tensor | None = None
|
||||
epsilon: float = 0.01
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
device: torch.device | None = None,
|
||||
default_dtype: torch.dtype | None = None,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
|
||||
assert device is not None
|
||||
assert default_dtype is not None
|
||||
self.device = device
|
||||
self.default_dtype = default_dtype
|
||||
|
||||
####################################################################
|
||||
# Forward #
|
||||
####################################################################
|
||||
|
||||
def forward(self, input: torch.Tensor) -> torch.Tensor:
|
||||
assert input.ndim == 4
|
||||
|
||||
# self.training is switched by network.eval() and network.train()
|
||||
if self.training is True:
|
||||
mean_temp = (
|
||||
input.mean(dim=0, keepdim=True)
|
||||
.mean(dim=1, keepdim=True)
|
||||
.detach()
|
||||
.clone()
|
||||
)
|
||||
|
||||
if self.mean is None:
|
||||
self.mean = mean_temp
|
||||
else:
|
||||
self.mean = (1.0 - self.epsilon) * self.mean + self.epsilon * mean_temp
|
||||
|
||||
assert self.mean is not None
|
||||
|
||||
temp = input - self.mean.detach().clone()
|
||||
temp_a = torch.nn.functional.relu(temp)
|
||||
temp_b = torch.nn.functional.relu(-temp)
|
||||
output = torch.cat((temp_a, temp_b), dim=1)
|
||||
|
||||
output /= output.sum(dim=1, keepdim=True) + 1e-20
|
||||
|
||||
return output
|
68
network/build_datasets.py
Normal file
68
network/build_datasets.py
Normal file
|
@ -0,0 +1,68 @@
|
|||
# %%
|
||||
import torch
|
||||
from network.Dataset import (
|
||||
DatasetMaster,
|
||||
DatasetCIFAR,
|
||||
DatasetMNIST,
|
||||
DatasetFashionMNIST,
|
||||
)
|
||||
from network.Parameter import Config
|
||||
|
||||
|
||||
def build_datasets(
|
||||
cfg: Config,
|
||||
) -> tuple[
|
||||
DatasetMaster,
|
||||
DatasetMaster,
|
||||
torch.utils.data.DataLoader,
|
||||
torch.utils.data.DataLoader,
|
||||
]:
|
||||
|
||||
# Load the input data
|
||||
the_dataset_train: DatasetMaster
|
||||
the_dataset_test: DatasetMaster
|
||||
if cfg.data_mode == "CIFAR10":
|
||||
the_dataset_train = DatasetCIFAR(
|
||||
train=True, path_pattern=cfg.data_path, path_label=cfg.data_path
|
||||
)
|
||||
the_dataset_test = DatasetCIFAR(
|
||||
train=False, path_pattern=cfg.data_path, path_label=cfg.data_path
|
||||
)
|
||||
elif cfg.data_mode == "MNIST":
|
||||
the_dataset_train = DatasetMNIST(
|
||||
train=True, path_pattern=cfg.data_path, path_label=cfg.data_path
|
||||
)
|
||||
the_dataset_test = DatasetMNIST(
|
||||
train=False, path_pattern=cfg.data_path, path_label=cfg.data_path
|
||||
)
|
||||
elif cfg.data_mode == "MNIST_FASHION":
|
||||
the_dataset_train = DatasetFashionMNIST(
|
||||
train=True, path_pattern=cfg.data_path, path_label=cfg.data_path
|
||||
)
|
||||
the_dataset_test = DatasetFashionMNIST(
|
||||
train=False, path_pattern=cfg.data_path, path_label=cfg.data_path
|
||||
)
|
||||
else:
|
||||
raise Exception("data_mode unknown")
|
||||
|
||||
if len(cfg.image_statistics.mean) == 0:
|
||||
cfg.image_statistics.mean = the_dataset_train.mean
|
||||
|
||||
# The basic size
|
||||
cfg.image_statistics.the_size = [
|
||||
the_dataset_train.pattern_storage.shape[2],
|
||||
the_dataset_train.pattern_storage.shape[3],
|
||||
]
|
||||
|
||||
# Minus the stuff we cut away in the pattern filter
|
||||
cfg.image_statistics.the_size[0] -= 2 * cfg.augmentation.crop_width_in_pixel
|
||||
cfg.image_statistics.the_size[1] -= 2 * cfg.augmentation.crop_width_in_pixel
|
||||
|
||||
my_loader_test: torch.utils.data.DataLoader = torch.utils.data.DataLoader(
|
||||
the_dataset_test, batch_size=cfg.batch_size, shuffle=False
|
||||
)
|
||||
my_loader_train: torch.utils.data.DataLoader = torch.utils.data.DataLoader(
|
||||
the_dataset_train, batch_size=cfg.batch_size, shuffle=True
|
||||
)
|
||||
|
||||
return the_dataset_train, the_dataset_test, my_loader_test, my_loader_train
|
86
network/build_lr_scheduler.py
Normal file
86
network/build_lr_scheduler.py
Normal file
|
@ -0,0 +1,86 @@
|
|||
# %%
|
||||
import torch
|
||||
from network.Parameter import Config
|
||||
|
||||
try:
|
||||
from network.SbSLRScheduler import SbSLRScheduler
|
||||
|
||||
sbs_lr_scheduler: bool = True
|
||||
except Exception:
|
||||
sbs_lr_scheduler = False
|
||||
|
||||
|
||||
def build_lr_scheduler(
|
||||
optimizer, cfg: Config, logging
|
||||
) -> list[torch.optim.lr_scheduler.ReduceLROnPlateau | SbSLRScheduler | None]:
|
||||
|
||||
assert len(optimizer) > 0
|
||||
|
||||
lr_scheduler_list: list[
|
||||
torch.optim.lr_scheduler.ReduceLROnPlateau | SbSLRScheduler | None
|
||||
] = []
|
||||
|
||||
for id_optimizer in range(0, len(optimizer)):
|
||||
|
||||
if cfg.learning_parameters.lr_schedule_name == "None":
|
||||
logging.info(f"Using lr scheduler for optimizer {id_optimizer} : None")
|
||||
|
||||
lr_scheduler_list.append(None)
|
||||
|
||||
elif cfg.learning_parameters.lr_schedule_name == "ReduceLROnPlateau":
|
||||
logging.info(
|
||||
f"Using lr scheduler for optimizer {id_optimizer}: ReduceLROnPlateau"
|
||||
)
|
||||
|
||||
if optimizer[id_optimizer] is None:
|
||||
lr_scheduler_list.append(None)
|
||||
elif (cfg.learning_parameters.lr_scheduler_factor_w <= 0) or (
|
||||
cfg.learning_parameters.lr_scheduler_patience_w <= 0
|
||||
):
|
||||
lr_scheduler_list.append(
|
||||
torch.optim.lr_scheduler.ReduceLROnPlateau(
|
||||
optimizer[id_optimizer],eps=1e-14,
|
||||
)
|
||||
)
|
||||
else:
|
||||
lr_scheduler_list.append(
|
||||
torch.optim.lr_scheduler.ReduceLROnPlateau(
|
||||
optimizer[id_optimizer],
|
||||
factor=cfg.learning_parameters.lr_scheduler_factor_w,
|
||||
patience=cfg.learning_parameters.lr_scheduler_patience_w,
|
||||
eps=1e-14,
|
||||
)
|
||||
)
|
||||
|
||||
elif cfg.learning_parameters.lr_schedule_name == "SbSLRScheduler":
|
||||
logging.info(
|
||||
f"Using lr scheduler for optimizer {id_optimizer}: SbSLRScheduler"
|
||||
)
|
||||
|
||||
if sbs_lr_scheduler is False:
|
||||
raise Exception(
|
||||
f"lr_scheduler for optimizer {id_optimizer}: SbSLRScheduler.py missing"
|
||||
)
|
||||
|
||||
if optimizer[id_optimizer] is None:
|
||||
lr_scheduler_list.append(None)
|
||||
elif (
|
||||
(cfg.learning_parameters.lr_scheduler_factor_w <= 0)
|
||||
or (cfg.learning_parameters.lr_scheduler_patience_w <= 0)
|
||||
or (cfg.learning_parameters.lr_scheduler_tau_w <= 0)
|
||||
):
|
||||
lr_scheduler_list.append(None)
|
||||
else:
|
||||
lr_scheduler_list.append(
|
||||
SbSLRScheduler(
|
||||
optimizer[id_optimizer],
|
||||
factor=cfg.learning_parameters.lr_scheduler_factor_w,
|
||||
patience=cfg.learning_parameters.lr_scheduler_patience_w,
|
||||
tau=cfg.learning_parameters.lr_scheduler_tau_w,
|
||||
)
|
||||
)
|
||||
|
||||
else:
|
||||
raise Exception("lr_scheduler not implemented")
|
||||
|
||||
return lr_scheduler_list
|
354
network/build_network.py
Normal file
354
network/build_network.py
Normal file
|
@ -0,0 +1,354 @@
|
|||
# %%
|
||||
import torch
|
||||
|
||||
from network.calculate_output_size import calculate_output_size
|
||||
from network.Parameter import Config
|
||||
from network.SbS import SbS
|
||||
from network.SplitOnOffLayer import SplitOnOffLayer
|
||||
from network.Conv2dApproximation import Conv2dApproximation
|
||||
|
||||
|
||||
def build_network(
|
||||
cfg: Config, device: torch.device, default_dtype: torch.dtype, logging
|
||||
) -> torch.nn.Sequential:
|
||||
network = torch.nn.Sequential()
|
||||
input_size: list[list[int]] = []
|
||||
input_size.append(cfg.image_statistics.the_size)
|
||||
|
||||
for layer_id in range(0, len(cfg.network_structure.layer_type)):
|
||||
# #############################################################
|
||||
# Show infos about the layer:
|
||||
# #############################################################
|
||||
logging.info("")
|
||||
logging.info(f"Layer ID: {layer_id}")
|
||||
logging.info(f"Layer type: {cfg.network_structure.layer_type[layer_id]}")
|
||||
|
||||
# #############################################################
|
||||
# Fill in the default values
|
||||
# #############################################################
|
||||
|
||||
kernel_size: list[int] = [1, 1]
|
||||
if len(cfg.network_structure.forward_kernel_size) > layer_id:
|
||||
kernel_size = cfg.network_structure.forward_kernel_size[layer_id]
|
||||
|
||||
padding: list[int] = [0, 0]
|
||||
if len(cfg.network_structure.padding) > layer_id:
|
||||
padding = cfg.network_structure.padding[layer_id]
|
||||
|
||||
dilation: list[int] = [1, 1]
|
||||
if len(cfg.network_structure.dilation) > layer_id:
|
||||
dilation = cfg.network_structure.dilation[layer_id]
|
||||
|
||||
strides: list[int] = [1, 1]
|
||||
if len(cfg.network_structure.strides) > layer_id:
|
||||
if len(cfg.network_structure.strides[layer_id]) == 2:
|
||||
strides = cfg.network_structure.strides[layer_id]
|
||||
|
||||
in_channels: int = -1
|
||||
out_channels: int = -1
|
||||
if len(cfg.network_structure.forward_neuron_numbers) > layer_id:
|
||||
if len(cfg.network_structure.forward_neuron_numbers[layer_id]) == 2:
|
||||
in_channels = cfg.network_structure.forward_neuron_numbers[layer_id][0]
|
||||
out_channels = cfg.network_structure.forward_neuron_numbers[layer_id][1]
|
||||
|
||||
weight_noise_range: list[float] = [1.0, 1.1]
|
||||
if len(cfg.learning_parameters.weight_noise_range) == 2:
|
||||
weight_noise_range = [
|
||||
float(cfg.learning_parameters.weight_noise_range[0]),
|
||||
float(cfg.learning_parameters.weight_noise_range[1]),
|
||||
]
|
||||
|
||||
logging.info(f"Input channels: {in_channels}")
|
||||
logging.info(f"Output channels: {out_channels}")
|
||||
logging.info(f"Kernel size: {kernel_size}")
|
||||
logging.info(f"Stride: {strides}")
|
||||
logging.info(f"Dilation: {dilation}")
|
||||
logging.info(f"Padding: {padding}")
|
||||
|
||||
# Conv2D
|
||||
bias: bool = True
|
||||
|
||||
# Approx settings
|
||||
approximation_enable: bool = False
|
||||
if len(cfg.approximation_setting.approximation_enable) > layer_id:
|
||||
approximation_enable = cfg.approximation_setting.approximation_enable[
|
||||
layer_id
|
||||
]
|
||||
logging.info(f"Approximation Enable: {approximation_enable}")
|
||||
elif len(cfg.approximation_setting.approximation_enable) == 1:
|
||||
approximation_enable = cfg.approximation_setting.approximation_enable[0]
|
||||
logging.info(f"Approximation Enable: {approximation_enable}")
|
||||
|
||||
number_of_trunc_bits: int = -1
|
||||
if len(cfg.approximation_setting.number_of_trunc_bits) > layer_id:
|
||||
number_of_trunc_bits = cfg.approximation_setting.number_of_trunc_bits[
|
||||
layer_id
|
||||
]
|
||||
logging.info(f"Number of trunc bits: {number_of_trunc_bits}")
|
||||
elif len(cfg.approximation_setting.number_of_trunc_bits) == 1:
|
||||
number_of_trunc_bits = cfg.approximation_setting.number_of_trunc_bits[0]
|
||||
logging.info(f"Number of trunc bits: {number_of_trunc_bits}")
|
||||
|
||||
number_of_frac_bits: int = -1
|
||||
if len(cfg.approximation_setting.number_of_frac_bits) > layer_id:
|
||||
number_of_frac_bits = cfg.approximation_setting.number_of_frac_bits[
|
||||
layer_id
|
||||
]
|
||||
logging.info(f"Number of frac bits: {number_of_trunc_bits}")
|
||||
elif len(cfg.approximation_setting.number_of_frac_bits) == 1:
|
||||
number_of_frac_bits = cfg.approximation_setting.number_of_frac_bits[0]
|
||||
logging.info(f"Number of frac bits: {number_of_trunc_bits}")
|
||||
|
||||
# Weights: Trainable?
|
||||
w_trainable: bool = False
|
||||
if len(cfg.learning_parameters.w_trainable) > layer_id:
|
||||
w_trainable = cfg.learning_parameters.w_trainable[layer_id]
|
||||
elif len(cfg.learning_parameters.w_trainable) == 1:
|
||||
w_trainable = cfg.learning_parameters.w_trainable[0]
|
||||
logging.info(f"W trainable?: {w_trainable}")
|
||||
|
||||
# SbS Setting
|
||||
sbs_skip_gradient_calculation: bool = False
|
||||
if len(cfg.learning_parameters.sbs_skip_gradient_calculation) > layer_id:
|
||||
sbs_skip_gradient_calculation = (
|
||||
cfg.learning_parameters.sbs_skip_gradient_calculation[layer_id]
|
||||
)
|
||||
elif len(cfg.learning_parameters.sbs_skip_gradient_calculation) == 1:
|
||||
sbs_skip_gradient_calculation = (
|
||||
cfg.learning_parameters.sbs_skip_gradient_calculation[0]
|
||||
)
|
||||
|
||||
# #############################################################
|
||||
# SbS layer:
|
||||
# #############################################################
|
||||
|
||||
if cfg.network_structure.layer_type[layer_id].upper().startswith("SBS") is True:
|
||||
|
||||
assert in_channels > 0
|
||||
assert out_channels > 0
|
||||
|
||||
number_of_spikes: int = -1
|
||||
if len(cfg.number_of_spikes) > layer_id:
|
||||
number_of_spikes = cfg.number_of_spikes[layer_id]
|
||||
elif len(cfg.number_of_spikes) == 1:
|
||||
number_of_spikes = cfg.number_of_spikes[0]
|
||||
|
||||
assert number_of_spikes > 0
|
||||
|
||||
logging.info(
|
||||
f"Layer: {layer_id} -> SbS Layer with {number_of_spikes} spikes"
|
||||
)
|
||||
is_pooling_layer: bool = False
|
||||
if cfg.network_structure.layer_type[layer_id].upper().find("POOLING") != -1:
|
||||
is_pooling_layer = True
|
||||
|
||||
network.append(
|
||||
SbS(
|
||||
number_of_input_neurons=in_channels,
|
||||
number_of_neurons=out_channels,
|
||||
input_size=input_size[-1],
|
||||
forward_kernel_size=kernel_size,
|
||||
number_of_spikes=number_of_spikes,
|
||||
epsilon_t=cfg.get_epsilon_t(number_of_spikes),
|
||||
epsilon_xy_intitial=cfg.learning_parameters.eps_xy_intitial,
|
||||
epsilon_0=cfg.epsilon_0,
|
||||
weight_noise_range=weight_noise_range,
|
||||
is_pooling_layer=is_pooling_layer,
|
||||
strides=strides,
|
||||
dilation=dilation,
|
||||
padding=padding,
|
||||
number_of_cpu_processes=cfg.number_of_cpu_processes,
|
||||
w_trainable=w_trainable,
|
||||
# keep_last_grad_scale=cfg.learning_parameters.kepp_last_grad_scale,
|
||||
# disable_scale_grade=cfg.learning_parameters.disable_scale_grade,
|
||||
forgetting_offset=cfg.forgetting_offset,
|
||||
skip_gradient_calculation=sbs_skip_gradient_calculation,
|
||||
device=device,
|
||||
default_dtype=default_dtype,
|
||||
)
|
||||
)
|
||||
# Adding the x,y output dimensions
|
||||
input_size.append(network[-1]._output_size.tolist())
|
||||
|
||||
network[-1]._output_layer = False
|
||||
if layer_id == len(cfg.network_structure.layer_type) - 1:
|
||||
network[-1]._output_layer = True
|
||||
|
||||
network[-1]._local_learning = False
|
||||
if cfg.network_structure.layer_type[layer_id].upper().find("LOCAL") != -1:
|
||||
network[-1]._local_learning = True
|
||||
|
||||
# #############################################################
|
||||
# Split On Off Layer:
|
||||
# #############################################################
|
||||
elif (
|
||||
cfg.network_structure.layer_type[layer_id].upper().startswith("ONOFF")
|
||||
is True
|
||||
):
|
||||
logging.info(f"Layer: {layer_id} -> Split On Off Layer")
|
||||
network.append(
|
||||
SplitOnOffLayer(
|
||||
device=device,
|
||||
default_dtype=default_dtype,
|
||||
)
|
||||
)
|
||||
input_size.append(input_size[-1])
|
||||
|
||||
# #############################################################
|
||||
# PyTorch CONV2D layer:
|
||||
# #############################################################
|
||||
elif (
|
||||
cfg.network_structure.layer_type[layer_id].upper().startswith("CONV2D")
|
||||
is True
|
||||
):
|
||||
assert in_channels > 0
|
||||
assert out_channels > 0
|
||||
|
||||
logging.info(f"Layer: {layer_id} -> CONV2D Layer")
|
||||
network.append(
|
||||
torch.nn.Conv2d(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
kernel_size=(int(kernel_size[0]), int(kernel_size[1])),
|
||||
stride=(int(strides[0]), int(strides[1])),
|
||||
dilation=(int(dilation[0]), int(dilation[1])),
|
||||
bias=bias,
|
||||
padding=(int(padding[0]), int(padding[1])),
|
||||
device=device,
|
||||
dtype=default_dtype,
|
||||
)
|
||||
)
|
||||
|
||||
# I need this later...
|
||||
network[-1]._w_trainable = w_trainable
|
||||
|
||||
# Calculate the x,y output dimensions
|
||||
input_size_temp = calculate_output_size(
|
||||
value=input_size[-1],
|
||||
kernel_size=kernel_size,
|
||||
stride=strides,
|
||||
dilation=dilation,
|
||||
padding=padding,
|
||||
).tolist()
|
||||
input_size.append(input_size_temp)
|
||||
|
||||
# #############################################################
|
||||
# PyTorch RELU layer:
|
||||
# #############################################################
|
||||
|
||||
elif (
|
||||
cfg.network_structure.layer_type[layer_id].upper().startswith("RELU")
|
||||
is True
|
||||
):
|
||||
logging.info(f"Layer: {layer_id} -> RELU Layer")
|
||||
network.append(torch.nn.ReLU())
|
||||
input_size.append(input_size[-1])
|
||||
|
||||
# #############################################################
|
||||
# PyTorch MAX Pooling layer:
|
||||
# #############################################################
|
||||
|
||||
elif (
|
||||
cfg.network_structure.layer_type[layer_id].upper().startswith("MAX POOLING")
|
||||
is True
|
||||
):
|
||||
logging.info(f"Layer: {layer_id} -> MAX POOLING Layer")
|
||||
network.append(
|
||||
torch.nn.MaxPool2d(
|
||||
kernel_size=(int(kernel_size[0]), int(kernel_size[1])),
|
||||
stride=(int(strides[0]), int(strides[1])),
|
||||
padding=(int(padding[0]), int(padding[1])),
|
||||
dilation=(int(dilation[0]), int(dilation[1])),
|
||||
)
|
||||
)
|
||||
|
||||
# Calculate the x,y output dimensions
|
||||
input_size_temp = calculate_output_size(
|
||||
value=input_size[-1],
|
||||
kernel_size=kernel_size,
|
||||
stride=strides,
|
||||
dilation=dilation,
|
||||
padding=padding,
|
||||
).tolist()
|
||||
input_size.append(input_size_temp)
|
||||
|
||||
# #############################################################
|
||||
# PyTorch Average Pooling layer:
|
||||
# #############################################################
|
||||
elif (
|
||||
cfg.network_structure.layer_type[layer_id]
|
||||
.upper()
|
||||
.startswith("AVERAGE POOLING")
|
||||
is True
|
||||
):
|
||||
logging.info(f"Layer: {layer_id} -> AVERAGE POOLING Layer")
|
||||
network.append(
|
||||
torch.nn.AvgPool2d(
|
||||
kernel_size=(int(kernel_size[0]), int(kernel_size[1])),
|
||||
stride=(int(strides[0]), int(strides[1])),
|
||||
padding=(int(padding[0]), int(padding[1])),
|
||||
)
|
||||
)
|
||||
# Calculate the x,y output dimensions
|
||||
input_size_temp = calculate_output_size(
|
||||
value=input_size[-1],
|
||||
kernel_size=kernel_size,
|
||||
stride=strides,
|
||||
dilation=dilation,
|
||||
padding=padding,
|
||||
).tolist()
|
||||
input_size.append(input_size_temp)
|
||||
|
||||
# #############################################################
|
||||
# Approx CONV2D layer:
|
||||
# #############################################################
|
||||
elif (
|
||||
cfg.network_structure.layer_type[layer_id]
|
||||
.upper()
|
||||
.startswith("APPROX CONV2D")
|
||||
is True
|
||||
):
|
||||
assert in_channels > 0
|
||||
assert out_channels > 0
|
||||
|
||||
logging.info(f"Layer: {layer_id} -> Approximation CONV2D Layer")
|
||||
network.append(
|
||||
Conv2dApproximation(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_channels,
|
||||
kernel_size=(int(kernel_size[0]), int(kernel_size[1])),
|
||||
stride=(int(strides[0]), int(strides[1])),
|
||||
dilation=(int(dilation[0]), int(dilation[1])),
|
||||
bias=bias,
|
||||
padding=(int(padding[0]), int(padding[1])),
|
||||
device=device,
|
||||
dtype=default_dtype,
|
||||
approximation_enable=approximation_enable,
|
||||
number_of_trunc_bits=number_of_trunc_bits,
|
||||
number_of_frac=number_of_frac_bits,
|
||||
number_of_processes=cfg.number_of_cpu_processes,
|
||||
)
|
||||
)
|
||||
|
||||
# I need this later...
|
||||
network[-1]._w_trainable = w_trainable
|
||||
|
||||
# Calculate the x,y output dimensions
|
||||
input_size_temp = calculate_output_size(
|
||||
value=input_size[-1],
|
||||
kernel_size=kernel_size,
|
||||
stride=strides,
|
||||
dilation=dilation,
|
||||
padding=padding,
|
||||
).tolist()
|
||||
input_size.append(input_size_temp)
|
||||
|
||||
# #############################################################
|
||||
# Failure becaue we didn't found the selection of layer
|
||||
# #############################################################
|
||||
else:
|
||||
raise Exception(
|
||||
f"Unknown layer type: {cfg.network_structure.layer_type[layer_id]}"
|
||||
)
|
||||
|
||||
return network
|
83
network/build_optimizer.py
Normal file
83
network/build_optimizer.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
# %%
|
||||
import torch
|
||||
from network.Parameter import Config
|
||||
from network.SbS import SbS
|
||||
from network.Conv2dApproximation import Conv2dApproximation
|
||||
from network.Adam import Adam
|
||||
|
||||
|
||||
def build_optimizer(
|
||||
network: torch.nn.Sequential, cfg: Config, logging
|
||||
) -> list[torch.optim.Optimizer | None]:
|
||||
|
||||
parameter_list_weights: list = []
|
||||
parameter_list_sbs: list = []
|
||||
|
||||
# ###############################################
|
||||
# Put all parameter that needs to be learned
|
||||
# in a parameter list.
|
||||
# ###############################################
|
||||
|
||||
for id in range(0, len(network)):
|
||||
|
||||
if (isinstance(network[id], SbS) is True) and (
|
||||
network[id]._w_trainable is True
|
||||
):
|
||||
parameter_list_weights.append(network[id]._weights)
|
||||
parameter_list_sbs.append(True)
|
||||
|
||||
if (isinstance(network[id], torch.nn.modules.conv.Conv2d) is True) and (
|
||||
network[id]._w_trainable is True
|
||||
):
|
||||
for id_parameter in network[id].parameters():
|
||||
parameter_list_weights.append(id_parameter)
|
||||
parameter_list_sbs.append(False)
|
||||
|
||||
if (isinstance(network[id], Conv2dApproximation) is True) and (
|
||||
network[id]._w_trainable is True
|
||||
):
|
||||
for id_parameter in network[id].parameters():
|
||||
parameter_list_weights.append(id_parameter)
|
||||
parameter_list_sbs.append(False)
|
||||
|
||||
logging.info(
|
||||
f"Number of parameters found to optimize: {len(parameter_list_weights)}"
|
||||
)
|
||||
|
||||
# ###############################################
|
||||
# Connect the parameters to an optimizer
|
||||
# ###############################################
|
||||
|
||||
if cfg.learning_parameters.optimizer_name == "Adam":
|
||||
logging.info("Using optimizer: Adam")
|
||||
|
||||
if len(parameter_list_weights) == 0:
|
||||
optimizer_wf: torch.optim.Optimizer | None = None
|
||||
elif cfg.learning_parameters.learning_rate_gamma_w > 0:
|
||||
optimizer_wf = Adam(
|
||||
parameter_list_weights,
|
||||
parameter_list_sbs,
|
||||
lr=cfg.learning_parameters.learning_rate_gamma_w,
|
||||
)
|
||||
else:
|
||||
optimizer_wf = Adam(parameter_list_weights, parameter_list_sbs)
|
||||
|
||||
elif cfg.learning_parameters.optimizer_name == "SGD":
|
||||
logging.info("Using optimizer: SGD")
|
||||
|
||||
if len(parameter_list_weights) == 0:
|
||||
optimizer_wf = None
|
||||
elif cfg.learning_parameters.learning_rate_gamma_w > 0:
|
||||
optimizer_wf = torch.optim.SGD(
|
||||
parameter_list_weights,
|
||||
lr=cfg.learning_parameters.learning_rate_gamma_w,
|
||||
)
|
||||
else:
|
||||
assert cfg.learning_parameters.learning_rate_gamma_w > 0
|
||||
|
||||
else:
|
||||
raise Exception("Optimizer not implemented")
|
||||
|
||||
optimizer = []
|
||||
optimizer.append(optimizer_wf)
|
||||
return optimizer
|
95
network/calculate_output_size.py
Normal file
95
network/calculate_output_size.py
Normal file
|
@ -0,0 +1,95 @@
|
|||
# %%
|
||||
import torch
|
||||
|
||||
|
||||
def calculate_output_size(
|
||||
value: list[int],
|
||||
kernel_size: list[int],
|
||||
stride: list[int],
|
||||
dilation: list[int],
|
||||
padding: list[int],
|
||||
) -> torch.Tensor:
|
||||
|
||||
assert len(value) == 2
|
||||
assert len(kernel_size) == 2
|
||||
assert len(stride) == 2
|
||||
assert len(dilation) == 2
|
||||
assert len(padding) == 2
|
||||
|
||||
coordinates_0, coordinates_1 = get_coordinates(
|
||||
value=value,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
dilation=dilation,
|
||||
padding=padding,
|
||||
)
|
||||
|
||||
output_size: torch.Tensor = torch.tensor(
|
||||
[
|
||||
coordinates_0.shape[1],
|
||||
coordinates_1.shape[1],
|
||||
],
|
||||
dtype=torch.int64,
|
||||
)
|
||||
return output_size
|
||||
|
||||
|
||||
def get_coordinates(
|
||||
value: list[int],
|
||||
kernel_size: list[int],
|
||||
stride: list[int],
|
||||
dilation: list[int],
|
||||
padding: list[int],
|
||||
) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
"""Function converts parameter in coordinates
|
||||
for the convolution window"""
|
||||
|
||||
unfold_0: torch.nn.Unfold = torch.nn.Unfold(
|
||||
kernel_size=(int(kernel_size[0]), 1),
|
||||
dilation=int(dilation[0]),
|
||||
padding=int(padding[0]),
|
||||
stride=int(stride[0]),
|
||||
)
|
||||
|
||||
unfold_1: torch.nn.Unfold = torch.nn.Unfold(
|
||||
kernel_size=(1, int(kernel_size[1])),
|
||||
dilation=int(dilation[1]),
|
||||
padding=int(padding[1]),
|
||||
stride=int(stride[1]),
|
||||
)
|
||||
|
||||
coordinates_0: torch.Tensor = (
|
||||
unfold_0(
|
||||
torch.unsqueeze(
|
||||
torch.unsqueeze(
|
||||
torch.unsqueeze(
|
||||
torch.arange(0, int(value[0]), dtype=torch.float32),
|
||||
1,
|
||||
),
|
||||
0,
|
||||
),
|
||||
0,
|
||||
)
|
||||
)
|
||||
.squeeze(0)
|
||||
.type(torch.int64)
|
||||
)
|
||||
|
||||
coordinates_1: torch.Tensor = (
|
||||
unfold_1(
|
||||
torch.unsqueeze(
|
||||
torch.unsqueeze(
|
||||
torch.unsqueeze(
|
||||
torch.arange(0, int(value[1]), dtype=torch.float32),
|
||||
0,
|
||||
),
|
||||
0,
|
||||
),
|
||||
0,
|
||||
)
|
||||
)
|
||||
.squeeze(0)
|
||||
.type(torch.int64)
|
||||
)
|
||||
|
||||
return coordinates_0, coordinates_1
|
126
network/dataset_collection/DATA_CIFAR10/convert.py
Normal file
126
network/dataset_collection/DATA_CIFAR10/convert.py
Normal file
|
@ -0,0 +1,126 @@
|
|||
# MIT License
|
||||
# Copyright 2022 University of Bremen
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
#
|
||||
# David Rotermund ( davrot@uni-bremen.de )
|
||||
#
|
||||
#
|
||||
# Release history:
|
||||
# ================
|
||||
# 1.0.0 -- 01.05.2022: first release
|
||||
#
|
||||
#
|
||||
|
||||
import numpy as np
|
||||
import pickle
|
||||
|
||||
|
||||
def give_filenames(id: int) -> tuple[str, str, int]:
|
||||
if id == 0:
|
||||
start_id: int = 0
|
||||
prefix: str = "Test"
|
||||
filename: str = "cifar-10-batches-py/test_batch"
|
||||
if id == 1:
|
||||
start_id = 0
|
||||
prefix = "Train"
|
||||
filename = "cifar-10-batches-py/data_batch_1"
|
||||
if id == 2:
|
||||
start_id = 10000
|
||||
prefix = "Train"
|
||||
filename = "cifar-10-batches-py/data_batch_2"
|
||||
if id == 3:
|
||||
start_id = 20000
|
||||
prefix = "Train"
|
||||
filename = "cifar-10-batches-py/data_batch_3"
|
||||
if id == 4:
|
||||
start_id = 30000
|
||||
prefix = "Train"
|
||||
filename = "cifar-10-batches-py/data_batch_4"
|
||||
if id == 5:
|
||||
start_id = 40000
|
||||
prefix = "Train"
|
||||
filename = "cifar-10-batches-py/data_batch_5"
|
||||
return filename, prefix, start_id
|
||||
|
||||
|
||||
def load_data(filename: str) -> tuple[np.ndarray, np.ndarray]:
|
||||
fo = open(filename, "rb")
|
||||
dict_data = pickle.load(fo, encoding="bytes")
|
||||
_, labels_temp, data_temp, _ = dict_data.items()
|
||||
data: np.ndarray = np.array(data_temp[1])
|
||||
labels: np.ndarray = np.array(labels_temp[1])
|
||||
return data, labels
|
||||
|
||||
|
||||
def split_into_three_color_channels(
|
||||
image: np.ndarray,
|
||||
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
||||
channel_r = image[0:1024].astype(np.float32)
|
||||
channel_r = channel_r.reshape(32, 32)
|
||||
channel_g = image[1024:2048].astype(np.float32)
|
||||
channel_g = channel_g.reshape(32, 32)
|
||||
channel_b = image[2048:3072].astype(np.float32)
|
||||
channel_b = channel_b.reshape(32, 32)
|
||||
return channel_r, channel_g, channel_b
|
||||
|
||||
|
||||
def process_data_set(test_data_mode: bool) -> None:
|
||||
|
||||
if test_data_mode is True:
|
||||
filename_out_pattern: str = "TestPatternStorage.npy"
|
||||
filename_out_label: str = "TestLabelStorage.npy"
|
||||
number_of_pictures: int = 10000
|
||||
start_id: int = 0
|
||||
end_id: int = 0
|
||||
else:
|
||||
filename_out_pattern = "TrainPatternStorage.npy"
|
||||
filename_out_label = "TrainLabelStorage.npy"
|
||||
number_of_pictures = 50000
|
||||
start_id = 1
|
||||
end_id = 5
|
||||
|
||||
np_data: np.ndarray = np.zeros((number_of_pictures, 32, 32, 3), dtype=np.float32)
|
||||
np_label: np.ndarray = np.zeros((number_of_pictures), dtype=np.uint64)
|
||||
|
||||
for id in range(start_id, end_id + 1):
|
||||
filename, _, start_id_pattern = give_filenames(id)
|
||||
pictures, labels = load_data(filename)
|
||||
|
||||
for i in range(0, pictures.shape[0]):
|
||||
channel_r, channel_g, channel_b = split_into_three_color_channels(
|
||||
pictures[i, :]
|
||||
)
|
||||
np_data[i + start_id_pattern, :, :, 0] = channel_r
|
||||
np_data[i + start_id_pattern, :, :, 1] = channel_g
|
||||
np_data[i + start_id_pattern, :, :, 2] = channel_b
|
||||
np_label[i + start_id_pattern] = labels[i]
|
||||
|
||||
np_data /= np.max(np_data)
|
||||
|
||||
label_storage: np.ndarray = np_label.astype(dtype=np.uint64)
|
||||
pattern_storage: np.ndarray = np_data.astype(dtype=np.float32)
|
||||
|
||||
np.save(filename_out_pattern, pattern_storage)
|
||||
np.save(filename_out_label, label_storage)
|
||||
|
||||
|
||||
process_data_set(True)
|
||||
process_data_set(False)
|
8
network/dataset_collection/DATA_CIFAR10/data_url.txt
Normal file
8
network/dataset_collection/DATA_CIFAR10/data_url.txt
Normal file
|
@ -0,0 +1,8 @@
|
|||
https://www.cs.toronto.edu/~kriz/cifar.html
|
||||
|
||||
Download the CIFAR-10 python version
|
||||
https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
|
||||
|
||||
Then
|
||||
tar -xvzf cifar-10-python.tar.gz
|
||||
python convert.py
|
4
network/dataset_collection/DATA_CIFAR10/dataset.json
Normal file
4
network/dataset_collection/DATA_CIFAR10/dataset.json
Normal file
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"data_path": "./DATA_CIFAR10/",
|
||||
"data_mode": "CIFAR10"
|
||||
}
|
161
network/dataset_collection/DATA_FASHION_MNIST/convert.py
Normal file
161
network/dataset_collection/DATA_FASHION_MNIST/convert.py
Normal file
|
@ -0,0 +1,161 @@
|
|||
# MIT License
|
||||
# Copyright 2022 University of Bremen
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
#
|
||||
# David Rotermund ( davrot@uni-bremen.de )
|
||||
#
|
||||
#
|
||||
# Release history:
|
||||
# ================
|
||||
# 1.0.0 -- 01.05.2022: first release
|
||||
#
|
||||
#
|
||||
|
||||
import numpy as np
|
||||
|
||||
# [offset] [type] [value] [description]
|
||||
# 0000 32 bit integer 0x00000801(2049) magic number (MSB first)
|
||||
# 0004 32 bit integer 60000 number of items
|
||||
# 0008 unsigned byte ?? label
|
||||
# 0009 unsigned byte ?? label
|
||||
# ........
|
||||
# xxxx unsigned byte ?? label
|
||||
# The labels values are 0 to 9.
|
||||
|
||||
|
||||
class ReadLabel:
|
||||
"""Class for reading the labels from an MNIST label file"""
|
||||
|
||||
def __init__(self, filename):
|
||||
self.filename: str = filename
|
||||
self.data = self.read_from_file(filename)
|
||||
|
||||
def read_from_file(self, filename):
|
||||
int32_data = np.dtype(np.uint32)
|
||||
int32_data = int32_data.newbyteorder(">")
|
||||
file = open(filename, "rb")
|
||||
|
||||
magic_flag = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if magic_flag != 2049:
|
||||
data = np.zeros(0)
|
||||
number_of_elements = 0
|
||||
else:
|
||||
number_of_elements = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if number_of_elements < 1:
|
||||
data = np.zeros(0)
|
||||
else:
|
||||
data = np.frombuffer(file.read(number_of_elements), dtype=np.uint8)
|
||||
|
||||
file.close()
|
||||
|
||||
return data
|
||||
|
||||
|
||||
# [offset] [type] [value] [description]
|
||||
# 0000 32 bit integer 0x00000803(2051) magic number
|
||||
# 0004 32 bit integer 60000 number of images
|
||||
# 0008 32 bit integer 28 number of rows
|
||||
# 0012 32 bit integer 28 number of columns
|
||||
# 0016 unsigned byte ?? pixel
|
||||
# 0017 unsigned byte ?? pixel
|
||||
# ........
|
||||
# xxxx unsigned byte ?? pixel
|
||||
# Pixels are organized row-wise.
|
||||
# Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).
|
||||
|
||||
|
||||
class ReadPicture:
|
||||
"""Class for reading the images from an MNIST image file"""
|
||||
|
||||
def __init__(self, filename):
|
||||
self.filename: str = filename
|
||||
self.data = self.read_from_file(filename)
|
||||
|
||||
def read_from_file(self, filename):
|
||||
int32_data = np.dtype(np.uint32)
|
||||
int32_data = int32_data.newbyteorder(">")
|
||||
file = open(filename, "rb")
|
||||
|
||||
magic_flag = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if magic_flag != 2051:
|
||||
data = np.zeros(0)
|
||||
number_of_elements = 0
|
||||
else:
|
||||
number_of_elements = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if number_of_elements < 1:
|
||||
data = np.zeros(0)
|
||||
number_of_rows = 0
|
||||
else:
|
||||
number_of_rows = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if number_of_rows != 28:
|
||||
data = np.zeros(0)
|
||||
number_of_columns = 0
|
||||
else:
|
||||
number_of_columns = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if number_of_columns != 28:
|
||||
data = np.zeros(0)
|
||||
else:
|
||||
data = np.frombuffer(
|
||||
file.read(number_of_elements * number_of_rows * number_of_columns),
|
||||
dtype=np.uint8,
|
||||
)
|
||||
data = data.reshape(number_of_elements, number_of_columns, number_of_rows)
|
||||
|
||||
file.close()
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def proprocess_data_set(test_mode):
|
||||
|
||||
if test_mode is True:
|
||||
filename_out_pattern: str = "TestPatternStorage.npy"
|
||||
filename_out_label: str = "TestLabelStorage.npy"
|
||||
filename_in_image: str = "t10k-images-idx3-ubyte"
|
||||
filename_in_label = "t10k-labels-idx1-ubyte"
|
||||
else:
|
||||
filename_out_pattern = "TrainPatternStorage.npy"
|
||||
filename_out_label = "TrainLabelStorage.npy"
|
||||
filename_in_image = "train-images-idx3-ubyte"
|
||||
filename_in_label = "train-labels-idx1-ubyte"
|
||||
|
||||
pictures = ReadPicture(filename_in_image)
|
||||
labels = ReadLabel(filename_in_label)
|
||||
|
||||
# Down to 0 ... 1.0
|
||||
max_value = np.max(pictures.data.astype(np.float32))
|
||||
d = np.float32(pictures.data.astype(np.float32) / max_value)
|
||||
|
||||
label_storage = np.uint64(labels.data)
|
||||
pattern_storage = d.astype(np.float32)
|
||||
|
||||
np.save(filename_out_pattern, pattern_storage)
|
||||
np.save(filename_out_label, label_storage)
|
||||
|
||||
|
||||
proprocess_data_set(True)
|
||||
proprocess_data_set(False)
|
|
@ -0,0 +1,8 @@
|
|||
https://github.com/zalandoresearch/fashion-mnist
|
||||
|
||||
We need:
|
||||
t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz
|
||||
|
||||
Then
|
||||
gzip -d *.gz
|
||||
python convert.py
|
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"data_path": "./DATA_FASHION_MNIST/",
|
||||
"data_mode": "MNIST_FASHION"
|
||||
}
|
161
network/dataset_collection/DATA_MNIST/convert.py
Normal file
161
network/dataset_collection/DATA_MNIST/convert.py
Normal file
|
@ -0,0 +1,161 @@
|
|||
# MIT License
|
||||
# Copyright 2022 University of Bremen
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
#
|
||||
# David Rotermund ( davrot@uni-bremen.de )
|
||||
#
|
||||
#
|
||||
# Release history:
|
||||
# ================
|
||||
# 1.0.0 -- 01.05.2022: first release
|
||||
#
|
||||
#
|
||||
|
||||
import numpy as np
|
||||
|
||||
# [offset] [type] [value] [description]
|
||||
# 0000 32 bit integer 0x00000801(2049) magic number (MSB first)
|
||||
# 0004 32 bit integer 60000 number of items
|
||||
# 0008 unsigned byte ?? label
|
||||
# 0009 unsigned byte ?? label
|
||||
# ........
|
||||
# xxxx unsigned byte ?? label
|
||||
# The labels values are 0 to 9.
|
||||
|
||||
|
||||
class ReadLabel:
|
||||
"""Class for reading the labels from an MNIST label file"""
|
||||
|
||||
def __init__(self, filename):
|
||||
self.filename: str = filename
|
||||
self.data = self.read_from_file(filename)
|
||||
|
||||
def read_from_file(self, filename):
|
||||
int32_data = np.dtype(np.uint32)
|
||||
int32_data = int32_data.newbyteorder(">")
|
||||
file = open(filename, "rb")
|
||||
|
||||
magic_flag = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if magic_flag != 2049:
|
||||
data = np.zeros(0)
|
||||
number_of_elements = 0
|
||||
else:
|
||||
number_of_elements = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if number_of_elements < 1:
|
||||
data = np.zeros(0)
|
||||
else:
|
||||
data = np.frombuffer(file.read(number_of_elements), dtype=np.uint8)
|
||||
|
||||
file.close()
|
||||
|
||||
return data
|
||||
|
||||
|
||||
# [offset] [type] [value] [description]
|
||||
# 0000 32 bit integer 0x00000803(2051) magic number
|
||||
# 0004 32 bit integer 60000 number of images
|
||||
# 0008 32 bit integer 28 number of rows
|
||||
# 0012 32 bit integer 28 number of columns
|
||||
# 0016 unsigned byte ?? pixel
|
||||
# 0017 unsigned byte ?? pixel
|
||||
# ........
|
||||
# xxxx unsigned byte ?? pixel
|
||||
# Pixels are organized row-wise.
|
||||
# Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).
|
||||
|
||||
|
||||
class ReadPicture:
|
||||
"""Class for reading the images from an MNIST image file"""
|
||||
|
||||
def __init__(self, filename):
|
||||
self.filename: str = filename
|
||||
self.data = self.read_from_file(filename)
|
||||
|
||||
def read_from_file(self, filename):
|
||||
int32_data = np.dtype(np.uint32)
|
||||
int32_data = int32_data.newbyteorder(">")
|
||||
file = open(filename, "rb")
|
||||
|
||||
magic_flag = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if magic_flag != 2051:
|
||||
data = np.zeros(0)
|
||||
number_of_elements = 0
|
||||
else:
|
||||
number_of_elements = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if number_of_elements < 1:
|
||||
data = np.zeros(0)
|
||||
number_of_rows = 0
|
||||
else:
|
||||
number_of_rows = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if number_of_rows != 28:
|
||||
data = np.zeros(0)
|
||||
number_of_columns = 0
|
||||
else:
|
||||
number_of_columns = np.frombuffer(file.read(4), int32_data)[0]
|
||||
|
||||
if number_of_columns != 28:
|
||||
data = np.zeros(0)
|
||||
else:
|
||||
data = np.frombuffer(
|
||||
file.read(number_of_elements * number_of_rows * number_of_columns),
|
||||
dtype=np.uint8,
|
||||
)
|
||||
data = data.reshape(number_of_elements, number_of_columns, number_of_rows)
|
||||
|
||||
file.close()
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def proprocess_data_set(test_mode):
|
||||
|
||||
if test_mode is True:
|
||||
filename_out_pattern: str = "TestPatternStorage.npy"
|
||||
filename_out_label: str = "TestLabelStorage.npy"
|
||||
filename_in_image: str = "t10k-images-idx3-ubyte"
|
||||
filename_in_label = "t10k-labels-idx1-ubyte"
|
||||
else:
|
||||
filename_out_pattern = "TrainPatternStorage.npy"
|
||||
filename_out_label = "TrainLabelStorage.npy"
|
||||
filename_in_image = "train-images-idx3-ubyte"
|
||||
filename_in_label = "train-labels-idx1-ubyte"
|
||||
|
||||
pictures = ReadPicture(filename_in_image)
|
||||
labels = ReadLabel(filename_in_label)
|
||||
|
||||
# Down to 0 ... 1.0
|
||||
max_value = np.max(pictures.data.astype(np.float32))
|
||||
d = np.float32(pictures.data.astype(np.float32) / max_value)
|
||||
|
||||
label_storage = np.uint64(labels.data)
|
||||
pattern_storage = d.astype(np.float32)
|
||||
|
||||
np.save(filename_out_pattern, pattern_storage)
|
||||
np.save(filename_out_label, label_storage)
|
||||
|
||||
|
||||
proprocess_data_set(True)
|
||||
proprocess_data_set(False)
|
8
network/dataset_collection/DATA_MNIST/data_url.txt
Normal file
8
network/dataset_collection/DATA_MNIST/data_url.txt
Normal file
|
@ -0,0 +1,8 @@
|
|||
http://yann.lecun.com/exdb/mnist/
|
||||
|
||||
We need:
|
||||
t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz
|
||||
|
||||
Then
|
||||
gzip -d *.gz
|
||||
python convert.py
|
4
network/dataset_collection/DATA_MNIST/dataset.json
Normal file
4
network/dataset_collection/DATA_MNIST/dataset.json
Normal file
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"data_path": "./DATA_MNIST/",
|
||||
"data_mode": "MNIST"
|
||||
}
|
144
network/load_previous_weights.py
Normal file
144
network/load_previous_weights.py
Normal file
|
@ -0,0 +1,144 @@
|
|||
# %%
|
||||
import torch
|
||||
import glob
|
||||
import numpy as np
|
||||
|
||||
from network.SbS import SbS
|
||||
from network.SplitOnOffLayer import SplitOnOffLayer
|
||||
from network.Conv2dApproximation import Conv2dApproximation
|
||||
|
||||
|
||||
def load_previous_weights(
|
||||
network: torch.nn.Sequential,
|
||||
overload_path: str,
|
||||
logging,
|
||||
device: torch.device,
|
||||
default_dtype: torch.dtype,
|
||||
) -> None:
|
||||
|
||||
for id in range(0, len(network)):
|
||||
|
||||
# #################################################
|
||||
# SbS
|
||||
# #################################################
|
||||
|
||||
if isinstance(network[id], SbS) is True:
|
||||
# Are there weights that overwrite the initial weights?
|
||||
file_to_load = glob.glob(overload_path + "/Weight_L" + str(id) + "_*.npy")
|
||||
|
||||
if len(file_to_load) > 1:
|
||||
raise Exception(
|
||||
f"Too many previous weights files {overload_path}/Weight_L{id}*.npy"
|
||||
)
|
||||
|
||||
if len(file_to_load) == 1:
|
||||
network[id].weights = torch.tensor(
|
||||
np.load(file_to_load[0]),
|
||||
dtype=default_dtype,
|
||||
device=device,
|
||||
)
|
||||
logging.info(f"Weights file used for layer {id} : {file_to_load[0]}")
|
||||
|
||||
if isinstance(network[id], torch.nn.modules.conv.Conv2d) is True:
|
||||
|
||||
# #################################################
|
||||
# Conv2d weights
|
||||
# #################################################
|
||||
|
||||
# Are there weights that overwrite the initial weights?
|
||||
file_to_load = glob.glob(overload_path + "/Weight_L" + str(id) + "_*.npy")
|
||||
|
||||
if len(file_to_load) > 1:
|
||||
raise Exception(
|
||||
f"Too many previous weights files {overload_path}/Weight_L{id}*.npy"
|
||||
)
|
||||
|
||||
if len(file_to_load) == 1:
|
||||
network[id]._parameters["weight"].data = torch.tensor(
|
||||
np.load(file_to_load[0]),
|
||||
dtype=default_dtype,
|
||||
device=device,
|
||||
)
|
||||
logging.info(f"Weights file used for layer {id} : {file_to_load[0]}")
|
||||
|
||||
# #################################################
|
||||
# Conv2d bias
|
||||
# #################################################
|
||||
|
||||
# Are there biases that overwrite the initial weights?
|
||||
file_to_load = glob.glob(overload_path + "/Bias_L" + str(id) + "_*.npy")
|
||||
|
||||
if len(file_to_load) > 1:
|
||||
raise Exception(
|
||||
f"Too many previous weights files {overload_path}/Weight_L{id}*.npy"
|
||||
)
|
||||
|
||||
if len(file_to_load) == 1:
|
||||
network[id]._parameters["bias"].data = torch.tensor(
|
||||
np.load(file_to_load[0]),
|
||||
dtype=default_dtype,
|
||||
device=device,
|
||||
)
|
||||
logging.info(f"Bias file used for layer {id} : {file_to_load[0]}")
|
||||
|
||||
if isinstance(network[id], Conv2dApproximation) is True:
|
||||
|
||||
# #################################################
|
||||
# Approximate Conv2d weights
|
||||
# #################################################
|
||||
|
||||
# Are there weights that overwrite the initial weights?
|
||||
file_to_load = glob.glob(overload_path + "/Weight_L" + str(id) + "_*.npy")
|
||||
|
||||
if len(file_to_load) > 1:
|
||||
raise Exception(
|
||||
f"Too many previous weights files {overload_path}/Weight_L{id}*.npy"
|
||||
)
|
||||
|
||||
if len(file_to_load) == 1:
|
||||
network[id].weights.data = torch.tensor(
|
||||
np.load(file_to_load[0]),
|
||||
dtype=default_dtype,
|
||||
device=device,
|
||||
)
|
||||
logging.info(f"Weights file used for layer {id} : {file_to_load[0]}")
|
||||
|
||||
# #################################################
|
||||
# Approximate Conv2d bias
|
||||
# #################################################
|
||||
|
||||
# Are there biases that overwrite the initial weights?
|
||||
file_to_load = glob.glob(overload_path + "/Bias_L" + str(id) + "_*.npy")
|
||||
|
||||
if len(file_to_load) > 1:
|
||||
raise Exception(
|
||||
f"Too many previous weights files {overload_path}/Weight_L{id}*.npy"
|
||||
)
|
||||
|
||||
if len(file_to_load) == 1:
|
||||
network[id].bias.data = torch.tensor(
|
||||
np.load(file_to_load[0]),
|
||||
dtype=default_dtype,
|
||||
device=device,
|
||||
)
|
||||
logging.info(f"Bias file used for layer {id} : {file_to_load[0]}")
|
||||
|
||||
# #################################################
|
||||
# SplitOnOffLayer
|
||||
# #################################################
|
||||
if isinstance(network[id], SplitOnOffLayer) is True:
|
||||
# Are there weights that overwrite the initial weights?
|
||||
file_to_load = glob.glob(overload_path + "/Mean_L" + str(id) + "_*.npy")
|
||||
|
||||
if len(file_to_load) > 1:
|
||||
raise Exception(
|
||||
f"Too many previous mean files {overload_path}/Mean_L{id}*.npy"
|
||||
)
|
||||
|
||||
if len(file_to_load) == 1:
|
||||
network[id].mean = torch.tensor(
|
||||
np.load(file_to_load[0]),
|
||||
dtype=default_dtype,
|
||||
device=device,
|
||||
)
|
||||
logging.info(f"Meanfile used for layer {id} : {file_to_load[0]}")
|
512
network/loop_train_test.py
Normal file
512
network/loop_train_test.py
Normal file
|
@ -0,0 +1,512 @@
|
|||
import torch
|
||||
import time
|
||||
from network.Parameter import Config
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
from network.SbS import SbS
|
||||
from network.save_weight_and_bias import save_weight_and_bias
|
||||
|
||||
|
||||
def add_weight_and_bias_to_histogram(
|
||||
network: torch.nn.modules.container.Sequential,
|
||||
tb: SummaryWriter,
|
||||
iteration_number: int,
|
||||
) -> None:
|
||||
|
||||
for id in range(0, len(network)):
|
||||
|
||||
# ################################################
|
||||
# Log the SbS Weights
|
||||
# ################################################
|
||||
if isinstance(network[id], SbS) is True:
|
||||
if network[id]._w_trainable is True:
|
||||
|
||||
try:
|
||||
tb.add_histogram(
|
||||
f"Weights Layer {id}",
|
||||
network[id].weights,
|
||||
iteration_number,
|
||||
)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
# ################################################
|
||||
# Log the Conv2 Weights and Biases
|
||||
# ################################################
|
||||
if isinstance(network[id], torch.nn.modules.conv.Conv2d) is True:
|
||||
if network[id]._w_trainable is True:
|
||||
|
||||
try:
|
||||
tb.add_histogram(
|
||||
f"Weights Layer {id}",
|
||||
network[id]._parameters["weight"].data,
|
||||
iteration_number,
|
||||
)
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
tb.add_histogram(
|
||||
f"Bias Layer {id}",
|
||||
network[id]._parameters["bias"].data,
|
||||
iteration_number,
|
||||
)
|
||||
except ValueError:
|
||||
pass
|
||||
tb.flush()
|
||||
|
||||
|
||||
# loss_mode == 0: "normal" SbS loss function mixture
|
||||
# loss_mode == 1: cross_entropy
|
||||
def loss_function(
|
||||
h: torch.Tensor,
|
||||
labels: torch.Tensor,
|
||||
device: torch.device,
|
||||
default_dtype: torch.dtype,
|
||||
loss_mode: int = 0,
|
||||
number_of_output_neurons: int = 10,
|
||||
loss_coeffs_mse: float = 0.0,
|
||||
loss_coeffs_kldiv: float = 0.0,
|
||||
) -> torch.Tensor | None:
|
||||
|
||||
assert loss_mode >= 0
|
||||
assert loss_mode <= 1
|
||||
|
||||
h = h.squeeze(-1).squeeze(-1)
|
||||
assert h.ndim == 2
|
||||
|
||||
if loss_mode == 0:
|
||||
|
||||
# Convert label into one hot
|
||||
target_one_hot: torch.Tensor = torch.zeros(
|
||||
(
|
||||
labels.shape[0],
|
||||
number_of_output_neurons,
|
||||
),
|
||||
device=device,
|
||||
dtype=default_dtype,
|
||||
)
|
||||
|
||||
target_one_hot.scatter_(
|
||||
1,
|
||||
labels.to(device).unsqueeze(1),
|
||||
torch.ones(
|
||||
(labels.shape[0], 1),
|
||||
device=device,
|
||||
dtype=default_dtype,
|
||||
),
|
||||
).unsqueeze(-1).unsqueeze(-1)
|
||||
|
||||
h_y1 = torch.log(h + 1e-20)
|
||||
|
||||
my_loss: torch.Tensor = (
|
||||
torch.nn.functional.mse_loss(
|
||||
h,
|
||||
target_one_hot,
|
||||
reduction="sum",
|
||||
)
|
||||
* loss_coeffs_mse
|
||||
+ torch.nn.functional.kl_div(h_y1, target_one_hot + 1e-20, reduction="sum")
|
||||
* loss_coeffs_kldiv
|
||||
) / (loss_coeffs_kldiv + loss_coeffs_mse)
|
||||
|
||||
return my_loss
|
||||
elif loss_mode == 1:
|
||||
my_loss = torch.nn.functional.cross_entropy(
|
||||
h.squeeze(-1).squeeze(-1), labels.to(device)
|
||||
)
|
||||
return my_loss
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def forward_pass_train(
|
||||
input: torch.Tensor,
|
||||
labels: torch.Tensor,
|
||||
the_dataset_train,
|
||||
cfg: Config,
|
||||
network: torch.nn.modules.container.Sequential,
|
||||
device: torch.device,
|
||||
default_dtype: torch.dtype,
|
||||
) -> list[torch.Tensor]:
|
||||
|
||||
h_collection = []
|
||||
h_collection.append(
|
||||
the_dataset_train.pattern_filter_train(input, cfg)
|
||||
.type(dtype=default_dtype)
|
||||
.to(device=device)
|
||||
)
|
||||
for id in range(0, len(network)):
|
||||
if isinstance(network[id], SbS) is True:
|
||||
h_collection.append(network[id](h_collection[-1], labels))
|
||||
else:
|
||||
h_collection.append(network[id](h_collection[-1]))
|
||||
|
||||
return h_collection
|
||||
|
||||
|
||||
def forward_pass_test(
|
||||
input: torch.Tensor,
|
||||
the_dataset_test,
|
||||
cfg: Config,
|
||||
network: torch.nn.modules.container.Sequential,
|
||||
device: torch.device,
|
||||
default_dtype: torch.dtype,
|
||||
) -> list[torch.Tensor]:
|
||||
|
||||
h_collection = []
|
||||
h_collection.append(
|
||||
the_dataset_test.pattern_filter_test(input, cfg)
|
||||
.type(dtype=default_dtype)
|
||||
.to(device=device)
|
||||
)
|
||||
for id in range(0, len(network)):
|
||||
h_collection.append(network[id](h_collection[-1]))
|
||||
|
||||
return h_collection
|
||||
|
||||
|
||||
def run_optimizer(
|
||||
network: torch.nn.modules.container.Sequential,
|
||||
optimizer: list,
|
||||
cfg: Config,
|
||||
) -> None:
|
||||
for id in range(0, len(network)):
|
||||
if isinstance(network[id], SbS) is True:
|
||||
network[id].update_pre_care()
|
||||
|
||||
for optimizer_item in optimizer:
|
||||
if optimizer_item is not None:
|
||||
optimizer_item.step()
|
||||
|
||||
for id in range(0, len(network)):
|
||||
if isinstance(network[id], SbS) is True:
|
||||
network[id].update_after_care(
|
||||
cfg.learning_parameters.learning_rate_threshold_w
|
||||
/ float(
|
||||
network[id]._number_of_input_neurons
|
||||
# * network[id]._kernel_size[0]
|
||||
# * network[id]._kernel_size[1]
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# ####################################
|
||||
# Update the learning rate
|
||||
# ####################################
|
||||
def run_lr_scheduler(
|
||||
cfg: Config,
|
||||
lr_scheduler,
|
||||
optimizer,
|
||||
performance_for_batch: float,
|
||||
my_loss_for_batch: float,
|
||||
tb,
|
||||
logging,
|
||||
) -> None:
|
||||
# Inter-epoch learning rate adaptation
|
||||
for lr_scheduler_item in lr_scheduler:
|
||||
if (
|
||||
(lr_scheduler_item is not None)
|
||||
and (performance_for_batch >= 0.0)
|
||||
and (my_loss_for_batch >= 0.0)
|
||||
):
|
||||
if cfg.learning_parameters.lr_scheduler_use_performance is True:
|
||||
lr_scheduler_item.step(100.0 - performance_for_batch)
|
||||
else:
|
||||
lr_scheduler_item.step(my_loss_for_batch)
|
||||
|
||||
tb.add_scalar(
|
||||
"Train Error",
|
||||
100.0 - performance_for_batch,
|
||||
cfg.epoch_id,
|
||||
)
|
||||
tb.add_scalar("Train Loss", my_loss_for_batch, cfg.epoch_id)
|
||||
tb.add_scalar(
|
||||
"Learning Rate Scale WF",
|
||||
optimizer[0].param_groups[-1]["lr"],
|
||||
cfg.epoch_id,
|
||||
)
|
||||
tb.flush()
|
||||
|
||||
|
||||
# def deal_with_gradient_scale(epoch_id: int, mini_batch_number: int, network):
|
||||
# if (epoch_id == 0) and (mini_batch_number == 0):
|
||||
# for id in range(0, len(network)):
|
||||
# if isinstance(network[id], SbS) is True:
|
||||
# network[id].after_batch(True)
|
||||
# else:
|
||||
# for id in range(0, len(network)):
|
||||
# if isinstance(network[id], SbS) is True:
|
||||
# network[id].after_batch()
|
||||
|
||||
|
||||
def loop_train(
|
||||
cfg: Config,
|
||||
network: torch.nn.modules.container.Sequential,
|
||||
my_loader_train: torch.utils.data.dataloader.DataLoader,
|
||||
the_dataset_train,
|
||||
optimizer: list,
|
||||
device: torch.device,
|
||||
default_dtype: torch.dtype,
|
||||
logging,
|
||||
adapt_learning_rate: bool,
|
||||
tb: SummaryWriter,
|
||||
lr_scheduler,
|
||||
last_test_performance: float,
|
||||
) -> tuple[float, float, float, float]:
|
||||
|
||||
correct_in_minibatch: int = 0
|
||||
loss_in_minibatch: float = 0.0
|
||||
number_of_pattern_in_minibatch: int = 0
|
||||
|
||||
mini_batch_number: int = -1
|
||||
|
||||
full_loss: float = 0.0
|
||||
full_correct: float = 0.0
|
||||
full_count: float = 0.0
|
||||
|
||||
epoch_id: int = cfg.epoch_id
|
||||
|
||||
my_loss_for_batch: float = -1.0
|
||||
performance_for_batch: float = -1.0
|
||||
|
||||
time_forward: float = 0.0
|
||||
time_backward: float = 0.0
|
||||
|
||||
with torch.enable_grad():
|
||||
|
||||
for h_x, h_x_labels in my_loader_train:
|
||||
|
||||
time_mini_batch_start: float = time.perf_counter()
|
||||
|
||||
# ############################################################
|
||||
# Reset the gradient after an update (or the first loop pass)
|
||||
# ############################################################
|
||||
if number_of_pattern_in_minibatch == 0:
|
||||
|
||||
# Reset the gradient of the torch optimizers
|
||||
for optimizer_item in optimizer:
|
||||
if optimizer_item is not None:
|
||||
optimizer_item.zero_grad()
|
||||
|
||||
loss_in_minibatch = 0.0
|
||||
mini_batch_number += 1
|
||||
correct_in_minibatch = 0
|
||||
time_forward = 0.0
|
||||
time_backward = 0.0
|
||||
|
||||
# ####################################
|
||||
# Update the learning rate
|
||||
# ####################################
|
||||
if adapt_learning_rate is True:
|
||||
run_lr_scheduler(
|
||||
cfg=cfg,
|
||||
lr_scheduler=lr_scheduler,
|
||||
optimizer=optimizer,
|
||||
performance_for_batch=performance_for_batch,
|
||||
my_loss_for_batch=my_loss_for_batch,
|
||||
tb=tb,
|
||||
logging=logging,
|
||||
)
|
||||
|
||||
logging.info(
|
||||
(
|
||||
f"\t\t\tLearning rate: "
|
||||
f"weights:{optimizer[0].param_groups[-1]['lr']:^15.3e} "
|
||||
)
|
||||
)
|
||||
|
||||
if last_test_performance < 0:
|
||||
logging.info("")
|
||||
else:
|
||||
logging.info(
|
||||
(
|
||||
f"\t\t\tLast test performance: "
|
||||
f"{last_test_performance/100.0:^6.2%}"
|
||||
)
|
||||
)
|
||||
logging.info("----------------")
|
||||
|
||||
number_of_pattern_in_minibatch += h_x_labels.shape[0]
|
||||
full_count += h_x_labels.shape[0]
|
||||
|
||||
# #####################################################
|
||||
# The network does the forward pass (training)
|
||||
# #####################################################
|
||||
h_collection = forward_pass_train(
|
||||
input=h_x,
|
||||
labels=h_x_labels,
|
||||
the_dataset_train=the_dataset_train,
|
||||
cfg=cfg,
|
||||
network=network,
|
||||
device=device,
|
||||
default_dtype=default_dtype,
|
||||
)
|
||||
|
||||
# #####################################################
|
||||
# Calculate the loss function
|
||||
# #####################################################
|
||||
my_loss: torch.Tensor | None = loss_function(
|
||||
h=h_collection[-1],
|
||||
labels=h_x_labels,
|
||||
device=device,
|
||||
default_dtype=default_dtype,
|
||||
loss_mode=cfg.learning_parameters.loss_mode,
|
||||
number_of_output_neurons=int(
|
||||
cfg.network_structure.number_of_output_neurons
|
||||
),
|
||||
loss_coeffs_mse=float(cfg.learning_parameters.loss_coeffs_mse),
|
||||
loss_coeffs_kldiv=float(cfg.learning_parameters.loss_coeffs_kldiv),
|
||||
)
|
||||
assert my_loss is not None
|
||||
|
||||
time_after_forward_and_loss: float = time.perf_counter()
|
||||
|
||||
# #####################################################
|
||||
# Backward pass
|
||||
# #####################################################
|
||||
my_loss.backward()
|
||||
loss_in_minibatch += my_loss.item()
|
||||
full_loss += my_loss.item()
|
||||
|
||||
time_after_backward: float = time.perf_counter()
|
||||
|
||||
# #####################################################
|
||||
# Performance measures
|
||||
# #####################################################
|
||||
|
||||
correct_in_minibatch += (
|
||||
(h_collection[-1].argmax(dim=1).squeeze().cpu() == h_x_labels)
|
||||
.sum()
|
||||
.item()
|
||||
)
|
||||
full_correct += (
|
||||
(h_collection[-1].argmax(dim=1).squeeze().cpu() == h_x_labels)
|
||||
.sum()
|
||||
.item()
|
||||
)
|
||||
|
||||
# We measure the scale of the propagated error
|
||||
# during the first minibatch
|
||||
# then we remember this size and scale
|
||||
# the future error with it
|
||||
# Kind of deals with the vanishing /
|
||||
# exploding gradients
|
||||
# deal_with_gradient_scale(
|
||||
# epoch_id=epoch_id,
|
||||
# mini_batch_number=mini_batch_number,
|
||||
# network=network,
|
||||
# )
|
||||
|
||||
# Measure the time for one mini-batch
|
||||
time_forward += time_after_forward_and_loss - time_mini_batch_start
|
||||
time_backward += time_after_backward - time_after_forward_and_loss
|
||||
|
||||
if number_of_pattern_in_minibatch >= cfg.get_update_after_x_pattern():
|
||||
|
||||
logging.info(
|
||||
(
|
||||
f"{epoch_id:^6}=>{mini_batch_number:^6} "
|
||||
f"\t\tTraining {number_of_pattern_in_minibatch^6} pattern "
|
||||
f"with {correct_in_minibatch/number_of_pattern_in_minibatch:^6.2%} "
|
||||
f"\tForward time: \t{time_forward:^6.2f}sec"
|
||||
)
|
||||
)
|
||||
|
||||
logging.info(
|
||||
(
|
||||
f"\t\t\tLoss: {loss_in_minibatch/number_of_pattern_in_minibatch:^15.3e} "
|
||||
f"\t\t\tBackward time: \t{time_backward:^6.2f}sec "
|
||||
)
|
||||
)
|
||||
|
||||
my_loss_for_batch = loss_in_minibatch / number_of_pattern_in_minibatch
|
||||
|
||||
performance_for_batch = (
|
||||
100.0 * correct_in_minibatch / number_of_pattern_in_minibatch
|
||||
)
|
||||
|
||||
# ################################################
|
||||
# Update the weights and biases
|
||||
# ################################################
|
||||
run_optimizer(network=network, optimizer=optimizer, cfg=cfg)
|
||||
|
||||
# ################################################
|
||||
# Save the Weights and Biases
|
||||
# ################################################
|
||||
save_weight_and_bias(
|
||||
cfg=cfg, network=network, iteration_number=epoch_id
|
||||
)
|
||||
|
||||
# ################################################
|
||||
# Log the Weights and Biases
|
||||
# ################################################
|
||||
add_weight_and_bias_to_histogram(
|
||||
network=network,
|
||||
tb=tb,
|
||||
iteration_number=epoch_id,
|
||||
)
|
||||
|
||||
# ################################################
|
||||
# Mark mini batch as done
|
||||
# ################################################
|
||||
number_of_pattern_in_minibatch = 0
|
||||
|
||||
return (
|
||||
my_loss_for_batch,
|
||||
performance_for_batch,
|
||||
(full_loss / full_count),
|
||||
(100.0 * full_correct / full_count),
|
||||
)
|
||||
|
||||
|
||||
def loop_test(
|
||||
epoch_id: int,
|
||||
cfg: Config,
|
||||
network: torch.nn.modules.container.Sequential,
|
||||
my_loader_test: torch.utils.data.dataloader.DataLoader,
|
||||
the_dataset_test,
|
||||
device: torch.device,
|
||||
default_dtype: torch.dtype,
|
||||
logging,
|
||||
tb: SummaryWriter,
|
||||
) -> float:
|
||||
|
||||
test_correct = 0
|
||||
test_count = 0
|
||||
test_complete: int = the_dataset_test.__len__()
|
||||
|
||||
logging.info("")
|
||||
logging.info("Testing:")
|
||||
|
||||
for h_x, h_x_labels in my_loader_test:
|
||||
time_0 = time.perf_counter()
|
||||
|
||||
h_collection = forward_pass_test(
|
||||
input=h_x,
|
||||
the_dataset_test=the_dataset_test,
|
||||
cfg=cfg,
|
||||
network=network,
|
||||
device=device,
|
||||
default_dtype=default_dtype,
|
||||
)
|
||||
h_h: torch.Tensor = h_collection[-1].detach().clone().cpu()
|
||||
|
||||
test_correct += (h_h.argmax(dim=1).squeeze() == h_x_labels).sum().numpy()
|
||||
test_count += h_h.shape[0]
|
||||
performance = 100.0 * test_correct / test_count
|
||||
time_1 = time.perf_counter()
|
||||
time_measure_a = time_1 - time_0
|
||||
|
||||
logging.info(
|
||||
(
|
||||
f"\t\t{test_count} of {test_complete}"
|
||||
f" with {performance/100:^6.2%} \t Time used: {time_measure_a:^6.2f}sec"
|
||||
)
|
||||
)
|
||||
|
||||
logging.info("")
|
||||
|
||||
tb.add_scalar("Test Error", 100.0 - performance, epoch_id)
|
||||
tb.flush()
|
||||
|
||||
return performance
|
71
network/save_weight_and_bias.py
Normal file
71
network/save_weight_and_bias.py
Normal file
|
@ -0,0 +1,71 @@
|
|||
import torch
|
||||
|
||||
from network.Parameter import Config
|
||||
|
||||
import numpy as np
|
||||
from network.SbS import SbS
|
||||
from network.SplitOnOffLayer import SplitOnOffLayer
|
||||
from network.Conv2dApproximation import Conv2dApproximation
|
||||
|
||||
|
||||
def save_weight_and_bias(
|
||||
cfg: Config, network: torch.nn.modules.container.Sequential, iteration_number: int
|
||||
) -> None:
|
||||
|
||||
for id in range(0, len(network)):
|
||||
|
||||
# ################################################
|
||||
# Save the SbS Weights
|
||||
# ################################################
|
||||
|
||||
if isinstance(network[id], SbS) is True:
|
||||
if network[id]._w_trainable is True:
|
||||
|
||||
np.save(
|
||||
f"{cfg.weight_path}/Weight_L{id}_S{iteration_number}.npy",
|
||||
network[id].weights.detach().cpu().numpy(),
|
||||
)
|
||||
|
||||
# ################################################
|
||||
# Save the Conv2 Weights and Biases
|
||||
# ################################################
|
||||
|
||||
if isinstance(network[id], torch.nn.modules.conv.Conv2d) is True:
|
||||
if network[id]._w_trainable is True:
|
||||
# Save the new values
|
||||
np.save(
|
||||
f"{cfg.weight_path}/Weight_L{id}_S{iteration_number}.npy",
|
||||
network[id]._parameters["weight"].data.detach().cpu().numpy(),
|
||||
)
|
||||
|
||||
# Save the new values
|
||||
np.save(
|
||||
f"{cfg.weight_path}/Bias_L{id}_S{iteration_number}.npy",
|
||||
network[id]._parameters["bias"].data.detach().cpu().numpy(),
|
||||
)
|
||||
|
||||
# ################################################
|
||||
# Save the Approximate Conv2 Weights and Biases
|
||||
# ################################################
|
||||
|
||||
if isinstance(network[id], Conv2dApproximation) is True:
|
||||
if network[id]._w_trainable is True:
|
||||
# Save the new values
|
||||
np.save(
|
||||
f"{cfg.weight_path}/Weight_L{id}_S{iteration_number}.npy",
|
||||
network[id].weights.data.detach().cpu().numpy(),
|
||||
)
|
||||
|
||||
# Save the new values
|
||||
if network[id].bias is not None:
|
||||
np.save(
|
||||
f"{cfg.weight_path}/Bias_L{id}_S{iteration_number}.npy",
|
||||
network[id].bias.data.detach().cpu().numpy(),
|
||||
)
|
||||
|
||||
if isinstance(network[id], SplitOnOffLayer) is True:
|
||||
|
||||
np.save(
|
||||
f"{cfg.weight_path}/Mean_L{id}_S{iteration_number}.npy",
|
||||
network[id].mean.detach().cpu().numpy(),
|
||||
)
|
4
network/test_settings_fashion/dataset.json
Normal file
4
network/test_settings_fashion/dataset.json
Normal file
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"data_path": "./DATA_FASHION_MNIST/",
|
||||
"data_mode": "MNIST_FASHION"
|
||||
}
|
29
network/test_settings_fashion/def_approx_cnn.json
Normal file
29
network/test_settings_fashion/def_approx_cnn.json
Normal file
|
@ -0,0 +1,29 @@
|
|||
{
|
||||
"epoch_id_max": 200,
|
||||
"batch_size": 24,
|
||||
"stage_id": 0,
|
||||
"simulation_id": 0,
|
||||
"learning_parameters": {
|
||||
"loss_mode": 1, // X-Entropy
|
||||
"number_of_batches_for_one_update": 20,
|
||||
"learning_rate_gamma_w": 0.001,
|
||||
"lr_scheduler_patience_w": -1,
|
||||
"adapt_learning_rate_after_minibatch": false,
|
||||
"w_trainable": [
|
||||
true
|
||||
]
|
||||
},
|
||||
"augmentation": {},
|
||||
"image_statistics": {},
|
||||
"approximation_setting": {
|
||||
"number_of_frac_bits": [
|
||||
30
|
||||
], // Quantization of the mantissa
|
||||
"approximation_enable": [
|
||||
true
|
||||
], // In addition to the quantization more approx
|
||||
"number_of_trunc_bits": [
|
||||
1
|
||||
]
|
||||
}
|
||||
}
|
19
network/test_settings_fashion/def_cnn.json
Normal file
19
network/test_settings_fashion/def_cnn.json
Normal file
|
@ -0,0 +1,19 @@
|
|||
{
|
||||
"epoch_id_max": 200,
|
||||
"batch_size": 24,
|
||||
"stage_id": 0,
|
||||
"simulation_id": 0,
|
||||
"learning_parameters": {
|
||||
"loss_mode": 1, // X-Entropy
|
||||
"number_of_batches_for_one_update": 20,
|
||||
"learning_rate_gamma_w": 0.001,
|
||||
"lr_scheduler_patience_w": -1,
|
||||
"adapt_learning_rate_after_minibatch": false,
|
||||
"w_trainable": [
|
||||
true
|
||||
]
|
||||
},
|
||||
"augmentation": {},
|
||||
"image_statistics": {},
|
||||
"approximation_setting": {}
|
||||
}
|
97
network/test_settings_fashion/network_approx_cnn.json
Normal file
97
network/test_settings_fashion/network_approx_cnn.json
Normal file
|
@ -0,0 +1,97 @@
|
|||
{
|
||||
"network_structure": {
|
||||
"number_of_output_neurons": 10,
|
||||
"layer_type": [
|
||||
"APPROX CONV2D",
|
||||
"RELU",
|
||||
"MAX POOLING",
|
||||
"APPROX CONV2D",
|
||||
"RELU",
|
||||
"MAX POOLING",
|
||||
"APPROX CONV2D",
|
||||
"RELU",
|
||||
"APPROX CONV2D"
|
||||
],
|
||||
"strides": [
|
||||
[
|
||||
1,
|
||||
1
|
||||
], // "CONV2D"
|
||||
[], // "RELU",
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // "MAX POOLING",
|
||||
[
|
||||
1,
|
||||
1
|
||||
], // "CONV2D"
|
||||
[], // "RELU",
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // "MAX POOLING",
|
||||
[
|
||||
1,
|
||||
1
|
||||
], // "CONV2D"
|
||||
[], // "RELU",
|
||||
[
|
||||
1,
|
||||
1
|
||||
] // "CONV2D"
|
||||
],
|
||||
"forward_neuron_numbers": [
|
||||
[
|
||||
1,
|
||||
32
|
||||
], // "CONV2D",
|
||||
[], // "RELU",
|
||||
[], // "MAX POOLING",
|
||||
[
|
||||
32,
|
||||
64
|
||||
], // "CONV2D",
|
||||
[], // "RELU",
|
||||
[], // "MAX POOLING",
|
||||
[
|
||||
64,
|
||||
96
|
||||
], // "CONV2D",
|
||||
[], // "RELU",
|
||||
[
|
||||
96,
|
||||
10
|
||||
] // "CONV2D",
|
||||
],
|
||||
"forward_kernel_size": [
|
||||
[
|
||||
5,
|
||||
5
|
||||
], // "CONV2D",
|
||||
[], // "RELU",
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // "MAX POOLING",
|
||||
[
|
||||
5,
|
||||
5
|
||||
], // "CONV2D",
|
||||
[], // "RELU",
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // "MAX POOLING",
|
||||
[
|
||||
3,
|
||||
3
|
||||
], // "CONV2D",
|
||||
[], // "RELU",
|
||||
[
|
||||
1,
|
||||
1
|
||||
] // "CONV2D",
|
||||
]
|
||||
}
|
||||
}
|
97
network/test_settings_fashion/network_cnn.json
Normal file
97
network/test_settings_fashion/network_cnn.json
Normal file
|
@ -0,0 +1,97 @@
|
|||
{
|
||||
"network_structure": {
|
||||
"number_of_output_neurons": 10,
|
||||
"layer_type": [
|
||||
"CONV2D",
|
||||
"RELU",
|
||||
"MAX POOLING",
|
||||
"CONV2D",
|
||||
"RELU",
|
||||
"MAX POOLING",
|
||||
"CONV2D",
|
||||
"RELU",
|
||||
"CONV2D"
|
||||
],
|
||||
"strides": [
|
||||
[
|
||||
1,
|
||||
1
|
||||
], // "CONV2D"
|
||||
[], // "RELU",
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // "MAX POOLING",
|
||||
[
|
||||
1,
|
||||
1
|
||||
], // "CONV2D"
|
||||
[], // "RELU",
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // "MAX POOLING",
|
||||
[
|
||||
1,
|
||||
1
|
||||
], // "CONV2D"
|
||||
[], // "RELU",
|
||||
[
|
||||
1,
|
||||
1
|
||||
] // "CONV2D"
|
||||
],
|
||||
"forward_neuron_numbers": [
|
||||
[
|
||||
1,
|
||||
32
|
||||
], // "CONV2D",
|
||||
[], // "RELU",
|
||||
[], // "MAX POOLING",
|
||||
[
|
||||
32,
|
||||
64
|
||||
], // "CONV2D",
|
||||
[], // "RELU",
|
||||
[], // "MAX POOLING",
|
||||
[
|
||||
64,
|
||||
96
|
||||
], // "CONV2D",
|
||||
[], // "RELU",
|
||||
[
|
||||
96,
|
||||
10
|
||||
] // "CONV2D",
|
||||
],
|
||||
"forward_kernel_size": [
|
||||
[
|
||||
5,
|
||||
5
|
||||
], // "CONV2D",
|
||||
[], // "RELU",
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // "MAX POOLING",
|
||||
[
|
||||
5,
|
||||
5
|
||||
], // "CONV2D",
|
||||
[], // "RELU",
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // "MAX POOLING",
|
||||
[
|
||||
3,
|
||||
3
|
||||
], // "CONV2D",
|
||||
[], // "RELU",
|
||||
[
|
||||
1,
|
||||
1
|
||||
] // "CONV2D",
|
||||
]
|
||||
}
|
||||
}
|
186
network/unused_code/LinearApproximation.py
Normal file
186
network/unused_code/LinearApproximation.py
Normal file
|
@ -0,0 +1,186 @@
|
|||
import torch
|
||||
import math
|
||||
|
||||
from network.CPP.PyMultiApp import MultiApp
|
||||
|
||||
|
||||
class LinearApproximation(torch.nn.Module):
|
||||
|
||||
in_features: int | None = None
|
||||
out_features: int | None = None
|
||||
use_bias: bool = False
|
||||
|
||||
approximation_enable: bool = False
|
||||
number_of_trunc_bits: int = -1
|
||||
number_of_frac: int = -1
|
||||
|
||||
number_of_processes: int = 1
|
||||
|
||||
weights: torch.nn.parameter.Parameter
|
||||
bias: torch.nn.parameter.Parameter | None
|
||||
|
||||
device: torch.device
|
||||
dtype: torch.dtype
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_features: int,
|
||||
out_features: int,
|
||||
bias: bool = True,
|
||||
approximation_enable: bool = False,
|
||||
number_of_trunc_bits: int = -1,
|
||||
number_of_frac: int = -1,
|
||||
number_of_processes: int = 1,
|
||||
device: torch.device | None = None,
|
||||
dtype: torch.dtype | None = None,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
|
||||
assert device is not None
|
||||
self.device = device
|
||||
|
||||
assert dtype is not None
|
||||
self.dtype = dtype
|
||||
|
||||
self.in_features = in_features
|
||||
self.out_channels = out_features
|
||||
self.use_bias = bias
|
||||
|
||||
self.approximation_enable = approximation_enable
|
||||
self.number_of_trunc_bits = number_of_trunc_bits
|
||||
self.number_of_frac = number_of_frac
|
||||
|
||||
self.number_of_processes = number_of_processes
|
||||
|
||||
if self.use_bias is True:
|
||||
self.bias: torch.nn.parameter.Parameter | None = (
|
||||
torch.nn.parameter.Parameter(
|
||||
torch.empty(
|
||||
(out_features),
|
||||
dtype=self.dtype,
|
||||
device=self.device,
|
||||
)
|
||||
)
|
||||
)
|
||||
else:
|
||||
self.bias = None
|
||||
|
||||
self.weights: torch.nn.parameter.Parameter = torch.nn.parameter.Parameter(
|
||||
torch.empty(
|
||||
(out_features, in_features),
|
||||
dtype=self.dtype,
|
||||
device=self.device,
|
||||
)
|
||||
)
|
||||
|
||||
self.functional_multi = FunctionalMultiLinear.apply
|
||||
|
||||
self.reset_parameters()
|
||||
|
||||
def reset_parameters(self) -> None:
|
||||
# Stolen from original torch conv2 code
|
||||
torch.nn.init.kaiming_uniform_(self.weights, a=math.sqrt(5))
|
||||
if self.bias is not None:
|
||||
fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weights)
|
||||
if fan_in != 0:
|
||||
bound = 1 / math.sqrt(fan_in)
|
||||
torch.nn.init.uniform_(self.bias, -bound, bound)
|
||||
|
||||
def forward(self, input: torch.Tensor) -> torch.Tensor:
|
||||
|
||||
assert input.dim() == 2
|
||||
|
||||
parameter_list = torch.tensor(
|
||||
[
|
||||
int(self.approximation_enable), # 0
|
||||
int(self.number_of_trunc_bits), # 1
|
||||
int(self.number_of_frac), # 2
|
||||
int(self.number_of_processes), # 3
|
||||
],
|
||||
dtype=torch.int64,
|
||||
)
|
||||
|
||||
output = self.functional_multi(
|
||||
input.unsqueeze(-1).unsqueeze(-1), self.weights, parameter_list
|
||||
)
|
||||
|
||||
output = output.squeeze(-1).squeeze(-1)
|
||||
|
||||
if self.bias is not None:
|
||||
output += self.bias.unsqueeze(0)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
class FunctionalMultiLinear(torch.autograd.Function):
|
||||
@staticmethod
|
||||
def forward( # type: ignore
|
||||
ctx,
|
||||
input: torch.Tensor,
|
||||
weights: torch.Tensor,
|
||||
parameter_list: torch.Tensor,
|
||||
) -> torch.Tensor:
|
||||
|
||||
assert input.ndim == 4
|
||||
assert input.dtype is torch.float32
|
||||
assert input.is_contiguous() is True
|
||||
|
||||
assert weights.ndim == 2
|
||||
assert weights.dtype is torch.float32
|
||||
assert weights.is_contiguous() is True
|
||||
|
||||
assert input.shape[1] == weights.shape[1]
|
||||
|
||||
approximation_enable = bool(parameter_list[0])
|
||||
number_of_trunc_bits = int(parameter_list[1])
|
||||
number_of_frac = int(parameter_list[2])
|
||||
number_of_processes = int(parameter_list[3])
|
||||
|
||||
assert input.device == weights.device
|
||||
|
||||
output = torch.zeros(
|
||||
(input.shape[0], weights.shape[0], input.shape[2], input.shape[3]),
|
||||
dtype=weights.dtype,
|
||||
device=weights.device,
|
||||
requires_grad=True,
|
||||
)
|
||||
assert output.is_contiguous() is True
|
||||
|
||||
multiplier: MultiApp = MultiApp()
|
||||
|
||||
multiplier.update_with_init_vector_multi_pattern(
|
||||
input.data_ptr(),
|
||||
weights.data_ptr(),
|
||||
output.data_ptr(),
|
||||
int(output.shape[0]), # pattern
|
||||
int(output.shape[1]), # feature channel
|
||||
int(output.shape[2]), # x
|
||||
int(output.shape[3]), # y
|
||||
int(input.shape[1]), # input channel
|
||||
int(number_of_processes),
|
||||
bool(approximation_enable),
|
||||
int(number_of_trunc_bits),
|
||||
int(number_of_frac),
|
||||
)
|
||||
|
||||
ctx.save_for_backward(
|
||||
input.detach(),
|
||||
weights.detach(),
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@staticmethod
|
||||
def backward(ctx, grad_output):
|
||||
|
||||
(input, weights) = ctx.saved_tensors
|
||||
|
||||
grad_input = (
|
||||
grad_output.unsqueeze(2) * weights.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
|
||||
).sum(1)
|
||||
grad_weights = (
|
||||
(grad_output.unsqueeze(2) * input.unsqueeze(1)).sum(0).sum(-1).sum(-1)
|
||||
)
|
||||
grad_parameter_list = None
|
||||
|
||||
return (grad_input, grad_weights, grad_parameter_list)
|
39
network_sbs_L0.json
Normal file
39
network_sbs_L0.json
Normal file
|
@ -0,0 +1,39 @@
|
|||
{
|
||||
"network_structure": {
|
||||
"number_of_output_neurons": 10,
|
||||
"layer_type": [
|
||||
"SbS",
|
||||
"SbS"
|
||||
],
|
||||
"strides": [
|
||||
[
|
||||
1,
|
||||
1
|
||||
],
|
||||
[
|
||||
1,
|
||||
1
|
||||
]
|
||||
],
|
||||
"forward_neuron_numbers": [
|
||||
[
|
||||
1,
|
||||
32
|
||||
],
|
||||
[
|
||||
32,
|
||||
10
|
||||
]
|
||||
],
|
||||
"forward_kernel_size": [
|
||||
[
|
||||
5,
|
||||
5
|
||||
],
|
||||
[
|
||||
20,
|
||||
20
|
||||
]
|
||||
]
|
||||
}
|
||||
}
|
91
network_sbs_mp.json
Normal file
91
network_sbs_mp.json
Normal file
|
@ -0,0 +1,91 @@
|
|||
{
|
||||
"network_structure": {
|
||||
"number_of_output_neurons": 10,
|
||||
"layer_type": [
|
||||
"SbS",
|
||||
"MAX POOLING",
|
||||
"SbS",
|
||||
"MAX POOLING",
|
||||
"SbS",
|
||||
"SbS"
|
||||
],
|
||||
"strides": [
|
||||
[
|
||||
1,
|
||||
1
|
||||
], // "SbS"
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // POOLING
|
||||
[
|
||||
1,
|
||||
1
|
||||
], // "SbS"
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // POOLING
|
||||
[
|
||||
1,
|
||||
1
|
||||
], // "SbS"
|
||||
[
|
||||
1,
|
||||
1
|
||||
] // "SbS"
|
||||
],
|
||||
"forward_neuron_numbers": [
|
||||
[
|
||||
1,
|
||||
32
|
||||
], // "SbS"
|
||||
[
|
||||
32,
|
||||
32
|
||||
], // POOLING
|
||||
[
|
||||
32,
|
||||
64
|
||||
], // "SbS"
|
||||
[
|
||||
64,
|
||||
64
|
||||
], // POOLING
|
||||
[
|
||||
64,
|
||||
96
|
||||
], // "SbS"
|
||||
[
|
||||
96,
|
||||
10
|
||||
] // "SbS"
|
||||
],
|
||||
"forward_kernel_size": [
|
||||
[
|
||||
5,
|
||||
5
|
||||
], // "SbS"
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // POOLING
|
||||
[
|
||||
5,
|
||||
5
|
||||
], // "SbS"
|
||||
[
|
||||
2,
|
||||
2
|
||||
], // POOLING
|
||||
[
|
||||
3,
|
||||
3
|
||||
], // "SbS"
|
||||
[
|
||||
1,
|
||||
1
|
||||
] // "SbS"
|
||||
]
|
||||
}
|
||||
}
|
19
run_spikes.sh
Normal file
19
run_spikes.sh
Normal file
|
@ -0,0 +1,19 @@
|
|||
python=~/P3.10GPU/bin/python3
|
||||
|
||||
if [ -f "./def.json" ]
|
||||
then
|
||||
echo "json file found"
|
||||
else
|
||||
echo "No json file exists"
|
||||
exit
|
||||
fi
|
||||
|
||||
|
||||
$python learn_it.py def.json
|
||||
status=$?
|
||||
if [ $status != "0" ]
|
||||
then
|
||||
echo "Problem detected (learn_it.py)!!!"
|
||||
exit 1
|
||||
fi
|
||||
|
Loading…
Reference in a new issue