Merge pull request #1 from davrot/v2

V2
This commit is contained in:
David Rotermund 2023-01-05 13:32:59 +01:00 committed by GitHub
commit 76ea2096c4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
111 changed files with 7171 additions and 5655 deletions

View file

@ -1,294 +0,0 @@
// MIT License
// Copyright 2022 University of Bremen
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
// THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//
// David Rotermund ( davrot@uni-bremen.de )
//
//
// Release history:
// ================
// 1.0.0 -- 01.05.2022: first release
//
//
#include "HDynamicCNNManyIP.h"
#include <omp.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <cassert>
#include <iostream>
#include <vector>
// Constructor: performs no work, so the compiler-generated body is used.
HDynamicCNNManyIP::HDynamicCNNManyIP() = default;
// Destructor: performs no work, so the compiler-generated body is used.
HDynamicCNNManyIP::~HDynamicCNNManyIP() = default;
// Computes the spike-driven h update for ONE pattern (`id_pattern`) and writes
// the result into the h buffer.
//
// All `*_addr` arguments are raw memory addresses that are reinterpreted as
// pointers; the `*_dim_*` arguments are the extents used to derive dense
// row-major strides. As indexed by the code below:
//   h           : float   [pattern][h channel][id_0][id_1]   (written)
//   epsilon_xy  : float   [spike value][id_0][id_1]          (read)
//   epsilon_t   : float   [id_spike]                         (read)
//   weights     : float   [spike value][h channel]           (read)
//   input       : int64_t [pattern][id_spike][id_0][id_1]    (read, spike values)
//   init vector : float   [h channel], length must equal np_weights_dim_1
//
// For every spatial position (id_0, id_1) the working vector starts as a copy
// of the init vector and is updated once per spike:
//   tmp = h * weights[spike];  s = sum(tmp)
//   if s > 1e-10:  h += tmp * (scale * eps / s);  scale *= (1 + eps)
// with eps = epsilon_xy[spike, id_0, id_1] * epsilon_t[id_spike]. The vector is
// kept un-normalised; the pending factor `scale` is divided out at the end (and
// whenever it exceeds 1e10).
//
// NOTE(review): spike values read from `input` are used directly as row indices
// into `weights` and `epsilon_xy` without a range check -- the caller has to
// guarantee 0 <= spike < np_weights_dim_0 (and < np_epsilon_xy_dim_0).
//
// Preconditions are enforced with assert() only, i.e. not in NDEBUG builds.
// Always returns true.
bool HDynamicCNNManyIP::update(
    int64_t np_h_pointer_addr, int64_t np_h_dim_0, int64_t np_h_dim_1,
    int64_t np_h_dim_2, int64_t np_h_dim_3, int64_t np_epsilon_xy_pointer_addr,
    int64_t np_epsilon_xy_dim_0, int64_t np_epsilon_xy_dim_1,
    int64_t np_epsilon_xy_dim_2, int64_t np_epsilon_t_pointer_addr,
    int64_t np_epsilon_t_dim_0, int64_t np_weights_pointer_addr,
    int64_t np_weights_dim_0, int64_t np_weights_dim_1,
    int64_t np_input_pointer_addr, int64_t np_input_dim_0,
    int64_t np_input_dim_1, int64_t np_input_dim_2, int64_t np_input_dim_3,
    float *np_init_vector_pointer_ptr, int64_t np_init_vector_dim_0,
    int64_t id_pattern) {
  float *np_h_pointer = (float *)np_h_pointer_addr;
  float *np_epsilon_xy_pointer = (float *)np_epsilon_xy_pointer_addr;
  float *np_epsilon_t_pointer = (float *)np_epsilon_t_pointer_addr;
  float *np_weights_pointer = (float *)np_weights_pointer_addr;
  int64_t *np_input_pointer = (int64_t *)np_input_pointer_addr;

  int64_t number_of_pattern = np_input_dim_0;

  assert((id_pattern >= 0));
  assert((id_pattern < number_of_pattern));

  // h (output)
  assert((np_h_pointer != nullptr));
  assert((np_h_dim_0 > 0));
  assert((np_h_dim_1 > 0));
  assert((np_h_dim_2 > 0));
  assert((np_h_dim_3 > 0));

  int64_t np_h_dim_c0 = np_h_dim_1 * np_h_dim_2 * np_h_dim_3;
  int64_t np_h_dim_c1 = np_h_dim_2 * np_h_dim_3;
  int64_t np_h_dim_c2 = np_h_dim_3;

  // epsilon_xy
  assert((np_epsilon_xy_pointer != nullptr));
  assert((np_epsilon_xy_dim_0 > 0));
  assert((np_epsilon_xy_dim_1 > 0));
  // dim_2 feeds both strides below, so it has to be checked like the others.
  assert((np_epsilon_xy_dim_2 > 0));

  int64_t np_epsilon_xy_dim_c0 = np_epsilon_xy_dim_2 * np_epsilon_xy_dim_1;
  int64_t np_epsilon_xy_dim_c1 = np_epsilon_xy_dim_2;

  // epsilon_t
  assert((np_epsilon_t_pointer != nullptr));
  assert((np_epsilon_t_dim_0 > 0));

  // weights
  assert((np_weights_pointer != nullptr));
  assert((np_weights_dim_0 > 0));
  assert((np_weights_dim_1 > 0));

  int64_t np_weights_dim_c0 = np_weights_dim_1;

  // input (spikes)
  assert((np_input_pointer != nullptr));
  assert((np_input_dim_0 > 0));
  assert((np_input_dim_1 > 0));
  assert((np_input_dim_2 > 0));
  assert((np_input_dim_3 > 0));

  int64_t np_input_dim_c0 = np_input_dim_1 * np_input_dim_2 * np_input_dim_3;
  int64_t np_input_dim_c1 = np_input_dim_2 * np_input_dim_3;
  int64_t np_input_dim_c2 = np_input_dim_3;

  // init vector
  assert((np_init_vector_pointer_ptr != nullptr));
  assert((np_init_vector_dim_0 == np_weights_dim_1));

  int64_t number_of_spikes = np_input_dim_1;
  int64_t h_dim = np_weights_dim_1;

  // epsilon_t is read at every index in [0, number_of_spikes).
  assert((np_epsilon_t_dim_0 >= number_of_spikes));

  // Scratch buffers, private to this call (and therefore to the calling thread).
  std::vector<float> h_temp_vector(h_dim);
  float *h_temp = h_temp_vector.data();

  std::vector<float> h_subsegment_vector(h_dim);
  float *h_subsegment = h_subsegment_vector.data();

  // Base addresses of the selected pattern.
  int64_t *np_input_pointer_pattern =
      np_input_pointer + id_pattern * np_input_dim_c0;
  float *np_h_pointer_pattern = np_h_pointer + id_pattern * np_h_dim_c0;

  for (int64_t id_0 = 0; id_0 < np_input_dim_2; id_0++) {
    float *np_epsilon_xy_pointer_0 =
        np_epsilon_xy_pointer + id_0 * np_epsilon_xy_dim_c1;
    float *np_h_pointer_pattern_0 = np_h_pointer_pattern + id_0 * np_h_dim_c2;
    int64_t *np_input_pointer_pattern_0 =
        np_input_pointer_pattern + id_0 * np_input_dim_c2;

    for (int64_t id_1 = 0; id_1 < np_input_dim_3; id_1++) {
      float *np_epsilon_xy_pointer_01 = np_epsilon_xy_pointer_0 + id_1;
      float *np_h_pointer_pattern_01 = np_h_pointer_pattern_0 + id_1;
      int64_t *np_input_pointer_pattern_01 = np_input_pointer_pattern_0 + id_1;

      // Every spatial position starts from the same initial vector.
      memcpy(h_subsegment, np_init_vector_pointer_ptr, sizeof(float) * h_dim);

      float epsilon_scale = 1.0;

      for (int64_t id_spike = 0; id_spike < number_of_spikes; id_spike++) {
        // Fold the pending scale factor into the vector before it grows
        // further (presumably to stay clear of float overflow).
        if (epsilon_scale > 1E10) {
          float temp_value = 1.0 / epsilon_scale;

#pragma omp simd
          for (int64_t counter = 0; counter < h_dim; counter++) {
            h_subsegment[counter] *= temp_value;
          }

          epsilon_scale = 1.0;
        }

        int64_t *np_input_pointer_pattern_01_spike =
            np_input_pointer_pattern_01 + id_spike * np_input_dim_c1;

        // The spike value selects the epsilon_xy plane and the weight row.
        float epsilon_subsegment =
            np_epsilon_xy_pointer_01[np_input_pointer_pattern_01_spike[0] *
                                     np_epsilon_xy_dim_c0] *
            np_epsilon_t_pointer[id_spike];

        float *w_ptr = np_weights_pointer +
                       np_input_pointer_pattern_01_spike[0] * np_weights_dim_c0;

        // h_temp = h_subsegment * w (element-wise).
#pragma omp simd
        for (int64_t counter = 0; counter < h_dim; counter++) {
          h_temp[counter] = h_subsegment[counter] * w_ptr[counter];
        }

        float h_temp_sum = 0.0;

#pragma omp simd reduction(+ : h_temp_sum)
        for (int64_t counter = 0; counter < h_dim; counter++) {
          h_temp_sum += h_temp[counter];
        }

        // Skip the spike when the overlap is (numerically) zero; this also
        // avoids dividing by ~0.
        if (h_temp_sum > 1E-10) {
          float temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum;

          // Scaling and accumulation stay in separate statements/loops so the
          // rounding matches a plain multiply followed by a plain add.
#pragma omp simd
          for (int64_t counter = 0; counter < h_dim; counter++) {
            h_temp[counter] *= temp_value;
          }

#pragma omp simd
          for (int64_t counter = 0; counter < h_dim; counter++) {
            h_subsegment[counter] += h_temp[counter];
          }

          epsilon_scale *= 1.0 + epsilon_subsegment;
        }
      }

      // Divide out the pending scale while scattering the result along the
      // channel axis of h (stride np_h_dim_c1).
      float temp_value = 1.0 / epsilon_scale;

#pragma omp simd
      for (int64_t counter = 0; counter < h_dim; counter++) {
        np_h_pointer_pattern_01[counter * np_h_dim_c1] =
            h_subsegment[counter] * temp_value;
      }
    }
  }

  return true;
}
// Batch entry point: calls update() once per pattern (np_input_dim_0 patterns)
// inside an OpenMP parallel-for, after setting the OpenMP thread count to
// `number_of_processes`.
//
// `np_init_vector_pointer_addr` is a raw address reinterpreted as float*; all
// other arguments are forwarded to update() unchanged. The return value of
// update() is not inspected. Always returns true.
bool HDynamicCNNManyIP::update_with_init_vector_multi_pattern(
    int64_t np_h_pointer_addr, int64_t np_h_dim_0, int64_t np_h_dim_1,
    int64_t np_h_dim_2, int64_t np_h_dim_3, int64_t np_epsilon_xy_pointer_addr,
    int64_t np_epsilon_xy_dim_0, int64_t np_epsilon_xy_dim_1,
    int64_t np_epsilon_xy_dim_2, int64_t np_epsilon_t_pointer_addr,
    int64_t np_epsilon_t_dim_0, int64_t np_weights_pointer_addr,
    int64_t np_weights_dim_0, int64_t np_weights_dim_1,
    int64_t np_input_pointer_addr, int64_t np_input_dim_0,
    int64_t np_input_dim_1, int64_t np_input_dim_2, int64_t np_input_dim_3,
    int64_t np_init_vector_pointer_addr, int64_t np_init_vector_dim_0,
    int64_t number_of_processes) {
  float *init_vector = reinterpret_cast<float *>(np_init_vector_pointer_addr);
  const int64_t init_vector_length = np_init_vector_dim_0;
  const int64_t pattern_count = np_input_dim_0;

  omp_set_num_threads(static_cast<int>(number_of_processes));

  // Each iteration handles a different pattern index.
#pragma omp parallel for
  for (int64_t pattern = 0; pattern < pattern_count; pattern++) {
    update(np_h_pointer_addr, np_h_dim_0, np_h_dim_1, np_h_dim_2, np_h_dim_3,
           np_epsilon_xy_pointer_addr, np_epsilon_xy_dim_0,
           np_epsilon_xy_dim_1, np_epsilon_xy_dim_2,
           np_epsilon_t_pointer_addr, np_epsilon_t_dim_0,
           np_weights_pointer_addr, np_weights_dim_0, np_weights_dim_1,
           np_input_pointer_addr, np_input_dim_0, np_input_dim_1,
           np_input_dim_2, np_input_dim_3, init_vector, init_vector_length,
           pattern);
  }

  return true;
}

View file

@ -1,72 +0,0 @@
// MIT License
// Copyright 2022 University of Bremen
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
// THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//
// David Rotermund ( davrot@uni-bremen.de )
//
//
// Release history:
// ================
// 1.0.0 -- 01.05.2022: first release
//
//
#ifndef SRC_HDYNAMICCNNMANYIP_H_
#define SRC_HDYNAMICCNNMANYIP_H_
#include <unistd.h>
#include <cctype>
#include <iostream>
// Stateless helper class (no data members) that implements the h update on
// raw buffers. Buffers are passed as integer addresses plus their extents.
//
// NOTE(review): int64_t is used throughout, but <cstdint> is not included
// directly by this header -- it appears to rely on a transitive include.
class HDynamicCNNManyIP {
public:
HDynamicCNNManyIP();
~HDynamicCNNManyIP();
// Processes a single pattern, selected by `id_pattern`.
//   np_h_*          : address and 4 extents of the h buffer
//   np_epsilon_xy_* : address and 3 extents of the epsilon_xy buffer
//   np_epsilon_t_*  : address and 1 extent of the epsilon_t buffer
//   np_weights_*    : address and 2 extents of the weights buffer
//   np_input_*      : address and 4 extents of the input buffer
//   np_init_vector_pointer_ptr / np_init_vector_dim_0 :
//                     pointer to and length of the initial vector
bool update(int64_t np_h_pointer_addr, int64_t np_h_dim_0, int64_t np_h_dim_1,
int64_t np_h_dim_2, int64_t np_h_dim_3,
int64_t np_epsilon_xy_pointer_addr, int64_t np_epsilon_xy_dim_0,
int64_t np_epsilon_xy_dim_1, int64_t np_epsilon_xy_dim_2,
int64_t np_epsilon_t_pointer_addr, int64_t np_epsilon_t_dim_0,
int64_t np_weights_pointer_addr, int64_t np_weights_dim_0,
int64_t np_weights_dim_1, int64_t np_input_pointer_addr,
int64_t np_input_dim_0, int64_t np_input_dim_1,
int64_t np_input_dim_2, int64_t np_input_dim_3,
float *np_init_vector_pointer_ptr, int64_t np_init_vector_dim_0,
int64_t id_pattern);
// Batch variant with the same buffer arguments as update(), except that the
// initial vector is passed as an integer address and the last argument is
// `number_of_processes` instead of a pattern index.
bool update_with_init_vector_multi_pattern(
int64_t np_h_pointer_addr, int64_t np_h_dim_0, int64_t np_h_dim_1,
int64_t np_h_dim_2, int64_t np_h_dim_3,
int64_t np_epsilon_xy_pointer_addr, int64_t np_epsilon_xy_dim_0,
int64_t np_epsilon_xy_dim_1, int64_t np_epsilon_xy_dim_2,
int64_t np_epsilon_t_pointer_addr, int64_t np_epsilon_t_dim_0,
int64_t np_weights_pointer_addr, int64_t np_weights_dim_0,
int64_t np_weights_dim_1, int64_t np_input_pointer_addr,
int64_t np_input_dim_0, int64_t np_input_dim_1, int64_t np_input_dim_2,
int64_t np_input_dim_3, int64_t np_init_vector_pointer_addr,
int64_t np_init_vector_dim_0, int64_t number_of_processes);
private:
};
#endif /* SRC_HDYNAMICCNNMANYIP_H_ */

View file

@ -1,75 +0,0 @@
# MIT License
# Copyright 2022 University of Bremen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# David Rotermund ( davrot@uni-bremen.de )
#
#
# Release history:
# ================
# 1.0.0 -- 01.05.2022: first release
#
#
# Change to your python bin directory (tested with Python 3.10.4)
# Directory containing python3 and python3-config (keep the trailing slash).
PYBIN=~/P3.10/bin/
COMPILER=clang++

# The backticks are expanded by the shell when a recipe runs, not by make.
PYBIND11INCLUDE=`$(PYBIN)python3 -m pybind11 --includes`
PARAMETERS= -O3 -std=c++14 -fPIC $(PYBIND11INCLUDE) -Wall -fopenmp=libomp

# `all` and `clean` name actions, not files; declare them phony so a stray
# file with one of these names cannot suppress the rule.
.PHONY: all clean

# Default goal: build both extension modules.
all: PyHDynamicCNNManyIP \
	PySpikeGeneration2DManyIP

#######################

HDynamicCNNManyIP.o: HDynamicCNNManyIP.h HDynamicCNNManyIP.cpp
	$(COMPILER) $(PARAMETERS) -c HDynamicCNNManyIP.cpp -o HDynamicCNNManyIP.o

PyHDynamicCNNManyIP.o: HDynamicCNNManyIP.h PyHDynamicCNNManyIP.cpp
	$(COMPILER) $(PARAMETERS) -c PyHDynamicCNNManyIP.cpp -o PyHDynamicCNNManyIP.o

# Link the shared object, then copy it one directory up under the file name
# suffix reported by python3-config.
PyHDynamicCNNManyIP: HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o
	$(COMPILER) -shared -o PyHDynamicCNNManyIP HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o -lm -lomp -lstdc++ -Wall
	cp PyHDynamicCNNManyIP ../PyHDynamicCNNManyIP`$(PYBIN)python3-config --extension-suffix`

#######################

SpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h SpikeGeneration2DManyIP.cpp
	$(COMPILER) $(PARAMETERS) -c SpikeGeneration2DManyIP.cpp -o SpikeGeneration2DManyIP.o

PySpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h PySpikeGeneration2DManyIP.cpp
	$(COMPILER) $(PARAMETERS) -c PySpikeGeneration2DManyIP.cpp -o PySpikeGeneration2DManyIP.o

# Link the shared object, then copy it one directory up under the file name
# suffix reported by python3-config.
PySpikeGeneration2DManyIP: SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o
	$(COMPILER) -shared -o PySpikeGeneration2DManyIP SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o -lm -lomp -lstdc++ -Wall
	cp PySpikeGeneration2DManyIP ../PySpikeGeneration2DManyIP`$(PYBIN)python3-config --extension-suffix`

#######################

# Removes the build products in this directory (not the copies made in ../).
clean:
	rm -f PyHDynamicCNNManyIP
	rm -f PySpikeGeneration2DManyIP
	rm -f *.o

View file

@ -1,44 +0,0 @@
// MIT License
// Copyright 2022 University of Bremen
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
// THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//
// David Rotermund ( davrot@uni-bremen.de )
//
//
// Release history:
// ================
// 1.0.0 -- 01.05.2022: first release
//
//
#include <pybind11/pybind11.h>
#include "HDynamicCNNManyIP.h"
namespace py = pybind11;
// Python module "PyHDynamicCNNManyIP": exposes the HDynamicCNNManyIP class with
// its default constructor and the batch method
// update_with_init_vector_multi_pattern. No other method is bound here.
PYBIND11_MODULE(PyHDynamicCNNManyIP, m) {
  m.doc() = "HDynamicCNNManyIP Module";

  py::class_<HDynamicCNNManyIP> binding(m, "HDynamicCNNManyIP");
  binding.def(py::init<>());
  binding.def("update_with_init_vector_multi_pattern",
              &HDynamicCNNManyIP::update_with_init_vector_multi_pattern);
}

View file

@ -1,44 +0,0 @@
// MIT License
// Copyright 2022 University of Bremen
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
// THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//
// David Rotermund ( davrot@uni-bremen.de )
//
//
// Release history:
// ================
// 1.0.0 -- 01.05.2022: first release
//
//
#include <pybind11/pybind11.h>
#include "SpikeGeneration2DManyIP.h"
namespace py = pybind11;
// Python module "PySpikeGeneration2DManyIP": exposes the
// SpikeGeneration2DManyIP class with its default constructor and the batch
// method spike_generation_multi_pattern. No other method is bound here.
PYBIND11_MODULE(PySpikeGeneration2DManyIP, m) {
  m.doc() = "SpikeGeneration2DManyIP Module";

  py::class_<SpikeGeneration2DManyIP> binding(m, "SpikeGeneration2DManyIP");
  binding.def(py::init<>());
  binding.def("spike_generation_multi_pattern",
              &SpikeGeneration2DManyIP::spike_generation_multi_pattern);
}

View file

@ -1,197 +0,0 @@
// MIT License
// Copyright 2022 University of Bremen
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
// THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//
// David Rotermund ( davrot@uni-bremen.de )
//
//
// Release history:
// ================
// 1.0.0 -- 01.05.2022: first release
//
//
#include "SpikeGeneration2DManyIP.h"
#include <omp.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <cassert>
#include <iostream>
#include <vector>
// Constructor: performs no work, so the compiler-generated body is used.
SpikeGeneration2DManyIP::SpikeGeneration2DManyIP() = default;
// Destructor: performs no work, so the compiler-generated body is used.
SpikeGeneration2DManyIP::~SpikeGeneration2DManyIP() = default;
// Batch entry point: calls spike_generation() once per pattern
// (np_input_dim_0 patterns) inside an OpenMP parallel-for, after setting the
// OpenMP thread count to `number_of_cpu_processes`.
//
// All buffer arguments are forwarded unchanged. The return value of
// spike_generation() is not inspected. Always returns true.
bool SpikeGeneration2DManyIP::spike_generation_multi_pattern(
    int64_t np_input_pointer_addr, int64_t np_input_dim_0,
    int64_t np_input_dim_1, int64_t np_input_dim_2, int64_t np_input_dim_3,
    int64_t np_random_values_pointer_addr, int64_t np_random_values_dim_0,
    int64_t np_random_values_dim_1, int64_t np_random_values_dim_2,
    int64_t np_random_values_dim_3, int64_t np_output_pointer_addr,
    int64_t np_output_dim_0, int64_t np_output_dim_1, int64_t np_output_dim_2,
    int64_t np_output_dim_3, int64_t number_of_cpu_processes) {
  const int64_t pattern_count = np_input_dim_0;

  omp_set_num_threads(static_cast<int>(number_of_cpu_processes));

  // Each iteration handles a different pattern index.
#pragma omp parallel for
  for (int64_t pattern = 0; pattern < pattern_count; pattern++) {
    spike_generation(np_input_pointer_addr, np_input_dim_0, np_input_dim_1,
                     np_input_dim_2, np_input_dim_3,
                     np_random_values_pointer_addr, np_random_values_dim_0,
                     np_random_values_dim_1, np_random_values_dim_2,
                     np_random_values_dim_3, np_output_pointer_addr,
                     np_output_dim_0, np_output_dim_1, np_output_dim_2,
                     np_output_dim_3, pattern);
  }

  return true;
}
// Generates the spike indices for ONE pattern (`id_pattern`).
//
// All `*_addr` arguments are raw memory addresses that are reinterpreted as
// pointers; the `*_dim_*` arguments are the extents used to derive dense
// row-major strides. As indexed by the code below:
//   input         : float   [pattern][channel][x][y]   (read)
//   random values : float   [pattern][spike][x][y]     (read)
//   output        : int64_t [pattern][spike][x][y]     (written)
//
// For every position (x, y) in the output's spatial extent, the channel column
// of `input` is gathered and, for every spike slot, the position of the first
// element that is not less than the random value is stored (std::lower_bound).
// std::lower_bound requires that column to be ordered; presumably it holds a
// cumulative distribution along the channel axis -- TODO confirm with caller.
//
// If a random value is larger than every element of the column, lower_bound
// yields h_dim, which is not a valid channel index. Such results are clamped
// to the last channel (h_dim - 1).
//
// NOTE(review): the spatial loops run over the OUTPUT extents but also index
// `input` and `random values`; the three buffers are not checked to agree.
//
// Preconditions are enforced with assert() only, i.e. not in NDEBUG builds.
// Always returns true.
bool SpikeGeneration2DManyIP::spike_generation(
    int64_t np_input_pointer_addr, int64_t np_input_dim_0,
    int64_t np_input_dim_1, int64_t np_input_dim_2, int64_t np_input_dim_3,
    int64_t np_random_values_pointer_addr, int64_t np_random_values_dim_0,
    int64_t np_random_values_dim_1, int64_t np_random_values_dim_2,
    int64_t np_random_values_dim_3, int64_t np_output_pointer_addr,
    int64_t np_output_dim_0, int64_t np_output_dim_1, int64_t np_output_dim_2,
    int64_t np_output_dim_3, int64_t id_pattern) {
  float *np_input_pointer = (float *)np_input_pointer_addr;
  float *np_random_values_pointer = (float *)np_random_values_pointer_addr;
  int64_t *np_output_pointer = (int64_t *)np_output_pointer_addr;

  assert((id_pattern >= 0));
  assert((id_pattern < np_input_dim_0));

  // Input
  assert((np_input_pointer != nullptr));
  assert((np_input_dim_0 > 0));
  assert((np_input_dim_1 > 0));
  assert((np_input_dim_2 > 0));
  assert((np_input_dim_3 > 0));

  int64_t np_input_dim_c0 = np_input_dim_1 * np_input_dim_2 * np_input_dim_3;
  int64_t np_input_dim_c1 = np_input_dim_2 * np_input_dim_3;
  int64_t np_input_dim_c2 = np_input_dim_3;

  // Random
  assert((np_random_values_pointer != nullptr));
  assert((np_random_values_dim_0 > 0));
  assert((np_random_values_dim_1 > 0));
  assert((np_random_values_dim_2 > 0));
  assert((np_random_values_dim_3 > 0));

  int64_t np_random_values_dim_c0 =
      np_random_values_dim_1 * np_random_values_dim_2 * np_random_values_dim_3;
  int64_t np_random_values_dim_c1 =
      np_random_values_dim_2 * np_random_values_dim_3;
  int64_t np_random_values_dim_c2 = np_random_values_dim_3;

  // Output
  assert((np_output_pointer != nullptr));
  assert((np_output_dim_0 > 0));
  assert((np_output_dim_1 > 0));
  assert((np_output_dim_2 > 0));
  assert((np_output_dim_3 > 0));

  int64_t np_output_dim_c0 =
      np_output_dim_1 * np_output_dim_2 * np_output_dim_3;
  int64_t np_output_dim_c1 = np_output_dim_2 * np_output_dim_3;
  int64_t np_output_dim_c2 = np_output_dim_3;

  // -------------------------------

  int64_t h_dim = np_input_dim_1;
  int64_t spike_dim = np_output_dim_1;

  // Contiguous copy of one channel column; lower_bound needs a dense range.
  std::vector<float> temp_p(h_dim);
  float *temp_p_ptr = temp_p.data();

  for (int64_t counter_x = 0; counter_x < np_output_dim_2; counter_x++) {
    for (int64_t counter_y = 0; counter_y < np_output_dim_3; counter_y++) {
      // Addresses of element [id_pattern][0][counter_x][counter_y]; the
      // second axis is walked below with the respective *_dim_c1 stride.
      float *p_ptr = np_input_pointer + id_pattern * np_input_dim_c0 +
                     counter_x * np_input_dim_c2 + counter_y;
      int64_t *out_ptr = np_output_pointer + id_pattern * np_output_dim_c0 +
                         counter_x * np_output_dim_c2 + counter_y;
      float *rand_ptr = np_random_values_pointer +
                        id_pattern * np_random_values_dim_c0 +
                        counter_x * np_random_values_dim_c2 + counter_y;

#pragma omp simd
      for (int64_t counter = 0; counter < h_dim; counter++) {
        temp_p_ptr[counter] = p_ptr[counter * np_input_dim_c1];
      }

      for (int64_t counter = 0; counter < spike_dim; counter++) {
        const float random_value = rand_ptr[counter * np_random_values_dim_c1];

        int64_t index =
            std::lower_bound(temp_p.begin(), temp_p.end(), random_value) -
            temp_p.begin();

        // lower_bound returns end() when every element is smaller than the
        // random value; keep the result inside [0, h_dim - 1].
        if (index >= h_dim) {
          index = h_dim - 1;
        }

        out_ptr[counter * np_output_dim_c1] = index;
      }
    }
  }

  return true;
}

View file

@ -1,66 +0,0 @@
// MIT License
// Copyright 2022 University of Bremen
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
// THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//
// David Rotermund ( davrot@uni-bremen.de )
//
//
// Release history:
// ================
// 1.0.0 -- 01.05.2022: first release
//
//
#ifndef SRC_SPIKEGENERATION2DMANYIP_H_
#define SRC_SPIKEGENERATION2DMANYIP_H_
#include <unistd.h>
#include <cctype>
#include <iostream>
// Stateless helper class (no data members) that turns input and random-value
// buffers into an integer output buffer. Buffers are passed as integer
// addresses plus their four extents.
//
// NOTE(review): int64_t is used throughout, but <cstdint> is not included
// directly by this header -- it appears to rely on a transitive include.
class SpikeGeneration2DManyIP {
public:
SpikeGeneration2DManyIP();
~SpikeGeneration2DManyIP();
// Batch variant: same buffer arguments as spike_generation(), with
// `number_of_cpu_processes` as the last argument instead of a pattern index.
bool spike_generation_multi_pattern(
int64_t np_input_pointer_addr, int64_t np_input_dim_0,
int64_t np_input_dim_1, int64_t np_input_dim_2, int64_t np_input_dim_3,
int64_t np_random_values_pointer_addr, int64_t np_random_values_dim_0,
int64_t np_random_values_dim_1, int64_t np_random_values_dim_2,
int64_t np_random_values_dim_3, int64_t np_output_pointer_addr,
int64_t np_output_dim_0, int64_t np_output_dim_1, int64_t np_output_dim_2,
int64_t np_output_dim_3, int64_t number_of_cpu_processes);
// Processes a single pattern, selected by `id_pattern`.
//   np_input_*         : address and 4 extents of the input buffer
//   np_random_values_* : address and 4 extents of the random-value buffer
//   np_output_*        : address and 4 extents of the output buffer
bool spike_generation(
int64_t np_input_pointer_addr, int64_t np_input_dim_0,
int64_t np_input_dim_1, int64_t np_input_dim_2, int64_t np_input_dim_3,
int64_t np_random_values_pointer_addr, int64_t np_random_values_dim_0,
int64_t np_random_values_dim_1, int64_t np_random_values_dim_2,
int64_t np_random_values_dim_3, int64_t np_output_pointer_addr,
int64_t np_output_dim_0, int64_t np_output_dim_1, int64_t np_output_dim_2,
int64_t np_output_dim_3, int64_t id_pattern);
private:
};
#endif /* SRC_SPIKEGENERATION2DMANYIP_H_ */

View file

@ -1,422 +0,0 @@
# MIT License
# Copyright 2022 University of Bremen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# David Rotermund ( davrot@uni-bremen.de )
#
#
# Release history:
# ================
# 1.0.0 -- 01.05.2022: first release
#
#
from abc import ABC, abstractmethod
import torch
import numpy as np
import torchvision as tv # type: ignore
from Parameter import Config
class DatasetMaster(torch.utils.data.Dataset, ABC):
    """Abstract dataset backed by ``.npy`` label and pattern files.

    The constructor loads ``TrainLabelStorage.npy`` / ``TrainPatternStorage.npy``
    when ``train is True`` and the ``Test...`` counterparts otherwise.
    Subclasses provide item access and the train/test pattern filters.
    """

    path_label: str  # declared only; never assigned by this class
    label_storage: np.ndarray
    pattern_storage: np.ndarray
    number_of_pattern: int
    mean: list[float]

    def __init__(
        self,
        train: bool = False,
        path_pattern: str = "./",
        path_label: str = "./",
    ) -> None:
        """Load labels from ``path_label`` and patterns from ``path_pattern``.

        ``train`` is compared by identity with ``True``; any other value
        (including truthy non-bool values) selects the test files.
        """
        super().__init__()

        if train is True:
            label_file = "/TrainLabelStorage.npy"
            pattern_file = "/TrainPatternStorage.npy"
        else:
            label_file = "/TestLabelStorage.npy"
            pattern_file = "/TestPatternStorage.npy"

        self.label_storage = np.load(path_label + label_file)
        self.pattern_storage = np.load(path_pattern + pattern_file)

        # The number of labels defines the dataset length.
        self.number_of_pattern = self.label_storage.shape[0]
        self.mean = []

    def __len__(self) -> int:
        """Return the number of patterns (first extent of the label array)."""
        return self.number_of_pattern

    @abstractmethod
    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Return the pattern at position ``index`` together with its label."""

    @abstractmethod
    def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Preprocess ``pattern`` for testing according to ``cfg``."""

    @abstractmethod
    def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Preprocess ``pattern`` for training according to ``cfg``."""
class DatasetMNIST(DatasetMaster):
    """Single-channel dataset: float32 patterns divided by their global maximum."""

    def __init__(
        self,
        train: bool = False,
        path_pattern: str = "./",
        path_label: str = "./",
    ) -> None:
        """Load the data, add a channel axis, rescale and compute the mean."""
        super().__init__(train, path_pattern, path_label)

        # Insert a channel axis at position 1 and convert to float32.
        with_channel = self.pattern_storage[:, np.newaxis, :, :].astype(
            dtype=np.float32
        )
        self.pattern_storage = np.ascontiguousarray(with_channel)

        # In-place division by the largest value of the whole array.
        self.pattern_storage /= np.max(self.pattern_storage)

        # Average over the last two axes and then over the patterns, leaving
        # one value per channel.
        channel_mean = self.pattern_storage.mean(3).mean(2).mean(0)
        self.mean = list(channel_mean)

    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Return ``(image, label)``; the image keeps its channel axis."""
        picture = self.pattern_storage[index, 0:1, :, :]
        label = int(self.label_storage[index])
        return torch.tensor(picture), label

    def _crop_and_on_off(
        self, pattern: torch.Tensor, cfg: Config, crop_type
    ) -> torch.Tensor:
        """Crop ``pattern`` with ``crop_type`` and apply the on/off filter.

        ``crop_type`` is the transform class to instantiate with
        ``size=cfg.image_statistics.the_size``. It is built only after the
        configuration checks have passed.
        """
        assert len(cfg.image_statistics.mean) == 1
        assert len(cfg.image_statistics.the_size) == 2
        assert cfg.image_statistics.the_size[0] > 0
        assert cfg.image_statistics.the_size[1] > 0

        # Transformation chain (scripted before use).
        chain: torch.nn.Sequential = torch.nn.Sequential(
            crop_type(size=cfg.image_statistics.the_size),
        )
        scripted_chain = torch.jit.script(chain)
        cropped = scripted_chain(pattern)

        # => On/Off, applied to the first channel only.
        on_off: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0])
        gray: torch.Tensor = on_off(cropped[:, 0:1, :, :])
        return gray

    def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Filter a test image.

        0. The test image comes in,
        1. is center cropped,
        2. is on/off filtered,
        3. and is returned.

        This is a 1 channel version (e.g. one gray channel).
        """
        return self._crop_and_on_off(pattern, cfg, tv.transforms.CenterCrop)

    def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Filter a training image.

        0. The training image comes in,
        1. is cropped from a random position,
        2. is on/off filtered,
        3. and is returned.

        This is a 1 channel version (e.g. one gray channel).
        """
        return self._crop_and_on_off(pattern, cfg, tv.transforms.RandomCrop)
class DatasetFashionMNIST(DatasetMaster):
    """Single-channel dataset on top of the arrays loaded by DatasetMaster.

    During construction the stored patterns get an explicit channel axis,
    are converted to float32 and are divided by their global maximum.
    The mean per channel is kept in ``self.mean``.
    """

    def __init__(
        self,
        train: bool = False,
        path_pattern: str = "./",
        path_label: str = "./",
    ) -> None:
        """Load the data via the base class and normalize the patterns."""
        super().__init__(train, path_pattern, path_label)

        # Insert the channel axis at position 1 and switch to float32.
        with_channel = self.pattern_storage[:, np.newaxis, :, :]
        self.pattern_storage = np.ascontiguousarray(
            with_channel.astype(dtype=np.float32)
        )
        # Divide in place by the largest stored value.
        self.pattern_storage /= self.pattern_storage.max()

        # Average over axis 3, then axis 2, then axis 0: one value per channel.
        channel_mean = self.pattern_storage.mean(axis=3).mean(axis=2).mean(axis=0)
        self.mean = list(channel_mean)

    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Return the pattern at ``index`` as a tensor together with its label."""
        pattern = self.pattern_storage[index, 0:1, :, :]
        label = int(self.label_storage[index])
        return torch.tensor(pattern), label

    def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Prepare test patterns (1 channel version, e.g. one gray channel).

        0. The test pattern tensor comes in,
        1. is center cropped to ``cfg.image_statistics.the_size``,
        2. channel 0 is on/off filtered,
        3. the result is returned.
        """
        stats = cfg.image_statistics
        assert len(stats.mean) == 1
        assert len(stats.the_size) == 2
        assert stats.the_size[0] > 0
        assert stats.the_size[1] > 0

        # Transformation chain: only the center crop
        center_crop = tv.transforms.CenterCrop(size=stats.the_size)
        scripted_chain = torch.jit.script(torch.nn.Sequential(center_crop))
        cropped = scripted_chain(pattern)

        # => On/Off
        on_off = OnOffFilter(p=stats.mean[0])
        return on_off(cropped[:, 0:1, :, :])

    def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Prepare training patterns (1 channel version, e.g. one gray channel).

        0. The training pattern tensor comes in,
        1. is cropped from a random position,
        2. is randomly horizontally flipped,
        3. is randomly color jittered,
        4. channel 0 is on/off filtered,
        5. the result is returned.
        """
        stats = cfg.image_statistics
        augmentation = cfg.augmentation
        assert len(stats.mean) == 1
        assert len(stats.the_size) == 2
        assert stats.the_size[0] > 0
        assert stats.the_size[1] > 0

        # Transformation chain: crop -> flip -> jitter
        random_crop = tv.transforms.RandomCrop(size=stats.the_size)
        random_flip = tv.transforms.RandomHorizontalFlip(p=augmentation.flip_p)
        color_jitter = tv.transforms.ColorJitter(
            brightness=augmentation.jitter_brightness,
            contrast=augmentation.jitter_contrast,
            saturation=augmentation.jitter_saturation,
            hue=augmentation.jitter_hue,
        )
        scripted_chain = torch.jit.script(
            torch.nn.Sequential(random_crop, random_flip, color_jitter)
        )
        augmented = scripted_chain(pattern)

        # => On/Off
        on_off = OnOffFilter(p=stats.mean[0])
        return on_off(augmented[:, 0:1, :, :])
class DatasetCIFAR(DatasetMaster):
    """Three-channel dataset on top of the arrays loaded by DatasetMaster.

    During construction the stored patterns are converted to float32, axis 3
    is moved to position 1 and everything is divided by the global maximum.
    The mean per channel is kept in ``self.mean``.
    """

    def __init__(
        self,
        train: bool = False,
        path_pattern: str = "./",
        path_label: str = "./",
    ) -> None:
        """Load the data via the base class and normalize the patterns."""
        super().__init__(train, path_pattern, path_label)

        # Move axis 3 to position 1 (after the float32 conversion).
        as_float = self.pattern_storage.astype(dtype=np.float32)
        self.pattern_storage = np.ascontiguousarray(np.moveaxis(as_float, 3, 1))
        # Divide in place by the largest stored value.
        self.pattern_storage /= self.pattern_storage.max()

        # Average over axis 3, then axis 2, then axis 0: one value per channel.
        channel_mean = self.pattern_storage.mean(axis=3).mean(axis=2).mean(axis=0)
        self.mean = list(channel_mean)

    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Return the pattern at ``index`` as a tensor together with its label."""
        pattern = self.pattern_storage[index, :, :, :]
        label = int(self.label_storage[index])
        return torch.tensor(pattern), label

    def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Prepare test patterns (3 channel version, e.g. r,g,b channels).

        0. The test pattern tensor comes in,
        1. is center cropped to ``cfg.image_statistics.the_size``,
        2. channels 0, 1 and 2 are on/off filtered separately, each with its
           own entry of ``cfg.image_statistics.mean``,
        3. the filtered channels are concatenated along dim 1 and returned.
        """
        stats = cfg.image_statistics
        assert len(stats.mean) == 3
        assert len(stats.the_size) == 2
        assert stats.the_size[0] > 0
        assert stats.the_size[1] > 0

        # Transformation chain: only the center crop
        center_crop = tv.transforms.CenterCrop(size=stats.the_size)
        scripted_chain = torch.jit.script(torch.nn.Sequential(center_crop))
        cropped = scripted_chain(pattern)

        # => On/Off, channel by channel (r, g, b)
        filtered = [
            OnOffFilter(p=stats.mean[channel])(
                cropped[:, channel : channel + 1, :, :]
            )
            for channel in range(3)
        ]
        return torch.cat(filtered, dim=1)

    def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Prepare training patterns (3 channel version, e.g. r,g,b channels).

        0. The training pattern tensor comes in,
        1. is cropped from a random position,
        2. is randomly horizontally flipped,
        3. is randomly color jittered,
        4. channels 0, 1 and 2 are on/off filtered separately,
        5. the filtered channels are concatenated along dim 1 and returned.
        """
        stats = cfg.image_statistics
        augmentation = cfg.augmentation
        assert len(stats.mean) == 3
        assert len(stats.the_size) == 2
        assert stats.the_size[0] > 0
        assert stats.the_size[1] > 0

        # Transformation chain: crop -> flip -> jitter
        random_crop = tv.transforms.RandomCrop(size=stats.the_size)
        random_flip = tv.transforms.RandomHorizontalFlip(p=augmentation.flip_p)
        color_jitter = tv.transforms.ColorJitter(
            brightness=augmentation.jitter_brightness,
            contrast=augmentation.jitter_contrast,
            saturation=augmentation.jitter_saturation,
            hue=augmentation.jitter_hue,
        )
        scripted_chain = torch.jit.script(
            torch.nn.Sequential(random_crop, random_flip, color_jitter)
        )
        augmented = scripted_chain(pattern)

        # => On/Off, channel by channel (r, g, b)
        filtered = [
            OnOffFilter(p=stats.mean[channel])(
                augmented[:, channel : channel + 1, :, :]
            )
            for channel in range(3)
        ]
        return torch.cat(filtered, dim=1)
class OnOffFilter(torch.nn.Module):
    """Split a single-channel tensor into two channels around ``p``.

    The input is shifted and scaled to ``2 * (tensor - p)``. The first output
    channel holds the magnitude of the negative part, the second one holds
    the non-negative part; both are concatenated along dim 1.
    """

    def __init__(self, p: float = 0.5) -> None:
        super().__init__()
        self.p: float = p

    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
        """Return the two-channel on/off representation of ``tensor``.

        ``tensor`` must have exactly one entry along dim 1.
        """
        assert tensor.shape[1] == 1

        centered = 2.0 * (tensor - self.p)
        zeros = torch.zeros_like(centered)

        # Values below p: sign flipped so that the channel is non-negative.
        below = torch.where(centered < 0.0, -centered, zeros)
        # Values at or above p: kept as they are.
        above = torch.where(centered >= 0.0, centered, zeros)

        return torch.cat((below, above), dim=1)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(p={self.p})"
# Script entry point: intentionally does nothing when the module is run directly.
if __name__ == "__main__":
    pass

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

View file

@ -1,164 +0,0 @@
# MIT License
# Copyright 2022 University of Bremen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# David Rotermund ( davrot@uni-bremen.de )
#
#
# Release history:
# ================
# 1.0.0 -- 01.05.2022: first release
#
#
# %%
from dataclasses import dataclass, field
import numpy as np
import torch
import os
@dataclass
class Network:
    """Description of the network structure.

    Collects the details about the layers and the number of output neurons.
    All list-valued settings start out empty.
    """

    number_of_output_neurons: int = 0

    # Layer geometry
    forward_kernel_size: list[list[int]] = field(default_factory=list)
    forward_neuron_numbers: list[list[int]] = field(default_factory=list)
    strides: list[list[int]] = field(default_factory=list)
    dilation: list[list[int]] = field(default_factory=list)
    padding: list[list[int]] = field(default_factory=list)

    # Boolean switches
    is_pooling_layer: list[bool] = field(default_factory=list)
    w_trainable: list[bool] = field(default_factory=list)
    eps_xy_trainable: list[bool] = field(default_factory=list)
    eps_xy_mean: list[bool] = field(default_factory=list)
@dataclass
class LearningParameters:
    """Settings that are required for training."""

    # Loss coefficients
    loss_coeffs_mse: float = 0.5
    loss_coeffs_kldiv: float = 1.0

    # Learning rates and thresholds for w and eps_xy
    learning_rate_gamma_w: float = -1.0
    learning_rate_gamma_eps_xy: float = -1.0
    learning_rate_threshold_w: float = 1.0e-5
    learning_rate_threshold_eps_xy: float = 1.0e-5

    learning_active: bool = True

    weight_noise_amplitude: float = 0.01
    eps_xy_intitial: float = 0.1

    # Testing while learning
    test_every_x_learning_steps: int = 50
    test_during_learning: bool = True

    # Optimizer and learning-rate scheduler
    lr_scheduler_factor: float = 0.75
    lr_scheduler_patience: int = 10
    optimizer_name: str = "Adam"
    lr_schedule_name: str = "ReduceLROnPlateau"

    number_of_batches_for_one_update: int = 1
    alpha_number_of_iterations: int = 0

    overload_path: str = "./Previous"
@dataclass
class Augmentation:
    """Settings used for data augmentation."""

    # Cropping
    crop_width_in_pixel: int = 2

    # Flipping
    flip_p: float = 0.5

    # Jitter
    jitter_brightness: float = 0.5
    jitter_contrast: float = 0.1
    jitter_saturation: float = 0.1
    jitter_hue: float = 0.15
@dataclass
class ImageStatistics:
    """(Statistical) information about the input.

    Holds the mean values and the x and y size of the input.
    Both lists start out empty.
    """

    mean: list[float] = field(default_factory=list)
    the_size: list[int] = field(default_factory=list)
@dataclass
class Config:
    """Master config class.

    Bundles the four parameter groups (network structure, learning
    parameters, augmentation, image statistics) with the run-wide settings.
    """

    # Sub classes
    network_structure: Network = field(default_factory=Network)
    learning_parameters: LearningParameters = field(default_factory=LearningParameters)
    augmentation: Augmentation = field(default_factory=Augmentation)
    image_statistics: ImageStatistics = field(default_factory=ImageStatistics)

    batch_size: int = 500
    data_mode: str = ""

    learning_step: int = 0
    learning_step_max: int = 10000

    # Values below 1 are replaced in __post_init__.
    number_of_cpu_processes: int = -1

    number_of_spikes: int = 0
    cooldown_after_number_of_spikes: int = 0

    # Directories; they are created in __post_init__.
    weight_path: str = "./Weights/"
    eps_xy_path: str = "./EpsXY/"
    data_path: str = "./"

    reduction_cooldown: float = 25.0
    epsilon_0: float = 1.0

    update_after_x_batch: float = 1.0

    def __post_init__(self) -> None:
        """Finish the setup once the dataclass fields are assigned.

        * If ``number_of_cpu_processes`` is smaller than 1, it is set to the
          CPU count reported by the OS, or to 1 if that count is unknown.
        * The weight, eps_xy and data directories are created if missing.
        * ``batch_size`` is rounded down to a multiple of the process count,
          but never drops below the process count itself.
        """
        if self.number_of_cpu_processes < 1:
            detected = os.cpu_count()
            self.number_of_cpu_processes = 1 if detected is None else detected

        for directory in (self.weight_path, self.eps_xy_path, self.data_path):
            os.makedirs(directory, exist_ok=True)

        processes = self.number_of_cpu_processes
        rounded_down = (self.batch_size // processes) * processes
        self.batch_size = int(max(rounded_down, processes))

    def get_epsilon_t(self) -> torch.Tensor:
        """Generate the time series of the basic epsilon.

        The series has ``number_of_spikes`` entries of value 1.0; the entries
        from ``cooldown_after_number_of_spikes`` onwards are divided by
        ``reduction_cooldown``.
        """
        epsilon_t: np.ndarray = np.ones((self.number_of_spikes), dtype=np.float32)
        cooldown_range = slice(
            self.cooldown_after_number_of_spikes, self.number_of_spikes
        )
        epsilon_t[cooldown_range] /= self.reduction_cooldown
        return torch.tensor(epsilon_t)

    def get_update_after_x_pattern(self) -> float:
        """Tell after how many patterns the weights need to be updated."""
        patterns_per_update = self.batch_size * self.update_after_x_batch
        return patterns_per_update

View file

@ -1 +0,0 @@
Performance reached (test data correct classifications): 76.60%

View file

@ -1,31 +0,0 @@
import os

# Set before the imports below (presumably to quiet TensorFlow log output).
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import numpy as np
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing import event_accumulator

# Load the TensorBoard event file
filename: str = "events.out.tfevents.1651334099.fedora.121264.0"
acc = event_accumulator.EventAccumulator(filename)
acc.Reload()

# What is available?
# available_scalar = acc.Tags()["scalars"]
# print("Available Scalars")
# print(available_scalar)

# Collect entries 1 and 2 of every scalar event as (x, y) pairs
which_scalar: str = "Test Number Correct"
te = acc.Scalars(which_scalar)
temp: list = [(te_item[1], te_item[2]) for te_item in te]
temp_np = np.array(temp)

# NOTE(review): 10000 is presumably the number of test patterns -- confirm.
error_in_percent = (1.0 - (temp_np[:, 1] / 10000)) * 100

plt.semilogy(temp_np[:, 0], error_in_percent)
plt.xlabel("Epochs")
plt.ylabel("Error [%]")
plt.savefig("Error.png")
plt.show()

View file

@ -1,203 +0,0 @@
# %%
import torch
from Dataset import DatasetCIFAR
from Parameter import Config
import torchvision as tv # type: ignore
# Some parameters
# Master configuration object; its image statistics are filled in further
# below, once the training dataset has been loaded.
cfg = Config()
# Input geometry
input_number_of_channel: int = 3
input_dim_x: int = 28
input_dim_y: int = 28

# Number of output channels / features per layer
number_of_output_channels_conv1: int = 96
number_of_output_channels_conv2: int = 192
# For a 28x28 input without padding: conv 5x5 -> 24x24, pool 2x2 -> 12x12,
# conv 5x5 -> 8x8, pool 2x2 -> 4x4, hence 192 * 4 * 4 = 3072 features.
number_of_output_channels_flatten1: int = 3072
number_of_output_channels_full1: int = 10

# Kernel sizes
kernel_size_conv1: tuple[int, int] = (5, 5)
kernel_size_pool1: tuple[int, int] = (2, 2)
kernel_size_conv2: tuple[int, int] = (5, 5)
kernel_size_pool2: tuple[int, int] = (2, 2)

# Strides
stride_conv1: tuple[int, int] = (1, 1)
stride_pool1: tuple[int, int] = (2, 2)
stride_conv2: tuple[int, int] = (1, 1)
stride_pool2: tuple[int, int] = (2, 2)

# Padding
padding_conv1: int = 0
padding_pool1: int = 0
padding_conv2: int = 0
padding_pool2: int = 0

# conv -> ReLU -> max pool, twice, followed by a fully connected read-out.
#
# The network returns raw class scores (logits). The training loop of this
# script feeds the output into torch.nn.CrossEntropyLoss, which expects
# unnormalized logits and applies log-softmax itself; a trailing Softmax
# layer would therefore normalize twice. The argmax used for counting
# correct classifications is the same for logits and for probabilities.
network = torch.nn.Sequential(
    torch.nn.Conv2d(
        in_channels=input_number_of_channel,
        out_channels=number_of_output_channels_conv1,
        kernel_size=kernel_size_conv1,
        stride=stride_conv1,
        padding=padding_conv1,
    ),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(
        kernel_size=kernel_size_pool1, stride=stride_pool1, padding=padding_pool1
    ),
    torch.nn.Conv2d(
        in_channels=number_of_output_channels_conv1,
        out_channels=number_of_output_channels_conv2,
        kernel_size=kernel_size_conv2,
        stride=stride_conv2,
        padding=padding_conv2,
    ),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(
        kernel_size=kernel_size_pool2, stride=stride_pool2, padding=padding_pool2
    ),
    torch.nn.Flatten(
        start_dim=1,
    ),
    torch.nn.Linear(
        in_features=number_of_output_channels_flatten1,
        out_features=number_of_output_channels_full1,
        bias=True,
    ),
)
# %%
# Locations of the pattern and label files
path_pattern: str = "./DATA_CIFAR10/"
path_label: str = "./DATA_CIFAR10/"

# Training and test data
dataset_train = DatasetCIFAR(
    train=True, path_pattern=path_pattern, path_label=path_label
)
dataset_test = DatasetCIFAR(
    train=False, path_pattern=path_pattern, path_label=path_label
)

# The channel means come from the training data
cfg.image_statistics.mean = dataset_train.mean

# The basic size (axes 2 and 3 of the stored patterns) minus the stuff
# we cut away in the pattern filter (the crop width on both sides)
cfg.image_statistics.the_size = [
    extent - 2 * cfg.augmentation.crop_width_in_pixel
    for extent in dataset_train.pattern_storage.shape[2:4]
]

# Data loaders: only the training data is shuffled
batch_size_train: int = 100
batch_size_test: int = 100
train_data_load = torch.utils.data.DataLoader(
    dataset_train, batch_size=batch_size_train, shuffle=True
)
test_data_load = torch.utils.data.DataLoader(
    dataset_test, batch_size=batch_size_test, shuffle=False
)

# Test preprocessing: center crop only
transforms_test: torch.nn.Sequential = torch.nn.Sequential(
    tv.transforms.CenterCrop(size=cfg.image_statistics.the_size),
)
scripted_transforms_test = torch.jit.script(transforms_test)

# Training preprocessing: random crop, random horizontal flip, color jitter
transforms_train: torch.nn.Sequential = torch.nn.Sequential(
    tv.transforms.RandomCrop(size=cfg.image_statistics.the_size),
    tv.transforms.RandomHorizontalFlip(p=cfg.augmentation.flip_p),
    tv.transforms.ColorJitter(
        brightness=cfg.augmentation.jitter_brightness,
        contrast=cfg.augmentation.jitter_contrast,
        saturation=cfg.augmentation.jitter_saturation,
        hue=cfg.augmentation.jitter_hue,
    ),
)
scripted_transforms_train = torch.jit.script(transforms_train)
# %%
# The optimizer: Adam over all network parameters
optimizer = torch.optim.Adam(network.parameters(), lr=1.0e-3)
# The LR scheduler: scales the learning rate by 0.75 on a plateau
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.75)
# %%
number_of_test_pattern: int = len(dataset_test)
number_of_train_pattern: int = len(dataset_train)
number_of_epoch: int = 500
# %%
import time
from torch.utils.tensorboard import SummaryWriter

# TensorBoard writer for the per-epoch scalars
tb = SummaryWriter()
# %%
loss_function = torch.nn.CrossEntropyLoss()

for epoch_id in range(number_of_epoch):
    print(f"Epoch: {epoch_id}")
    t_start: float = time.perf_counter()

    # Per-epoch accumulators
    train_loss: float = 0.0
    train_correct: int = 0
    train_number: int = 0
    test_correct: int = 0
    test_number: int = 0

    # ---- Training: one pass over the training data in mini-batches ----
    network.train()
    for image, target in train_data_load:
        # Start every mini-batch with a clean gradient
        optimizer.zero_grad()

        output = network(scripted_transforms_train(image))
        loss = loss_function(output, target)

        # Bookkeeping
        train_loss += loss.item()
        prediction = output.argmax(dim=1)
        train_correct += (prediction == target).sum().numpy()
        train_number += target.shape[0]

        # Backprop, then update the parameters
        loss.backward()
        optimizer.step()

    # The scheduler sees the loss summed over the whole epoch
    lr_scheduler.step(train_loss)
    t_training: float = time.perf_counter()

    # ---- Testing: no gradients required ----
    network.eval()
    with torch.no_grad():
        for image, target in test_data_load:
            output = network(scripted_transforms_test(image))
            prediction = output.argmax(dim=1)
            test_correct += (prediction == target).sum().numpy()
            test_number += target.shape[0]
    t_testing = time.perf_counter()

    # ---- Reporting ----
    perfomance_test_correct: float = 100.0 * test_correct / test_number
    perfomance_train_correct: float = 100.0 * train_correct / train_number

    tb.add_scalar("Train Loss", train_loss, epoch_id)
    tb.add_scalar("Train Number Correct", train_correct, epoch_id)
    tb.add_scalar("Test Number Correct", test_correct, epoch_id)

    print(f"Training: Loss={train_loss:.5f} Correct={perfomance_train_correct:.2f}%")
    print(f"Testing: Correct={perfomance_test_correct:.2f}%")
    print(
        f"Time: Training={(t_training-t_start):.1f}sec, Testing={(t_testing-t_training):.1f}sec"
    )

    # Keep a snapshot of the whole network after every epoch
    torch.save(network, "Model_MNIST_A_" + str(epoch_id) + ".pt")
    print()

# %%
tb.close()

View file

@ -1,422 +0,0 @@
# MIT License
# Copyright 2022 University of Bremen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# David Rotermund ( davrot@uni-bremen.de )
#
#
# Release history:
# ================
# 1.0.0 -- 01.05.2022: first release
#
#
from abc import ABC, abstractmethod
import torch
import numpy as np
import torchvision as tv # type: ignore
from Parameter import Config
class DatasetMaster(torch.utils.data.Dataset, ABC):
    """Abstract base class for datasets stored as NumPy files.

    The constructor loads the label and pattern arrays of either the train
    or the test split. Subclasses have to provide item access and the two
    pattern filters.
    """

    path_label: str
    label_storage: np.ndarray
    pattern_storage: np.ndarray
    number_of_pattern: int
    mean: list[float]

    def __init__(
        self,
        train: bool = False,
        path_pattern: str = "./",
        path_label: str = "./",
    ) -> None:
        """Load labels and patterns.

        The train files are used only when ``train`` is exactly ``True``;
        otherwise the test files are loaded.
        """
        super().__init__()

        if train is True:
            label_file = "/TrainLabelStorage.npy"
            pattern_file = "/TrainPatternStorage.npy"
        else:
            label_file = "/TestLabelStorage.npy"
            pattern_file = "/TestPatternStorage.npy"

        self.label_storage = np.load(path_label + label_file)
        self.pattern_storage = np.load(path_pattern + pattern_file)

        # One label per pattern: the label count defines the dataset length.
        self.number_of_pattern = self.label_storage.shape[0]
        self.mean = []

    def __len__(self) -> int:
        """Return the number of patterns."""
        return self.number_of_pattern

    @abstractmethod
    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Get one pattern (and its label) at position ``index``."""

    @abstractmethod
    def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Preprocess patterns for testing."""

    @abstractmethod
    def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Preprocess patterns for training."""
class DatasetMNIST(DatasetMaster):
    """Single-channel dataset on top of the arrays loaded by DatasetMaster.

    During construction the stored patterns get an explicit channel axis,
    are converted to float32 and are divided by their global maximum.
    The mean per channel is kept in ``self.mean``.
    """

    def __init__(
        self,
        train: bool = False,
        path_pattern: str = "./",
        path_label: str = "./",
    ) -> None:
        """Load the data via the base class and normalize the patterns."""
        super().__init__(train, path_pattern, path_label)

        # Insert the channel axis at position 1 and switch to float32.
        with_channel = self.pattern_storage[:, np.newaxis, :, :]
        self.pattern_storage = np.ascontiguousarray(
            with_channel.astype(dtype=np.float32)
        )
        # Divide in place by the largest stored value.
        self.pattern_storage /= self.pattern_storage.max()

        # Average over axis 3, then axis 2, then axis 0: one value per channel.
        channel_mean = self.pattern_storage.mean(axis=3).mean(axis=2).mean(axis=0)
        self.mean = list(channel_mean)

    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Return the pattern at ``index`` as a tensor together with its label."""
        pattern = self.pattern_storage[index, 0:1, :, :]
        label = int(self.label_storage[index])
        return torch.tensor(pattern), label

    def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Prepare test patterns (1 channel version, e.g. one gray channel).

        0. The test pattern tensor comes in,
        1. is center cropped to ``cfg.image_statistics.the_size``,
        2. channel 0 is on/off filtered,
        3. the result is returned.
        """
        stats = cfg.image_statistics
        assert len(stats.mean) == 1
        assert len(stats.the_size) == 2
        assert stats.the_size[0] > 0
        assert stats.the_size[1] > 0

        # Transformation chain: only the center crop
        center_crop = tv.transforms.CenterCrop(size=stats.the_size)
        scripted_chain = torch.jit.script(torch.nn.Sequential(center_crop))
        cropped = scripted_chain(pattern)

        # => On/Off
        on_off = OnOffFilter(p=stats.mean[0])
        return on_off(cropped[:, 0:1, :, :])

    def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Prepare training patterns (1 channel version, e.g. one gray channel).

        0. The training pattern tensor comes in,
        1. is cropped from a random position,
        2. channel 0 is on/off filtered,
        3. the result is returned.
        """
        stats = cfg.image_statistics
        assert len(stats.mean) == 1
        assert len(stats.the_size) == 2
        assert stats.the_size[0] > 0
        assert stats.the_size[1] > 0

        # Transformation chain: only the random crop
        random_crop = tv.transforms.RandomCrop(size=stats.the_size)
        scripted_chain = torch.jit.script(torch.nn.Sequential(random_crop))
        cropped = scripted_chain(pattern)

        # => On/Off
        on_off = OnOffFilter(p=stats.mean[0])
        return on_off(cropped[:, 0:1, :, :])
class DatasetFashionMNIST(DatasetMaster):
    """Single-channel dataset that reuses the loading done by DatasetMaster.

    The constructor adds a channel axis to the stored patterns, converts
    them to float32 and divides them by their global maximum. The mean per
    channel is stored in ``self.mean``.
    """

    def __init__(
        self,
        train: bool = False,
        path_pattern: str = "./",
        path_label: str = "./",
    ) -> None:
        """Load the data via the base class and normalize the patterns."""
        super().__init__(train, path_pattern, path_label)

        # New channel axis at position 1, data converted to float32.
        expanded = self.pattern_storage[:, np.newaxis, :, :]
        self.pattern_storage = np.ascontiguousarray(
            expanded.astype(dtype=np.float32)
        )
        # In-place division by the largest stored value.
        self.pattern_storage /= self.pattern_storage.max()

        # Mean over axis 3, then axis 2, then axis 0: one value per channel.
        per_channel = self.pattern_storage.mean(axis=3).mean(axis=2).mean(axis=0)
        self.mean = list(per_channel)

    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Return the pattern at ``index`` as a tensor together with its label."""
        selected = self.pattern_storage[index, 0:1, :, :]
        label = int(self.label_storage[index])
        return torch.tensor(selected), label

    def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Prepare test patterns (1 channel version, e.g. one gray channel).

        0. The test pattern tensor comes in,
        1. is center cropped to ``cfg.image_statistics.the_size``,
        2. channel 0 is on/off filtered,
        3. the result is returned.
        """
        image_stats = cfg.image_statistics
        assert len(image_stats.mean) == 1
        assert len(image_stats.the_size) == 2
        assert image_stats.the_size[0] > 0
        assert image_stats.the_size[1] > 0

        # Transformation chain: only the center crop
        chain = torch.nn.Sequential(
            tv.transforms.CenterCrop(size=image_stats.the_size),
        )
        preprocessed = torch.jit.script(chain)(pattern)

        # => On/Off
        gray_filter = OnOffFilter(p=image_stats.mean[0])
        return gray_filter(preprocessed[:, 0:1, :, :])

    def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Prepare training patterns (1 channel version, e.g. one gray channel).

        0. The training pattern tensor comes in,
        1. is cropped from a random position,
        2. is randomly horizontally flipped,
        3. is randomly color jittered,
        4. channel 0 is on/off filtered,
        5. the result is returned.
        """
        image_stats = cfg.image_statistics
        jitter_cfg = cfg.augmentation
        assert len(image_stats.mean) == 1
        assert len(image_stats.the_size) == 2
        assert image_stats.the_size[0] > 0
        assert image_stats.the_size[1] > 0

        # Transformation chain: crop -> flip -> jitter
        chain = torch.nn.Sequential(
            tv.transforms.RandomCrop(size=image_stats.the_size),
            tv.transforms.RandomHorizontalFlip(p=jitter_cfg.flip_p),
            tv.transforms.ColorJitter(
                brightness=jitter_cfg.jitter_brightness,
                contrast=jitter_cfg.jitter_contrast,
                saturation=jitter_cfg.jitter_saturation,
                hue=jitter_cfg.jitter_hue,
            ),
        )
        preprocessed = torch.jit.script(chain)(pattern)

        # => On/Off
        gray_filter = OnOffFilter(p=image_stats.mean[0])
        return gray_filter(preprocessed[:, 0:1, :, :])
class DatasetCIFAR(DatasetMaster):
    """Three-channel dataset that reuses the loading done by DatasetMaster.

    The constructor converts the stored patterns to float32, moves axis 3 to
    position 1 and divides everything by the global maximum. The mean per
    channel is stored in ``self.mean``.
    """

    def __init__(
        self,
        train: bool = False,
        path_pattern: str = "./",
        path_label: str = "./",
    ) -> None:
        """Load the data via the base class and normalize the patterns."""
        super().__init__(train, path_pattern, path_label)

        # float32 conversion first, then axis 3 moves to position 1.
        converted = self.pattern_storage.astype(dtype=np.float32)
        self.pattern_storage = np.ascontiguousarray(np.moveaxis(converted, 3, 1))
        # In-place division by the largest stored value.
        self.pattern_storage /= self.pattern_storage.max()

        # Mean over axis 3, then axis 2, then axis 0: one value per channel.
        per_channel = self.pattern_storage.mean(axis=3).mean(axis=2).mean(axis=0)
        self.mean = list(per_channel)

    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Return the pattern at ``index`` as a tensor together with its label."""
        selected = self.pattern_storage[index, :, :, :]
        label = int(self.label_storage[index])
        return torch.tensor(selected), label

    def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Prepare test patterns (3 channel version, e.g. r,g,b channels).

        0. The test pattern tensor comes in,
        1. is center cropped to ``cfg.image_statistics.the_size``,
        2. channels 0, 1 and 2 are on/off filtered separately, each with its
           own entry of ``cfg.image_statistics.mean``,
        3. the filtered channels are concatenated along dim 1 and returned.
        """
        image_stats = cfg.image_statistics
        assert len(image_stats.mean) == 3
        assert len(image_stats.the_size) == 2
        assert image_stats.the_size[0] > 0
        assert image_stats.the_size[1] > 0

        # Transformation chain: only the center crop
        chain = torch.nn.Sequential(
            tv.transforms.CenterCrop(size=image_stats.the_size),
        )
        preprocessed = torch.jit.script(chain)(pattern)

        # => On/Off, one filter per color channel (r, g, b)
        color_filters = [OnOffFilter(p=image_stats.mean[idx]) for idx in range(3)]
        filtered = [
            color_filter(preprocessed[:, idx : idx + 1, :, :])
            for idx, color_filter in enumerate(color_filters)
        ]
        return torch.cat(filtered, dim=1)

    def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Prepare training patterns (3 channel version, e.g. r,g,b channels).

        0. The training pattern tensor comes in,
        1. is cropped from a random position,
        2. is randomly horizontally flipped,
        3. is randomly color jittered,
        4. channels 0, 1 and 2 are on/off filtered separately,
        5. the filtered channels are concatenated along dim 1 and returned.
        """
        image_stats = cfg.image_statistics
        jitter_cfg = cfg.augmentation
        assert len(image_stats.mean) == 3
        assert len(image_stats.the_size) == 2
        assert image_stats.the_size[0] > 0
        assert image_stats.the_size[1] > 0

        # Transformation chain: crop -> flip -> jitter
        chain = torch.nn.Sequential(
            tv.transforms.RandomCrop(size=image_stats.the_size),
            tv.transforms.RandomHorizontalFlip(p=jitter_cfg.flip_p),
            tv.transforms.ColorJitter(
                brightness=jitter_cfg.jitter_brightness,
                contrast=jitter_cfg.jitter_contrast,
                saturation=jitter_cfg.jitter_saturation,
                hue=jitter_cfg.jitter_hue,
            ),
        )
        preprocessed = torch.jit.script(chain)(pattern)

        # => On/Off, one filter per color channel (r, g, b)
        color_filters = [OnOffFilter(p=image_stats.mean[idx]) for idx in range(3)]
        filtered = [
            color_filter(preprocessed[:, idx : idx + 1, :, :])
            for idx, color_filter in enumerate(color_filters)
        ]
        return torch.cat(filtered, dim=1)
class OnOffFilter(torch.nn.Module):
    """Turn one channel into an "off" and an "on" channel around ``p``.

    With ``x = 2 * (tensor - p)``, the first output channel is ``-x`` where
    ``x`` is negative (zero elsewhere) and the second output channel is ``x``
    where ``x`` is non-negative (zero elsewhere).
    """

    def __init__(self, p: float = 0.5) -> None:
        super().__init__()
        self.p: float = p

    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
        """Return both channels concatenated along dim 1.

        ``tensor`` must have exactly one entry along dim 1.
        """
        assert tensor.shape[1] == 1

        shifted = 2.0 * (tensor - self.p)
        nothing = torch.zeros_like(shifted)

        off_channel = torch.where(shifted < 0.0, -shifted, nothing)
        on_channel = torch.where(shifted >= 0.0, shifted, nothing)

        return torch.cat((off_channel, on_channel), dim=1)

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"{class_name}(p={self.p})"
# Script entry point: intentionally does nothing when the module is run directly.
if __name__ == "__main__":
    pass

Binary file not shown.

Before

Width:  |  Height:  |  Size: 33 KiB

View file

@ -1,164 +0,0 @@
# MIT License
# Copyright 2022 University of Bremen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# David Rotermund ( davrot@uni-bremen.de )
#
#
# Release history:
# ================
# 1.0.0 -- 01.05.2022: first release
#
#
# %%
from dataclasses import dataclass, field
import numpy as np
import torch
import os
@dataclass
class Network:
    """Parameters of the network.

    Stores the details about its layers and the number of output neurons.
    Every list-valued parameter defaults to a fresh empty list.
    """

    number_of_output_neurons: int = 0

    # Kernel sizes and neuron numbers
    forward_kernel_size: list[list[int]] = field(default_factory=list)
    forward_neuron_numbers: list[list[int]] = field(default_factory=list)

    # Strides, dilation and padding
    strides: list[list[int]] = field(default_factory=list)
    dilation: list[list[int]] = field(default_factory=list)
    padding: list[list[int]] = field(default_factory=list)

    # Flags
    is_pooling_layer: list[bool] = field(default_factory=list)
    w_trainable: list[bool] = field(default_factory=list)
    eps_xy_trainable: list[bool] = field(default_factory=list)
    eps_xy_mean: list[bool] = field(default_factory=list)
@dataclass
class LearningParameters:
    """Parameters required for training."""

    # Coefficients of the loss terms
    loss_coeffs_mse: float = 0.5
    loss_coeffs_kldiv: float = 1.0

    # Learning rates and thresholds for w and eps_xy
    learning_rate_gamma_w: float = -1.0
    learning_rate_gamma_eps_xy: float = -1.0
    learning_rate_threshold_w: float = 1.0e-5
    learning_rate_threshold_eps_xy: float = 1.0e-5

    learning_active: bool = True

    weight_noise_amplitude: float = 0.01
    eps_xy_intitial: float = 0.1

    # Testing while learning
    test_every_x_learning_steps: int = 50
    test_during_learning: bool = True

    # Optimizer and learning-rate scheduler
    lr_scheduler_factor: float = 0.75
    lr_scheduler_patience: int = 10
    optimizer_name: str = "Adam"
    lr_schedule_name: str = "ReduceLROnPlateau"

    number_of_batches_for_one_update: int = 1
    alpha_number_of_iterations: int = 0

    overload_path: str = "./Previous"
@dataclass
class Augmentation:
    """Parameters used for data augmentation."""

    crop_width_in_pixel: int = field(default=2)

    flip_p: float = field(default=0.5)

    jitter_brightness: float = field(default=0.5)
    jitter_contrast: float = field(default=0.1)
    jitter_saturation: float = field(default=0.1)
    jitter_hue: float = field(default=0.15)

    # The dataset pattern filters read `cfg.augmentation.use_on_off_filter`;
    # without this field they fail with an AttributeError. It is appended
    # last so that positional construction keeps working.
    # NOTE(review): the default of True is an assumption -- confirm.
    use_on_off_filter: bool = field(default=True)
@dataclass
class ImageStatistics:
    """(Statistical) information about the input.

    I.e. the mean values and the x and y size of the input. Both fields
    default to a fresh, empty list.
    """

    # Mean values of the input.
    mean: list[float] = field(default_factory=list)
    # Size of the input.
    the_size: list[int] = field(default_factory=list)
@dataclass
class Config:
    """Master config class.

    Bundles the sub-configurations with the global settings. Creating an
    instance has side effects, see ``__post_init__``.
    """

    # Sub classes
    network_structure: Network = field(default_factory=Network)
    learning_parameters: LearningParameters = field(default_factory=LearningParameters)
    augmentation: Augmentation = field(default_factory=Augmentation)
    image_statistics: ImageStatistics = field(default_factory=ImageStatistics)

    batch_size: int = 500
    data_mode: str = ""

    learning_step: int = 0
    learning_step_max: int = 10000

    # Values below 1 are replaced by the detected CPU count.
    number_of_cpu_processes: int = -1

    number_of_spikes: int = 0
    cooldown_after_number_of_spikes: int = 0

    weight_path: str = "./Weights/"
    eps_xy_path: str = "./EpsXY/"
    data_path: str = "./"

    reduction_cooldown: float = 25.0
    epsilon_0: float = 1.0

    update_after_x_batch: float = 1.0

    def __post_init__(self) -> None:
        """Finish the setup after the dataclass fields are assigned.

        Determines the number of cores (if not given), creates the
        required directories and adjusts the batch size to a multiple of
        the number of cores.
        """
        if self.number_of_cpu_processes < 1:
            # os.cpu_count() may return None; fall back to a single process.
            self.number_of_cpu_processes = os.cpu_count() or 1

        for directory in (self.weight_path, self.eps_xy_path, self.data_path):
            os.makedirs(directory, exist_ok=True)

        cores = self.number_of_cpu_processes
        # Round down to a multiple of the core count, but use at least one
        # pattern per core.
        rounded_down = (self.batch_size // cores) * cores
        self.batch_size = int(max(rounded_down, cores))

    def get_epsilon_t(self) -> torch.Tensor:
        """Generates the time series of the basic epsilon.

        Returns a float32 tensor with ``number_of_spikes`` entries: ones,
        divided by ``reduction_cooldown`` from index
        ``cooldown_after_number_of_spikes`` onwards.
        """
        epsilon_t: np.ndarray = np.ones((self.number_of_spikes), dtype=np.float32)
        cooldown_range = slice(
            self.cooldown_after_number_of_spikes, self.number_of_spikes
        )
        epsilon_t[cooldown_range] /= self.reduction_cooldown
        return torch.tensor(epsilon_t)

    def get_update_after_x_pattern(self) -> float:
        """Tells us after how many pattern we need to update the weights.

        The value is ``batch_size * update_after_x_batch``.
        """
        return self.batch_size * self.update_after_x_batch

View file

@ -1 +0,0 @@
Performance reached (test data correct classifications): 89.82%

View file

@ -1,31 +0,0 @@
import os

# Set before the tensorboard import below -- presumably to silence the
# TensorFlow C++ logging.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import numpy as np
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing import event_accumulator

# Event file to read and the scalar to plot.
filename: str = "events.out.tfevents.1651328399.fedora.118340.0"
which_scalar: str = "Test Number Correct"

acc = event_accumulator.EventAccumulator(filename)
acc.Reload()

# What is available? -> acc.Tags()["scalars"]

te = acc.Scalars(which_scalar)

# Keep entries 1 and 2 of every scalar event
# (presumably step and value -- confirm against the tensorboard API).
temp: list = [(te_item[1], te_item[2]) for te_item in te]
temp_np = np.array(temp)

# Error in percent; 10000 is the hard-coded number of test patterns.
plt.semilogy(temp_np[:, 0], (1.0 - (temp_np[:, 1] / 10000)) * 100)
plt.xlabel("Epochs")
plt.ylabel("Error [%]")
plt.savefig("Error.png")
plt.show()

View file

@ -1,203 +0,0 @@
# %%
import torch
from Dataset import DatasetFashionMNIST
from Parameter import Config
import torchvision as tv  # type: ignore

# Some parameters
cfg = Config()

# Input geometry
input_number_of_channel: int = 1
input_dim_x: int = 24
input_dim_y: int = 24

# Output channels / features of the stages.
# 576 = 64 * 3 * 3: for a 24 x 24 input the un-padded 5x5 convolutions and
# the 2x2 poolings below shrink the maps 24 -> 20 -> 10 -> 6 -> 3.
number_of_output_channels_conv1: int = 32
number_of_output_channels_conv2: int = 64
number_of_output_channels_flatten1: int = 576
number_of_output_channels_full1: int = 10

# Kernel sizes
kernel_size_conv1: tuple[int, int] = (5, 5)
kernel_size_pool1: tuple[int, int] = (2, 2)
kernel_size_conv2: tuple[int, int] = (5, 5)
kernel_size_pool2: tuple[int, int] = (2, 2)

# Strides
stride_conv1: tuple[int, int] = (1, 1)
stride_pool1: tuple[int, int] = (2, 2)
stride_conv2: tuple[int, int] = (1, 1)
stride_pool2: tuple[int, int] = (2, 2)

# Paddings
padding_conv1: int = 0
padding_pool1: int = 0
padding_conv2: int = 0
padding_pool2: int = 0

# conv -> relu -> pool -> conv -> relu -> pool -> flatten -> linear -> softmax
network = torch.nn.Sequential(
    torch.nn.Conv2d(
        in_channels=input_number_of_channel,
        out_channels=number_of_output_channels_conv1,
        kernel_size=kernel_size_conv1,
        stride=stride_conv1,
        padding=padding_conv1,
    ),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(
        kernel_size=kernel_size_pool1, stride=stride_pool1, padding=padding_pool1
    ),
    torch.nn.Conv2d(
        in_channels=number_of_output_channels_conv1,
        out_channels=number_of_output_channels_conv2,
        kernel_size=kernel_size_conv2,
        stride=stride_conv2,
        padding=padding_conv2,
    ),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(
        kernel_size=kernel_size_pool2, stride=stride_pool2, padding=padding_pool2
    ),
    torch.nn.Flatten(
        start_dim=1,
    ),
    torch.nn.Linear(
        in_features=number_of_output_channels_flatten1,
        out_features=number_of_output_channels_full1,
        bias=True,
    ),
    torch.nn.Softmax(dim=1),
)

# %%
path_pattern: str = "./DATA_FASHION_MNIST/"
path_label: str = "./DATA_FASHION_MNIST/"

dataset_train = DatasetFashionMNIST(
    train=True, path_pattern=path_pattern, path_label=path_label
)
dataset_test = DatasetFashionMNIST(
    train=False, path_pattern=path_pattern, path_label=path_label
)

cfg.image_statistics.mean = dataset_train.mean

# The basic size, minus the stuff we cut away in the pattern filter
# (twice the crop width per axis).
crop_total: int = 2 * cfg.augmentation.crop_width_in_pixel
cfg.image_statistics.the_size = [
    dataset_train.pattern_storage.shape[2] - crop_total,
    dataset_train.pattern_storage.shape[3] - crop_total,
]

batch_size_train: int = 100
batch_size_test: int = 100

# Only the training data is shuffled.
train_data_load = torch.utils.data.DataLoader(
    dataset_train, batch_size=batch_size_train, shuffle=True
)
test_data_load = torch.utils.data.DataLoader(
    dataset_test, batch_size=batch_size_test, shuffle=False
)

# Test data: center crop only.
transforms_test: torch.nn.Sequential = torch.nn.Sequential(
    tv.transforms.CenterCrop(size=cfg.image_statistics.the_size),
)
scripted_transforms_test = torch.jit.script(transforms_test)

# Training data: random crop, random horizontal flip and color jitter.
transforms_train: torch.nn.Sequential = torch.nn.Sequential(
    tv.transforms.RandomCrop(size=cfg.image_statistics.the_size),
    tv.transforms.RandomHorizontalFlip(p=cfg.augmentation.flip_p),
    tv.transforms.ColorJitter(
        brightness=cfg.augmentation.jitter_brightness,
        contrast=cfg.augmentation.jitter_contrast,
        saturation=cfg.augmentation.jitter_saturation,
        hue=cfg.augmentation.jitter_hue,
    ),
)
scripted_transforms_train = torch.jit.script(transforms_train)

# %%
# The optimizer
optimizer = torch.optim.Adam(network.parameters(), lr=0.001)

# The LR Scheduler
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.75)

# %%
number_of_test_pattern: int = len(dataset_test)
number_of_train_pattern: int = len(dataset_train)

number_of_epoch: int = 200

# %%
import time
from torch.utils.tensorboard import SummaryWriter

tb = SummaryWriter()
# %%
# NOTE(review): `network` ends in a Softmax layer, while CrossEntropyLoss
# expects unnormalized scores. This is left untouched here because removing
# the Softmax changes what the saved models output -- please confirm.
loss_function = torch.nn.CrossEntropyLoss()

for epoch_id in range(0, number_of_epoch):
    print(f"Epoch: {epoch_id}")
    t_start: float = time.perf_counter()

    # Per-epoch accumulators
    train_loss: float = 0.0
    train_correct: int = 0
    train_number: int = 0
    test_correct: int = 0
    test_number: int = 0

    # Switch the network into training mode
    network.train()

    # This runs in total for one epoch split up into mini-batches
    for image, target in train_data_load:
        # Clean the gradient
        optimizer.zero_grad()

        output = network(scripted_transforms_train(image))
        loss = loss_function(output, target)

        train_loss += loss.item()
        # .item() yields a plain Python int (matching the annotation) and,
        # unlike .numpy(), does not require a CPU tensor.
        train_correct += int((output.argmax(dim=1) == target).sum().item())
        train_number += target.shape[0]

        # Calculate backprop
        loss.backward()

        # Update the parameter
        optimizer.step()

    # Update the learning rate
    # NOTE(review): the indentation was lost in the reviewed copy; the
    # scheduler is stepped once per epoch with the summed loss -- confirm.
    lr_scheduler.step(train_loss)

    t_training: float = time.perf_counter()

    # Switch the network into evaluation mode
    network.eval()
    with torch.no_grad():
        for image, target in test_data_load:
            output = network(scripted_transforms_test(image))
            test_correct += int((output.argmax(dim=1) == target).sum().item())
            test_number += target.shape[0]

    t_testing = time.perf_counter()

    perfomance_test_correct: float = 100.0 * test_correct / test_number
    perfomance_train_correct: float = 100.0 * train_correct / train_number

    tb.add_scalar("Train Loss", train_loss, epoch_id)
    tb.add_scalar("Train Number Correct", train_correct, epoch_id)
    tb.add_scalar("Test Number Correct", test_correct, epoch_id)

    print(f"Training: Loss={train_loss:.5f} Correct={perfomance_train_correct:.2f}%")
    print(f"Testing: Correct={perfomance_test_correct:.2f}%")
    print(
        f"Time: Training={(t_training-t_start):.1f}sec, Testing={(t_testing-t_training):.1f}sec"
    )

    # One model file per epoch.
    # NOTE(review): the file name says "MNIST" although this script trains
    # on Fashion MNIST; kept unchanged because other tools may rely on it.
    torch.save(network, "Model_MNIST_A_" + str(epoch_id) + ".pt")
    print()

# %%
tb.close()

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

View file

@ -1,164 +0,0 @@
# MIT License
# Copyright 2022 University of Bremen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# David Rotermund ( davrot@uni-bremen.de )
#
#
# Release history:
# ================
# 1.0.0 -- 01.05.2022: first release
#
#
# %%
from dataclasses import dataclass, field
import numpy as np
import torch
import os
@dataclass
class Network:
    """Parameters of the network.

    Holds the details about its layers and the number of output neurons.
    Every list-valued field defaults to a fresh, empty list.
    """

    # Number of output neurons.
    number_of_output_neurons: int = 0

    # Layer geometry.
    forward_kernel_size: list[list[int]] = field(default_factory=list)
    forward_neuron_numbers: list[list[int]] = field(default_factory=list)
    strides: list[list[int]] = field(default_factory=list)
    dilation: list[list[int]] = field(default_factory=list)
    padding: list[list[int]] = field(default_factory=list)

    # Flags.
    is_pooling_layer: list[bool] = field(default_factory=list)
    w_trainable: list[bool] = field(default_factory=list)
    eps_xy_trainable: list[bool] = field(default_factory=list)
    eps_xy_mean: list[bool] = field(default_factory=list)
@dataclass
class LearningParameters:
    """Parameters required for training."""

    # Loss coefficients.
    loss_coeffs_mse: float = 0.5
    loss_coeffs_kldiv: float = 1.0

    # Learning rates and their thresholds.
    learning_rate_gamma_w: float = -1.0
    learning_rate_gamma_eps_xy: float = -1.0
    learning_rate_threshold_w: float = 0.00001
    learning_rate_threshold_eps_xy: float = 0.00001

    learning_active: bool = True

    weight_noise_amplitude: float = 0.01
    # The misspelled field name is kept as-is: it is part of the interface.
    eps_xy_intitial: float = 0.1

    # Testing while learning.
    test_every_x_learning_steps: int = 50
    test_during_learning: bool = True

    # Optimizer and learning-rate scheduler.
    lr_scheduler_factor: float = 0.75
    lr_scheduler_patience: int = 10
    optimizer_name: str = "Adam"
    lr_schedule_name: str = "ReduceLROnPlateau"

    number_of_batches_for_one_update: int = 1
    alpha_number_of_iterations: int = 0

    overload_path: str = "./Previous"
@dataclass
class Augmentation:
    """Parameters used for data augmentation."""

    crop_width_in_pixel: int = field(default=2)

    flip_p: float = field(default=0.5)

    jitter_brightness: float = field(default=0.5)
    jitter_contrast: float = field(default=0.1)
    jitter_saturation: float = field(default=0.1)
    jitter_hue: float = field(default=0.15)

    # The dataset pattern filters read `cfg.augmentation.use_on_off_filter`;
    # without this field they fail with an AttributeError. It is appended
    # last so that positional construction keeps working.
    # NOTE(review): the default of True is an assumption -- confirm.
    use_on_off_filter: bool = field(default=True)
@dataclass
class ImageStatistics:
    """(Statistical) information about the input.

    I.e. the mean values and the x and y size of the input. Both fields
    default to a fresh, empty list.
    """

    # Mean values of the input.
    mean: list[float] = field(default_factory=list)
    # Size of the input.
    the_size: list[int] = field(default_factory=list)
@dataclass
class Config:
    """Master config class.

    Bundles the sub-configurations with the global settings. Creating an
    instance has side effects, see ``__post_init__``.
    """

    # Sub classes
    network_structure: Network = field(default_factory=Network)
    learning_parameters: LearningParameters = field(default_factory=LearningParameters)
    augmentation: Augmentation = field(default_factory=Augmentation)
    image_statistics: ImageStatistics = field(default_factory=ImageStatistics)

    batch_size: int = 500
    data_mode: str = ""

    learning_step: int = 0
    learning_step_max: int = 10000

    # Values below 1 are replaced by the detected CPU count.
    number_of_cpu_processes: int = -1

    number_of_spikes: int = 0
    cooldown_after_number_of_spikes: int = 0

    weight_path: str = "./Weights/"
    eps_xy_path: str = "./EpsXY/"
    data_path: str = "./"

    reduction_cooldown: float = 25.0
    epsilon_0: float = 1.0

    update_after_x_batch: float = 1.0

    def __post_init__(self) -> None:
        """Finish the setup after the dataclass fields are assigned.

        Determines the number of cores (if not given), creates the
        required directories and adjusts the batch size to a multiple of
        the number of cores.
        """
        if self.number_of_cpu_processes < 1:
            # os.cpu_count() may return None; fall back to a single process.
            self.number_of_cpu_processes = os.cpu_count() or 1

        for directory in (self.weight_path, self.eps_xy_path, self.data_path):
            os.makedirs(directory, exist_ok=True)

        cores = self.number_of_cpu_processes
        # Round down to a multiple of the core count, but use at least one
        # pattern per core.
        rounded_down = (self.batch_size // cores) * cores
        self.batch_size = int(max(rounded_down, cores))

    def get_epsilon_t(self) -> torch.Tensor:
        """Generates the time series of the basic epsilon.

        Returns a float32 tensor with ``number_of_spikes`` entries: ones,
        divided by ``reduction_cooldown`` from index
        ``cooldown_after_number_of_spikes`` onwards.
        """
        epsilon_t: np.ndarray = np.ones((self.number_of_spikes), dtype=np.float32)
        cooldown_range = slice(
            self.cooldown_after_number_of_spikes, self.number_of_spikes
        )
        epsilon_t[cooldown_range] /= self.reduction_cooldown
        return torch.tensor(epsilon_t)

    def get_update_after_x_pattern(self) -> float:
        """Tells us after how many pattern we need to update the weights.

        The value is ``batch_size * update_after_x_batch``.
        """
        return self.batch_size * self.update_after_x_batch

View file

@ -1 +0,0 @@
Performance reached (test data correct classifications): 99.26%

View file

@ -1,31 +0,0 @@
import os

# Set before the tensorboard import below -- presumably to silence the
# TensorFlow C++ logging.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import numpy as np
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing import event_accumulator

# Event file to read and the scalar to plot.
filename: str = "events.out.tfevents.1651325827.fedora.115860.0"
which_scalar: str = "Test Number Correct"

acc = event_accumulator.EventAccumulator(filename)
acc.Reload()

# What is available? -> acc.Tags()["scalars"]

te = acc.Scalars(which_scalar)

# Keep entries 1 and 2 of every scalar event
# (presumably step and value -- confirm against the tensorboard API).
temp: list = [(te_item[1], te_item[2]) for te_item in te]
temp_np = np.array(temp)

# Error in percent; 10000 is the hard-coded number of test patterns.
plt.semilogy(temp_np[:, 0], (1.0 - (temp_np[:, 1] / 10000)) * 100)
plt.xlabel("Epochs")
plt.ylabel("Error [%]")
plt.savefig("Error.png")
plt.show()

View file

@ -1,180 +0,0 @@
# %%
import torch
from Dataset import DatasetMNIST

# Some parameters

# Input geometry
input_number_of_channel: int = 1
input_dim_x: int = 24
input_dim_y: int = 24

# Output channels / features of the stages.
# 576 = 64 * 3 * 3: for a 24 x 24 input the un-padded 5x5 convolutions and
# the 2x2 poolings below shrink the maps 24 -> 20 -> 10 -> 6 -> 3.
number_of_output_channels_conv1: int = 32
number_of_output_channels_conv2: int = 64
number_of_output_channels_flatten1: int = 576
number_of_output_channels_full1: int = 10

# Kernel sizes
kernel_size_conv1: tuple[int, int] = (5, 5)
kernel_size_pool1: tuple[int, int] = (2, 2)
kernel_size_conv2: tuple[int, int] = (5, 5)
kernel_size_pool2: tuple[int, int] = (2, 2)

# Strides
stride_conv1: tuple[int, int] = (1, 1)
stride_pool1: tuple[int, int] = (2, 2)
stride_conv2: tuple[int, int] = (1, 1)
stride_pool2: tuple[int, int] = (2, 2)

# Paddings
padding_conv1: int = 0
padding_pool1: int = 0
padding_conv2: int = 0
padding_pool2: int = 0

# conv -> relu -> pool -> conv -> relu -> pool -> flatten -> linear -> softmax
network = torch.nn.Sequential(
    torch.nn.Conv2d(
        in_channels=input_number_of_channel,
        out_channels=number_of_output_channels_conv1,
        kernel_size=kernel_size_conv1,
        stride=stride_conv1,
        padding=padding_conv1,
    ),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(
        kernel_size=kernel_size_pool1, stride=stride_pool1, padding=padding_pool1
    ),
    torch.nn.Conv2d(
        in_channels=number_of_output_channels_conv1,
        out_channels=number_of_output_channels_conv2,
        kernel_size=kernel_size_conv2,
        stride=stride_conv2,
        padding=padding_conv2,
    ),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(
        kernel_size=kernel_size_pool2, stride=stride_pool2, padding=padding_pool2
    ),
    torch.nn.Flatten(
        start_dim=1,
    ),
    torch.nn.Linear(
        in_features=number_of_output_channels_flatten1,
        out_features=number_of_output_channels_full1,
        bias=True,
    ),
    torch.nn.Softmax(dim=1),
)

# %%
import torchvision

# Test data is center cropped, training data is cropped at a random
# position; both to 24 x 24.
test_processing_chain = torchvision.transforms.Compose(
    transforms=[torchvision.transforms.CenterCrop((24, 24))],
)
train_processing_chain = torchvision.transforms.Compose(
    transforms=[torchvision.transforms.RandomCrop((24, 24))],
)

path_pattern: str = "./DATA_MNIST/"
path_label: str = "./DATA_MNIST/"

dataset_train = DatasetMNIST(
    train=True, path_pattern=path_pattern, path_label=path_label
)
dataset_test = DatasetMNIST(
    train=False, path_pattern=path_pattern, path_label=path_label
)

batch_size_train: int = 100
batch_size_test: int = 100

# Only the training data is shuffled.
train_data_load = torch.utils.data.DataLoader(
    dataset_train, batch_size=batch_size_train, shuffle=True
)
test_data_load = torch.utils.data.DataLoader(
    dataset_test, batch_size=batch_size_test, shuffle=False
)

# %%
# The optimizer
optimizer = torch.optim.Adam(network.parameters(), lr=0.001)

# The LR Scheduler
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.75)

# %%
number_of_test_pattern: int = len(dataset_test)
number_of_train_pattern: int = len(dataset_train)

number_of_epoch: int = 50

# %%
import time
from torch.utils.tensorboard import SummaryWriter

tb = SummaryWriter()
# %%
# NOTE(review): `network` ends in a Softmax layer, while CrossEntropyLoss
# expects unnormalized scores. This is left untouched here because removing
# the Softmax changes what the saved models output -- please confirm.
loss_function = torch.nn.CrossEntropyLoss()

for epoch_id in range(0, number_of_epoch):
    print(f"Epoch: {epoch_id}")
    t_start: float = time.perf_counter()

    # Per-epoch accumulators
    train_loss: float = 0.0
    train_correct: int = 0
    train_number: int = 0
    test_correct: int = 0
    test_number: int = 0

    # Switch the network into training mode
    network.train()

    # This runs in total for one epoch split up into mini-batches
    for image, target in train_data_load:
        # Clean the gradient
        optimizer.zero_grad()

        output = network(train_processing_chain(image))
        loss = loss_function(output, target)

        train_loss += loss.item()
        # .item() yields a plain Python int (matching the annotation) and,
        # unlike .numpy(), does not require a CPU tensor.
        train_correct += int((output.argmax(dim=1) == target).sum().item())
        train_number += target.shape[0]

        # Calculate backprop
        loss.backward()

        # Update the parameter
        optimizer.step()

    # Update the learning rate
    # NOTE(review): the indentation was lost in the reviewed copy; the
    # scheduler is stepped once per epoch with the summed loss -- confirm.
    lr_scheduler.step(train_loss)

    t_training: float = time.perf_counter()

    # Switch the network into evaluation mode
    network.eval()
    with torch.no_grad():
        for image, target in test_data_load:
            output = network(test_processing_chain(image))
            test_correct += int((output.argmax(dim=1) == target).sum().item())
            test_number += target.shape[0]

    t_testing = time.perf_counter()

    perfomance_test_correct: float = 100.0 * test_correct / test_number
    perfomance_train_correct: float = 100.0 * train_correct / train_number

    tb.add_scalar("Train Loss", train_loss, epoch_id)
    tb.add_scalar("Train Number Correct", train_correct, epoch_id)
    tb.add_scalar("Test Number Correct", test_correct, epoch_id)

    print(f"Training: Loss={train_loss:.5f} Correct={perfomance_train_correct:.2f}%")
    print(f"Testing: Correct={perfomance_test_correct:.2f}%")
    print(
        f"Time: Training={(t_training-t_start):.1f}sec, Testing={(t_testing-t_training):.1f}sec"
    )

    # One model file per epoch.
    torch.save(network, "Model_MNIST_A_" + str(epoch_id) + ".pt")
    print()

# %%
tb.close()

View file

@ -1,455 +0,0 @@
# MIT License
# Copyright 2022 University of Bremen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# David Rotermund ( davrot@uni-bremen.de )
#
#
# Release history:
# ================
# 1.0.0 -- 01.05.2022: first release
#
#
from abc import ABC, abstractmethod
import torch
import numpy as np
import torchvision as tv # type: ignore
from Parameter import Config
class DatasetMaster(torch.utils.data.Dataset, ABC):
    """Abstract base class of the datasets.

    Loads the label and pattern arrays from ``.npy`` files. Item access
    and the pattern filters for testing and training are left to the
    subclasses.
    """

    path_label: str
    label_storage: np.ndarray
    pattern_storage: np.ndarray
    number_of_pattern: int
    mean: list[float]

    def __init__(
        self,
        train: bool = False,
        path_pattern: str = "./",
        path_label: str = "./",
    ) -> None:
        """Load the train (``train is True``) or the test arrays.

        ``path_label`` and ``path_pattern`` are the directories that hold
        the label and the pattern file.
        """
        super().__init__()

        # Only the literal True selects the training data.
        if train is True:
            label_file = "/TrainLabelStorage.npy"
            pattern_file = "/TrainPatternStorage.npy"
        else:
            label_file = "/TestLabelStorage.npy"
            pattern_file = "/TestPatternStorage.npy"

        self.label_storage = np.load(path_label + label_file)
        self.pattern_storage = np.load(path_pattern + pattern_file)

        # One label per pattern.
        self.number_of_pattern = self.label_storage.shape[0]

        # Filled in by the subclasses.
        self.mean = []

    def __len__(self) -> int:
        """Return the number of patterns."""
        return self.number_of_pattern

    @abstractmethod
    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Get one pattern (and its label) at position index."""

    @abstractmethod
    def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Preprocess a test pattern."""

    @abstractmethod
    def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """Preprocess a training pattern."""
class DatasetMNIST(DatasetMaster):
    """MNIST dataset: one gray channel, with its pattern filters."""

    def __init__(
        self,
        train: bool = False,
        path_pattern: str = "./",
        path_label: str = "./",
    ) -> None:
        """Load the data, add a channel axis and normalize the patterns."""
        super().__init__(train, path_pattern, path_label)

        # Insert the channel axis at position 1 and convert to float32.
        with_channel_axis = self.pattern_storage[:, np.newaxis, :, :]
        self.pattern_storage = np.ascontiguousarray(
            with_channel_axis.astype(dtype=np.float32)
        )
        # Divide by the largest value found in the data.
        self.pattern_storage /= np.max(self.pattern_storage)

        # Mean over every axis but the channel axis.
        channel_mean = self.pattern_storage.mean(3).mean(2).mean(0)
        self.mean = list(channel_mean)

    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Return pattern ``index`` (channel axis kept) and its label."""
        label = int(self.label_storage[index])
        pattern = self.pattern_storage[index, 0:1, :, :]
        return torch.tensor(pattern), label

    @staticmethod
    def _assert_gray_config(cfg: Config) -> None:
        """Check that cfg holds one mean value and a positive 2d size."""
        assert len(cfg.image_statistics.mean) == 1
        assert len(cfg.image_statistics.the_size) == 2
        assert cfg.image_statistics.the_size[0] > 0
        assert cfg.image_statistics.the_size[1] > 0

    @staticmethod
    def _run_chain(
        pattern: torch.Tensor, cfg: Config, chain: torch.nn.Sequential
    ) -> torch.Tensor:
        """Apply the scripted chain, then the on/off filter or the offset."""
        # Preprocess the input data
        pattern = torch.jit.script(chain)(pattern)

        first_channel = pattern[:, 0:1, :, :]

        # => On/Off
        if cfg.augmentation.use_on_off_filter is True:
            on_off = OnOffFilter(p=cfg.image_statistics.mean[0])
            return on_off(first_channel)

        # Without the filter only the float32 machine epsilon is added.
        return first_channel + torch.finfo(torch.float32).eps

    def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """0. The test image comes in
        1. is center cropped
        2. on/off filtered
        3. returned.

        This is a 1 channel version (e.g. one gray channel).
        """
        self._assert_gray_config(cfg)

        # Transformation chain
        chain = torch.nn.Sequential(
            tv.transforms.CenterCrop(size=cfg.image_statistics.the_size),
        )
        return self._run_chain(pattern, cfg, chain)

    def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """0. The training image comes in
        1. is cropped from a random position
        2. on/off filtered
        3. returned.

        This is a 1 channel version (e.g. one gray channel).
        """
        self._assert_gray_config(cfg)

        # Transformation chain
        chain = torch.nn.Sequential(
            tv.transforms.RandomCrop(size=cfg.image_statistics.the_size),
        )
        return self._run_chain(pattern, cfg, chain)
class DatasetFashionMNIST(DatasetMaster):
    """Fashion MNIST dataset: one gray channel, with its pattern filters."""

    def __init__(
        self,
        train: bool = False,
        path_pattern: str = "./",
        path_label: str = "./",
    ) -> None:
        """Load the data, add a channel axis and normalize the patterns."""
        super().__init__(train, path_pattern, path_label)

        # Insert the channel axis at position 1 and convert to float32.
        with_channel_axis = self.pattern_storage[:, np.newaxis, :, :]
        self.pattern_storage = np.ascontiguousarray(
            with_channel_axis.astype(dtype=np.float32)
        )
        # Divide by the largest value found in the data.
        self.pattern_storage /= np.max(self.pattern_storage)

        # Mean over every axis but the channel axis.
        channel_mean = self.pattern_storage.mean(3).mean(2).mean(0)
        self.mean = list(channel_mean)

    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """Return pattern ``index`` (channel axis kept) and its label."""
        label = int(self.label_storage[index])
        pattern = self.pattern_storage[index, 0:1, :, :]
        return torch.tensor(pattern), label

    @staticmethod
    def _assert_gray_config(cfg: Config) -> None:
        """Check that cfg holds one mean value and a positive 2d size."""
        assert len(cfg.image_statistics.mean) == 1
        assert len(cfg.image_statistics.the_size) == 2
        assert cfg.image_statistics.the_size[0] > 0
        assert cfg.image_statistics.the_size[1] > 0

    @staticmethod
    def _run_chain(
        pattern: torch.Tensor, cfg: Config, chain: torch.nn.Sequential
    ) -> torch.Tensor:
        """Apply the scripted chain, then the on/off filter or the offset."""
        # Preprocess the input data
        pattern = torch.jit.script(chain)(pattern)

        first_channel = pattern[:, 0:1, :, :]

        # => On/Off
        if cfg.augmentation.use_on_off_filter is True:
            on_off = OnOffFilter(p=cfg.image_statistics.mean[0])
            return on_off(first_channel)

        # Without the filter only the float32 machine epsilon is added.
        return first_channel + torch.finfo(torch.float32).eps

    def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """0. The test image comes in
        1. is center cropped
        2. on/off filtered
        3. returned.

        This is a 1 channel version (e.g. one gray channel).
        """
        self._assert_gray_config(cfg)

        # Transformation chain
        chain = torch.nn.Sequential(
            tv.transforms.CenterCrop(size=cfg.image_statistics.the_size),
        )
        return self._run_chain(pattern, cfg, chain)

    def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
        """0. The training image comes in
        1. is cropped from a random position
        2. is randomly horizontally flipped
        3. is randomly color jittered
        4. on/off filtered
        5. returned.

        This is a 1 channel version (e.g. one gray channel).
        """
        self._assert_gray_config(cfg)

        augmentation = cfg.augmentation

        # Transformation chain
        chain = torch.nn.Sequential(
            tv.transforms.RandomCrop(size=cfg.image_statistics.the_size),
            tv.transforms.RandomHorizontalFlip(p=augmentation.flip_p),
            tv.transforms.ColorJitter(
                brightness=augmentation.jitter_brightness,
                contrast=augmentation.jitter_contrast,
                saturation=augmentation.jitter_saturation,
                hue=augmentation.jitter_hue,
            ),
        )
        return self._run_chain(pattern, cfg, chain)
class DatasetCIFAR(DatasetMaster):
"""Contstructor"""
# Initialize
def __init__(
self,
train: bool = False,
path_pattern: str = "./",
path_label: str = "./",
) -> None:
super().__init__(train, path_pattern, path_label)
self.pattern_storage = np.ascontiguousarray(
np.moveaxis(self.pattern_storage.astype(dtype=np.float32), 3, 1)
)
self.pattern_storage /= np.max(self.pattern_storage)
mean = self.pattern_storage.mean(3).mean(2).mean(0)
self.mean = [*mean]
def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
image = self.pattern_storage[index, :, :, :]
target = int(self.label_storage[index])
return torch.tensor(image), target
def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
"""0. The test image comes in
1. is center cropped
2. on/off filteres
3. returned.
This is a 3 channel version (e.g. r,g,b channels).
"""
assert len(cfg.image_statistics.mean) == 3
assert len(cfg.image_statistics.the_size) == 2
assert cfg.image_statistics.the_size[0] > 0
assert cfg.image_statistics.the_size[1] > 0
# Transformation chain
my_transforms: torch.nn.Sequential = torch.nn.Sequential(
tv.transforms.CenterCrop(size=cfg.image_statistics.the_size),
)
scripted_transforms = torch.jit.script(my_transforms)
# Preprocess the input data
pattern = scripted_transforms(pattern)
# => On/Off
if cfg.augmentation.use_on_off_filter is True:
my_on_off_filter_r: OnOffFilter = OnOffFilter(
p=cfg.image_statistics.mean[0]
)
my_on_off_filter_g: OnOffFilter = OnOffFilter(
p=cfg.image_statistics.mean[1]
)
my_on_off_filter_b: OnOffFilter = OnOffFilter(
p=cfg.image_statistics.mean[2]
)
r: torch.Tensor = my_on_off_filter_r(
pattern[:, 0:1, :, :],
)
g: torch.Tensor = my_on_off_filter_g(
pattern[:, 1:2, :, :],
)
b: torch.Tensor = my_on_off_filter_b(
pattern[:, 2:3, :, :],
)
else:
r = pattern[:, 0:1, :, :] + torch.finfo(torch.float32).eps
g = pattern[:, 1:2, :, :] + torch.finfo(torch.float32).eps
b = pattern[:, 2:3, :, :] + torch.finfo(torch.float32).eps
new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1)
return new_tensor
def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
    """Augment and preprocess a batch of training images (3 channel version).

    Steps:
        1. A crop of size ``cfg.image_statistics.the_size`` is taken from a
           random position.
        2. The batch is randomly flipped horizontally
           (probability ``cfg.augmentation.flip_p``).
        3. Color jitter is applied (brightness, contrast, saturation and hue
           taken from ``cfg.augmentation``).
        4. If ``cfg.augmentation.use_on_off_filter`` is True, every channel is
           passed through its own ``OnOffFilter`` (using the channel mean from
           ``cfg.image_statistics.mean``); otherwise float32 machine epsilon
           is added to every channel.
        5. The processed channels are concatenated along dim 1 and returned.
    """
    stats = cfg.image_statistics
    augmentation = cfg.augmentation
    assert len(stats.mean) == 3
    assert len(stats.the_size) == 2
    assert stats.the_size[0] > 0
    assert stats.the_size[1] > 0

    # Transformation chain, scripted before it is applied.
    scripted_transforms = torch.jit.script(
        torch.nn.Sequential(
            tv.transforms.RandomCrop(size=stats.the_size),
            tv.transforms.RandomHorizontalFlip(p=augmentation.flip_p),
            tv.transforms.ColorJitter(
                brightness=augmentation.jitter_brightness,
                contrast=augmentation.jitter_contrast,
                saturation=augmentation.jitter_saturation,
                hue=augmentation.jitter_hue,
            ),
        )
    )

    # Preprocess the input data
    pattern = scripted_transforms(pattern)

    # => On/Off, one channel slice (kept 4-dimensional) at a time
    if cfg.augmentation.use_on_off_filter is True:
        channels = [
            OnOffFilter(p=stats.mean[idx])(pattern[:, idx : idx + 1, :, :])
            for idx in range(3)
        ]
    else:
        eps = torch.finfo(torch.float32).eps
        channels = [pattern[:, idx : idx + 1, :, :] + eps for idx in range(3)]

    return torch.cat(channels, dim=1)
class OnOffFilter(torch.nn.Module):
    """Split a single-channel tensor into an "off" and an "on" channel.

    The input is shifted and scaled to ``2 * (tensor - p)``. The first output
    channel carries the magnitude of the negative part, the second one the
    non-negative part; both are zero elsewhere.
    """

    def __init__(self, p: float = 0.5) -> None:
        """Store the reference value ``p`` that is subtracted from the input."""
        super().__init__()
        self.p: float = p

    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
        """Return the two-channel on/off representation of ``tensor``.

        Args:
            tensor: Input whose dim 1 must have size 1.

        Returns:
            Tensor with size 2 in dim 1: channel 0 is ``-x`` where ``x < 0``,
            channel 1 is ``x`` where ``x >= 0`` (with ``x = 2 * (tensor - p)``).
        """
        assert tensor.shape[1] == 1

        centered = 2.0 * (tensor - self.p)
        zeros = torch.zeros_like(centered)

        off_part: torch.Tensor = torch.where(centered < 0.0, -centered, zeros)
        on_part: torch.Tensor = torch.where(centered >= 0.0, centered, zeros)

        return torch.cat((off_part, on_part), dim=1)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(p={self.p})"
if __name__ == "__main__":
pass

View file

@ -1,18 +0,0 @@
#
# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!
#
"""HDynamicCNNManyIP Module"""
from __future__ import annotations
import PyHDynamicCNNManyIP
import typing
__all__ = [
"HDynamicCNNManyIP"
]
# Type stub for the HDynamicCNNManyIP class of the PyHDynamicCNNManyIP module.
class HDynamicCNNManyIP():
def __init__(self) -> None: ...
# Takes 22 positional ints and returns a bool.
# NOTE(review): presumably the ints are raw pointer addresses and array
# dimensions of the C++ update routine — confirm against the binding code.
def update_with_init_vector_multi_pattern(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int, arg16: int, arg17: int, arg18: int, arg19: int, arg20: int, arg21: int) -> bool: ...
pass

View file

@ -21,8 +21,8 @@ It was programmed with 3.10.4. And I used some 3.10 Python expression. Thus you
# C++ # C++
You need to modify the Makefile in the C++ directory to your Python installation. You need to modify the Makefile in the C++ directory to your Python installation.
In addition yor Python installation needs the PyBind11 package installed. You might want to perform a
In addition your Python installation needs the PyBind11 package installed. You might want to perform a
pip install pybind11 pip install pybind11
The Makefile uses clang as a compiler. If you want something else then you need to change the Makefile. The Makefile uses clang as a compiler. If you want something else then you need to change the Makefile.
For CUDA I used version 12.0.

1291
SbS.py

File diff suppressed because it is too large Load diff

10
clean.sh Normal file
View file

@ -0,0 +1,10 @@
#!/bin/bash
# Interactively remove the output of previous runs: the Log and Parameters
# directories and every *.txt file in the current directory.
# The script relies on bash-only features (read -n, [[ ... =~ ... ]]),
# hence the explicit bash shebang.

read -p "Are you sure? " -n 1 -r
echo # (optional) move to a new line
# Anything except a single "y" or "Y" aborts without deleting anything.
if [[ ! $REPLY =~ ^[Yy]$ ]]
then
    exit 1
fi

rm -rf Log
rm -rf Parameters
# -f: do not report an error when there is no *.txt file to delete.
rm -f -- *.txt

4
dataset.json Normal file
View file

@ -0,0 +1,4 @@
{
"data_path": "./DATA_FASHION_MNIST/",
"data_mode": "MNIST_FASHION"
}

24
def.json Normal file
View file

@ -0,0 +1,24 @@
{
"epoch_id_max": 200,
"number_of_spikes": [
1600
],
"batch_size": 24,
"forgetting_offset": 0.0,
"stage_id": 0,
"simulation_id": 0,
"learning_parameters": {
"learning_rate_threshold_w": 0.001,
"eps_xy_intitial": 1.0,
"number_of_batches_for_one_update": 20,
"learning_rate_gamma_w": 0.001,
"lr_scheduler_patience_w": 50,
"adapt_learning_rate_after_minibatch": true,
"w_trainable": [
true
]
},
"augmentation": {},
"image_statistics": {},
"approximation_setting": {}
}

24
def_sbs_L0.json Normal file
View file

@ -0,0 +1,24 @@
{
"epoch_id_max": 200,
"number_of_spikes": [
1600
],
"batch_size": 24,
"forgetting_offset": 0.0,
"stage_id": 0,
"simulation_id": 0,
"learning_parameters": {
"learning_rate_threshold_w": 0.001,
"eps_xy_intitial": 1.0,
"number_of_batches_for_one_update": 20,
"learning_rate_gamma_w": 1.0,
"lr_scheduler_patience_w": 50,
"adapt_learning_rate_after_minibatch": true,
"w_trainable": [
true
]
},
"augmentation": {},
"image_statistics": {},
"approximation_setting": {}
}

47
get_perf.py Normal file
View file

@ -0,0 +1,47 @@
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing import event_accumulator
import numpy as np
import json
from jsmin import jsmin
import glob
# -------------------------------
# Read the number of spikes from the config file (comments are stripped
# by jsmin before the JSON is parsed).
filename: str = "def.json"
with open(filename) as json_file:
    data = json.loads(jsmin(json_file.read()))
number_of_spikes = data["number_of_spikes"]
# -------------------------------

# Exactly one entry is expected below ./Log
path_runs: str = "./Log/*"
run_entries = glob.glob(path_runs)
assert len(run_entries) == 1
path = run_entries[0]

# Load the recorded events of that run
acc = event_accumulator.EventAccumulator(path)
acc.Reload()

tags = acc.Tags()
available_scalar = tags["scalars"]
available_histograms = tags["histograms"]

# Collect elements 1 and 2 of every "Test Error" event as one array row
which_scalar = "Test Error"
te = acc.Scalars(which_scalar)
temp = np.array([(te_item[1], te_item[2]) for te_item in te])

print(temp)
np.save(f"test_error_{number_of_spikes}.npy", temp)

View file

@ -1,74 +1,30 @@
# MIT License
# Copyright 2022 University of Bremen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# David Rotermund ( davrot@uni-bremen.de )
#
#
# Release history:
# ================
# 1.0.0 -- 01.05.2022: first release
#
#
# %% # %%
import os import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import numpy as np
import sys import sys
import torch import torch
import time
import dataconf import dataconf
import logging import logging
from datetime import datetime from datetime import datetime
import glob
from Dataset import ( from network.Parameter import Config
DatasetMaster,
DatasetCIFAR, from network.build_network import build_network
DatasetMNIST, from network.build_optimizer import build_optimizer
DatasetFashionMNIST, from network.build_lr_scheduler import build_lr_scheduler
) from network.build_datasets import build_datasets
from Parameter import Config from network.load_previous_weights import load_previous_weights
from SbS import SbS
from network.loop_train_test import loop_test, loop_train, run_lr_scheduler
from torch.utils.tensorboard import SummaryWriter from torch.utils.tensorboard import SummaryWriter
try:
from SbSLRScheduler import SbSLRScheduler
sbs_lr_scheduler: bool = True # ######################################################################
except Exception: # We want to log what is going on into a file and screen
sbs_lr_scheduler = False # ######################################################################
tb = SummaryWriter()
torch.set_default_dtype(torch.float32)
#######################################################################
# We want to log what is going on into a file and screen #
#######################################################################
now = datetime.now() now = datetime.now()
dt_string_filename = now.strftime("%Y_%m_%d_%H_%M_%S") dt_string_filename = now.strftime("%Y_%m_%d_%H_%M_%S")
@ -80,9 +36,9 @@ logging.basicConfig(
) )
logging.getLogger().addHandler(logging.StreamHandler()) logging.getLogger().addHandler(logging.StreamHandler())
####################################################################### # ######################################################################
# Load the config data from the json file # # Load the config data from the json file
####################################################################### # ######################################################################
if len(sys.argv) < 2: if len(sys.argv) < 2:
raise Exception("Argument: Config file name is missing") raise Exception("Argument: Config file name is missing")
@ -92,570 +48,154 @@ filename: str = sys.argv[1]
if os.path.exists(filename) is False: if os.path.exists(filename) is False:
raise Exception(f"Config file not found! {filename}") raise Exception(f"Config file not found! {filename}")
cfg = dataconf.file(filename, Config) if os.path.exists("network.json") is False:
raise Exception("Config file not found! network.json")
if os.path.exists("dataset.json") is False:
raise Exception("Config file not found! dataset.json")
cfg = dataconf.multi.file("network.json").file("dataset.json").file(filename).on(Config)
logging.info(cfg)
logging.info(f"Using configuration file: {filename}") logging.info(f"Using configuration file: {filename}")
logging.info(f"Number of spikes: {cfg.number_of_spikes}")
logging.info(f"Cooldown after spikes: {cfg.cooldown_after_number_of_spikes}")
logging.info(f"Reduction cooldown: {cfg.reduction_cooldown}")
logging.info("")
logging.info(f"Epsilon 0: {cfg.epsilon_0}")
logging.info(f"Batch size: {cfg.batch_size}")
logging.info(f"Data mode: {cfg.data_mode}")
logging.info("")
logging.info("*** Config loaded.")
logging.info("")
####################################################################### tb = SummaryWriter(log_dir=cfg.log_path)
# Prepare the test and training data #
#######################################################################
# Load the input data # ###########################################
the_dataset_train: DatasetMaster # GPU Yes / NO ?
the_dataset_test: DatasetMaster # ###########################################
if cfg.data_mode == "CIFAR10": default_dtype = torch.float32
the_dataset_train = DatasetCIFAR( torch.set_default_dtype(default_dtype)
train=True, path_pattern=cfg.data_path, path_label=cfg.data_path torch_device: str = "cuda:0" if torch.cuda.is_available() else "cpu"
) use_gpu: bool = True if torch.cuda.is_available() else False
the_dataset_test = DatasetCIFAR( print(f"Using {torch_device} device")
train=False, path_pattern=cfg.data_path, path_label=cfg.data_path device = torch.device(torch_device)
)
elif cfg.data_mode == "MNIST":
the_dataset_train = DatasetMNIST(
train=True, path_pattern=cfg.data_path, path_label=cfg.data_path
)
the_dataset_test = DatasetMNIST(
train=False, path_pattern=cfg.data_path, path_label=cfg.data_path
)
elif cfg.data_mode == "MNIST_FASHION":
the_dataset_train = DatasetFashionMNIST(
train=True, path_pattern=cfg.data_path, path_label=cfg.data_path
)
the_dataset_test = DatasetFashionMNIST(
train=False, path_pattern=cfg.data_path, path_label=cfg.data_path
)
else:
raise Exception("data_mode unknown")
if len(cfg.image_statistics.mean) == 0: # ######################################################################
cfg.image_statistics.mean = the_dataset_train.mean # Prepare the test and training data
# ######################################################################
# The basic size the_dataset_train, the_dataset_test, my_loader_test, my_loader_train = build_datasets(
cfg.image_statistics.the_size = [ cfg
the_dataset_train.pattern_storage.shape[2],
the_dataset_train.pattern_storage.shape[3],
]
# Minus the stuff we cut away in the pattern filter
cfg.image_statistics.the_size[0] -= 2 * cfg.augmentation.crop_width_in_pixel
cfg.image_statistics.the_size[1] -= 2 * cfg.augmentation.crop_width_in_pixel
my_loader_test: torch.utils.data.DataLoader = torch.utils.data.DataLoader(
the_dataset_test, batch_size=cfg.batch_size, shuffle=False
)
my_loader_train: torch.utils.data.DataLoader = torch.utils.data.DataLoader(
the_dataset_train, batch_size=cfg.batch_size, shuffle=True
) )
logging.info("*** Data loaded.") logging.info("*** Data loaded.")
####################################################################### # ######################################################################
# Build the network # # Build the network, Optimizer, and LR Scheduler #
####################################################################### # ######################################################################
wf: list[np.ndarray] = [] network = build_network(
eps_xy: list[np.ndarray] = [] cfg=cfg, device=device, default_dtype=default_dtype, logging=logging
network = torch.nn.Sequential() )
for id in range(0, len(cfg.network_structure.is_pooling_layer)): logging.info("")
if id == 0:
input_size: list[int] = cfg.image_statistics.the_size
else:
input_size = network[id - 1].output_size.tolist()
network.append( optimizer = build_optimizer(network=network, cfg=cfg, logging=logging)
SbS(
number_of_input_neurons=cfg.network_structure.forward_neuron_numbers[id][0],
number_of_neurons=cfg.network_structure.forward_neuron_numbers[id][1],
input_size=input_size,
forward_kernel_size=cfg.network_structure.forward_kernel_size[id],
number_of_spikes=cfg.number_of_spikes,
epsilon_t=cfg.get_epsilon_t(),
epsilon_xy_intitial=cfg.learning_parameters.eps_xy_intitial,
epsilon_0=cfg.epsilon_0,
weight_noise_amplitude=cfg.learning_parameters.weight_noise_amplitude,
is_pooling_layer=cfg.network_structure.is_pooling_layer[id],
strides=cfg.network_structure.strides[id],
dilation=cfg.network_structure.dilation[id],
padding=cfg.network_structure.padding[id],
alpha_number_of_iterations=cfg.learning_parameters.alpha_number_of_iterations,
number_of_cpu_processes=cfg.number_of_cpu_processes,
)
)
eps_xy.append(network[id].epsilon_xy.detach().clone().numpy()) lr_scheduler = build_lr_scheduler(optimizer=optimizer, cfg=cfg, logging=logging)
wf.append(network[id].weights.detach().clone().numpy())
logging.info("*** Network generated.") logging.info("*** Network generated.")
for id in range(0, len(network)): load_previous_weights(
# Load previous weights and epsilon xy network=network,
if cfg.learning_step > 0: overload_path=cfg.learning_parameters.overload_path,
filename = ( logging=logging,
cfg.weight_path device=device,
+ "/Weight_L" default_dtype=default_dtype,
+ str(id) )
+ "_S"
+ str(cfg.learning_step)
+ ".npy"
)
if os.path.exists(filename) is True:
network[id].weights = torch.tensor(
np.load(filename),
dtype=torch.float32,
)
wf[id] = np.load(filename)
filename = (
cfg.eps_xy_path
+ "/EpsXY_L"
+ str(id)
+ "_S"
+ str(cfg.learning_step)
+ ".npy"
)
if os.path.exists(filename) is True:
network[id].epsilon_xy = torch.tensor(
np.load(filename),
dtype=torch.float32,
)
eps_xy[id] = np.load(filename)
for id in range(0, len(network)):
# Are there weights that overwrite the initial weights?
file_to_load = glob.glob(
cfg.learning_parameters.overload_path + "/Weight_L" + str(id) + "*.npy"
)
if len(file_to_load) > 1:
raise Exception(
f"Too many previous weights files {cfg.learning_parameters.overload_path}/Weight_L{id}*.npy"
)
if len(file_to_load) == 1:
network[id].weights = torch.tensor(
np.load(file_to_load[0]),
dtype=torch.float32,
)
wf[id] = np.load(file_to_load[0])
logging.info(f"File used: {file_to_load[0]}")
# Are there epsilon xy files that overwrite the initial epsilon xy?
file_to_load = glob.glob(
cfg.learning_parameters.overload_path + "/EpsXY_L" + str(id) + "*.npy"
)
if len(file_to_load) > 1:
raise Exception(
f"Too many previous epsilon xy files {cfg.learning_parameters.overload_path}/EpsXY_L{id}*.npy"
)
if len(file_to_load) == 1:
network[id].epsilon_xy = torch.tensor(
np.load(file_to_load[0]),
dtype=torch.float32,
)
eps_xy[id] = np.load(file_to_load[0])
logging.info(f"File used: {file_to_load[0]}")
#######################################################################
# Optimizer and LR Scheduler #
#######################################################################
# I keep weights and epsilon xy separate to
# set the initial learning rate independently
parameter_list_weights: list = []
parameter_list_epsilon_xy: list = []
for id in range(0, len(network)):
parameter_list_weights.append(network[id]._weights)
parameter_list_epsilon_xy.append(network[id]._epsilon_xy)
if cfg.learning_parameters.optimizer_name == "Adam":
logging.info("Using optimizer: Adam")
if cfg.learning_parameters.learning_rate_gamma_w > 0:
optimizer_wf: torch.optim.Optimizer = torch.optim.Adam(
parameter_list_weights,
lr=cfg.learning_parameters.learning_rate_gamma_w,
)
else:
optimizer_wf = torch.optim.Adam(
parameter_list_weights,
)
if cfg.learning_parameters.learning_rate_gamma_eps_xy > 0:
optimizer_eps: torch.optim.Optimizer = torch.optim.Adam(
parameter_list_epsilon_xy,
lr=cfg.learning_parameters.learning_rate_gamma_eps_xy,
)
else:
optimizer_eps = torch.optim.Adam(
parameter_list_epsilon_xy,
)
else:
raise Exception("Optimizer not implemented")
do_lr_scheduler_step: bool = True
if cfg.learning_parameters.lr_schedule_name == "None":
logging.info("Using lr scheduler: None")
do_lr_scheduler_step = False
elif cfg.learning_parameters.lr_schedule_name == "ReduceLROnPlateau":
logging.info("Using lr scheduler: ReduceLROnPlateau")
assert cfg.learning_parameters.lr_scheduler_factor_w > 0
assert cfg.learning_parameters.lr_scheduler_factor_eps_xy > 0
assert cfg.learning_parameters.lr_scheduler_patience_w > 0
assert cfg.learning_parameters.lr_scheduler_patience_eps_xy > 0
lr_scheduler_wf = torch.optim.lr_scheduler.ReduceLROnPlateau(
optimizer_wf,
factor=cfg.learning_parameters.lr_scheduler_factor_w,
patience=cfg.learning_parameters.lr_scheduler_patience_w,
)
lr_scheduler_eps = torch.optim.lr_scheduler.ReduceLROnPlateau(
optimizer_eps,
factor=cfg.learning_parameters.lr_scheduler_factor_eps_xy,
patience=cfg.learning_parameters.lr_scheduler_patience_eps_xy,
)
elif cfg.learning_parameters.lr_schedule_name == "SbSLRScheduler":
logging.info("Using lr scheduler: SbSLRScheduler")
assert cfg.learning_parameters.lr_scheduler_factor_w > 0
assert cfg.learning_parameters.lr_scheduler_factor_eps_xy > 0
assert cfg.learning_parameters.lr_scheduler_patience_w > 0
assert cfg.learning_parameters.lr_scheduler_patience_eps_xy > 0
if sbs_lr_scheduler is False:
raise Exception("lr_scheduler: SbSLRScheduler.py missing")
lr_scheduler_wf = SbSLRScheduler(
optimizer_wf,
factor=cfg.learning_parameters.lr_scheduler_factor_w,
patience=cfg.learning_parameters.lr_scheduler_patience_w,
tau=cfg.learning_parameters.lr_scheduler_tau_w,
)
lr_scheduler_eps = SbSLRScheduler(
optimizer_eps,
factor=cfg.learning_parameters.lr_scheduler_factor_eps_xy,
patience=cfg.learning_parameters.lr_scheduler_patience_eps_xy,
tau=cfg.learning_parameters.lr_scheduler_tau_eps_xy,
)
else:
raise Exception("lr_scheduler not implemented")
logging.info("*** Optimizer prepared.")
#######################################################################
# Some variable declarations #
#######################################################################
test_correct: int = 0
test_all: int = 0
test_complete: int = the_dataset_test.__len__()
train_correct: int = 0
train_all: int = 0
train_complete: int = the_dataset_train.__len__()
train_number_of_processed_pattern: int = 0
train_loss: np.ndarray = np.zeros((1), dtype=np.float32)
last_test_performance: float = -1.0
logging.info("") logging.info("")
last_test_performance: float = -1.0
with torch.no_grad(): with torch.no_grad():
if cfg.learning_parameters.learning_active is True: if cfg.learning_parameters.learning_active is True:
while True: while cfg.epoch_id < cfg.epoch_id_max:
############################################### # ##############################################
# Run a training data batch # # Run a training data epoch
############################################### # ##############################################
network.train()
for h_x, h_x_labels in my_loader_train:
time_0: float = time.perf_counter()
if train_number_of_processed_pattern == 0:
# Reset the gradient of the torch optimizers
optimizer_wf.zero_grad()
optimizer_eps.zero_grad()
with torch.enable_grad():
h_collection = []
h_collection.append(
the_dataset_train.pattern_filter_train(h_x, cfg).type(
dtype=torch.float32
)
)
for id in range(0, len(network)):
h_collection.append(network[id](h_collection[-1]))
# Convert label into one hot
target_one_hot: torch.Tensor = torch.zeros(
( (
h_x_labels.shape[0], my_loss_for_batch,
int(cfg.network_structure.number_of_output_neurons), performance_for_batch,
full_loss,
full_correct,
) = loop_train(
cfg=cfg,
network=network,
my_loader_train=my_loader_train,
the_dataset_train=the_dataset_train,
optimizer=optimizer,
device=device,
default_dtype=default_dtype,
logging=logging,
tb=tb,
adapt_learning_rate=cfg.learning_parameters.adapt_learning_rate_after_minibatch,
lr_scheduler=lr_scheduler,
last_test_performance=last_test_performance,
) )
)
target_one_hot.scatter_(
1, h_x_labels.unsqueeze(1), torch.ones((h_x_labels.shape[0], 1))
)
target_one_hot = (
target_one_hot.unsqueeze(2)
.unsqueeze(2)
.type(dtype=torch.float32)
)
h_y1 = torch.log(h_collection[-1] + 1e-20)
my_loss: torch.Tensor = (
(
torch.nn.functional.mse_loss(
h_collection[-1],
target_one_hot,
reduction="none",
)
* cfg.learning_parameters.loss_coeffs_mse
+ torch.nn.functional.kl_div(
h_y1, target_one_hot + 1e-20, reduction="none"
)
* cfg.learning_parameters.loss_coeffs_kldiv
)
/ (
cfg.learning_parameters.loss_coeffs_kldiv
+ cfg.learning_parameters.loss_coeffs_mse
)
).mean()
time_1: float = time.perf_counter()
my_loss.backward()
my_loss_float = my_loss.item()
time_2: float = time.perf_counter()
train_correct += (
(h_collection[-1].argmax(dim=1).squeeze() == h_x_labels)
.sum()
.numpy()
)
train_all += h_collection[-1].shape[0]
performance: float = 100.0 * train_correct / train_all
time_measure_a: float = time_1 - time_0
logging.info(
(
f"{cfg.learning_step:^6} Training \t{train_all^6} pattern "
f"with {performance/100.0:^6.2%} "
f"\t\tForward time: \t{time_measure_a:^6.2f}sec"
)
)
train_loss[0] += my_loss_float
train_number_of_processed_pattern += h_collection[-1].shape[0]
time_measure_b: float = time_2 - time_1
logging.info(
(
f"\t\t\tLoss: {train_loss[0]/train_number_of_processed_pattern:^15.3e} "
f"\t\t\tBackward time: \t{time_measure_b:^6.2f}sec "
)
)
if (
train_number_of_processed_pattern
>= cfg.get_update_after_x_pattern()
):
my_loss_for_batch: float = (
train_loss[0] / train_number_of_processed_pattern
)
optimizer_wf.step()
optimizer_eps.step()
for id in range(0, len(network)):
if cfg.network_structure.w_trainable[id] is True:
network[id].norm_weights()
network[id].threshold_weights(
cfg.learning_parameters.learning_rate_threshold_w
)
network[id].norm_weights()
else:
network[id].weights = torch.tensor(
wf[id], dtype=torch.float32
)
if cfg.network_structure.eps_xy_trainable[id] is True:
network[id].threshold_epsilon_xy(
cfg.learning_parameters.learning_rate_threshold_eps_xy
)
if cfg.network_structure.eps_xy_mean[id] is True:
network[id].mean_epsilon_xy()
else:
network[id].epsilon_xy = torch.tensor(
eps_xy[id], dtype=torch.float32
)
if cfg.network_structure.w_trainable[id] is True:
# Save the new values
np.save(
cfg.weight_path
+ "/Weight_L"
+ str(id)
+ "_S"
+ str(cfg.learning_step)
+ ".npy",
network[id].weights.detach().numpy(),
)
try:
tb.add_histogram(
"Weights " + str(id),
network[id].weights,
cfg.learning_step,
)
except ValueError:
pass
if cfg.network_structure.eps_xy_trainable[id] is True:
np.save(
cfg.eps_xy_path
+ "/EpsXY_L"
+ str(id)
+ "_S"
+ str(cfg.learning_step)
+ ".npy",
network[id].epsilon_xy.detach().numpy(),
)
try:
tb.add_histogram(
"Epsilon XY " + str(id),
network[id].epsilon_xy.detach().numpy(),
cfg.learning_step,
)
except ValueError:
pass
# Let the torch learning rate scheduler update the # Let the torch learning rate scheduler update the
# learning rates of the optimizers # learning rates of the optimizers
if do_lr_scheduler_step is True: if cfg.learning_parameters.adapt_learning_rate_after_minibatch is False:
if cfg.learning_parameters.lr_scheduler_use_performance is True: run_lr_scheduler(
lr_scheduler_wf.step(100.0 - performance) cfg=cfg,
lr_scheduler=lr_scheduler,
optimizer=optimizer,
performance_for_batch=performance_for_batch,
my_loss_for_batch=my_loss_for_batch,
tb=tb,
logging=logging,
)
# ##############################################
# Run test data
# ##############################################
network.eval()
last_test_performance = loop_test(
epoch_id=cfg.epoch_id,
cfg=cfg,
network=network,
my_loader_test=my_loader_test,
the_dataset_test=the_dataset_test,
device=device,
default_dtype=default_dtype,
logging=logging,
tb=tb,
)
# Next epoch
cfg.epoch_id += 1
else: else:
lr_scheduler_wf.step(my_loss_for_batch) # ##############################################
# Run test data
if do_lr_scheduler_step is True: # ##############################################
if cfg.learning_parameters.lr_scheduler_use_performance is True: network.eval()
lr_scheduler_eps.step(100.0 - performance) last_test_performance = loop_test(
else: epoch_id=cfg.epoch_id,
lr_scheduler_eps.step(my_loss_for_batch) cfg=cfg,
network=network,
tb.add_scalar("Train Error", 100.0 - performance, cfg.learning_step) my_loader_test=my_loader_test,
tb.add_scalar("Train Loss", my_loss_for_batch, cfg.learning_step) the_dataset_test=the_dataset_test,
tb.add_scalar( device=device,
"Learning Rate Scale WF", default_dtype=default_dtype,
optimizer_wf.param_groups[-1]["lr"], logging=logging,
cfg.learning_step, tb=tb,
)
tb.add_scalar(
"Learning Rate Scale Eps XY ",
optimizer_eps.param_groups[-1]["lr"],
cfg.learning_step,
)
logging.info(
f"\t\t\tLearning rate: weights:{optimizer_wf.param_groups[-1]['lr']:^15.3e} \t epsilon xy:{optimizer_eps.param_groups[-1]['lr']:^15.3e}"
)
logging.info("\t\t\t*** Updating the weights ***")
cfg.learning_step += 1
train_loss = np.zeros((1), dtype=np.float32)
train_correct = 0
train_all = 0
performance = 0
train_number_of_processed_pattern = 0
tb.flush()
test_correct = 0
test_all = 0
if last_test_performance < 0:
logging.info("")
else:
logging.info(
f"\t\t\tLast test performance: {last_test_performance/100.0:^6.2%}"
)
logging.info("")
###############################################
# Run a test data performance measurement #
###############################################
if (
(
(
(
cfg.learning_step
% cfg.learning_parameters.test_every_x_learning_steps
)
== 0
)
or (cfg.learning_step == cfg.learning_step_max)
)
and (cfg.learning_parameters.test_during_learning is True)
and (cfg.learning_step > 0)
):
logging.info("")
logging.info("Testing:")
for h_x, h_x_labels in my_loader_test:
time_0 = time.perf_counter()
h_h: torch.Tensor = network(
the_dataset_test.pattern_filter_test(h_x, cfg).type(
dtype=torch.float32
)
) )
test_correct += (
(h_h.argmax(dim=1).squeeze() == h_x_labels)
.sum()
.numpy()
)
test_all += h_h.shape[0]
performance = 100.0 * test_correct / test_all
time_1 = time.perf_counter()
time_measure_a = time_1 - time_0
logging.info(
(
f"\t\t{test_all} of {test_complete}"
f" with {performance/100:^6.2%} \t Time used: {time_measure_a:^6.2f}sec"
)
)
logging.info("")
last_test_performance = performance
tb.add_scalar(
"Test Error", 100.0 - performance, cfg.learning_step
)
tb.flush()
if cfg.learning_step == cfg.learning_step_max:
tb.close() tb.close()
exit(1)
# %% # %%

5
make_new_previous.sh Normal file
View file

@ -0,0 +1,5 @@
# Copy every Parameters/*_S<MAX_NUMBER>.npy file into the directory MyPATH.
# NOTE(review): 199 presumably is the last epoch id of a 200 epoch run —
# adjust MAX_NUMBER if the run length differs.
MAX_NUMBER=199
MyPATH="Previous++"
# -p: do not fail when the target directory already exists (re-run).
mkdir -p "$MyPATH"
# The glob stays outside the quotes so that it is still expanded.
cp Parameters/*_S"$MAX_NUMBER".npy "$MyPATH"

91
network.json Normal file
View file

@ -0,0 +1,91 @@
{
"network_structure": {
"number_of_output_neurons": 10,
"layer_type": [
"SbS",
"MAX POOLING",
"SbS",
"MAX POOLING",
"SbS",
"SbS"
],
"strides": [
[
1,
1
], // "SbS"
[
2,
2
], // POOLING
[
1,
1
], // "SbS"
[
2,
2
], // POOLING
[
1,
1
], // "SbS"
[
1,
1
] // "SbS"
],
"forward_neuron_numbers": [
[
1,
32
], // "SbS"
[
32,
32
], // POOLING
[
32,
64
], // "SbS"
[
64,
64
], // POOLING
[
64,
96
], // "SbS"
[
96,
10
] // "SbS"
],
"forward_kernel_size": [
[
5,
5
], // "SbS"
[
2,
2
], // POOLING
[
5,
5
], // "SbS"
[
2,
2
], // POOLING
[
3,
3
], // "SbS"
[
1,
1
] // "SbS"
]
}
}

156
network/Adam.py Normal file
View file

@ -0,0 +1,156 @@
import torch
import math
class Adam(torch.optim.Optimizer):
    """Adam optimizer with an optional multiplicative update per parameter.

    For every parameter a flag in ``sbs_setting`` selects the update rule:

    * ``False``: ``param -= step_size * exp_avg / denom`` (additive update)
    * ``True``:  ``param *= exp(-step_size * exp_avg / denom)``

    Only a single parameter group is supported, and ``params`` is expected to
    be an iterable of tensors (not of parameter-group dicts). ``beta1``,
    ``beta2``, ``eps`` and ``maximize`` are read from the instance attributes,
    while the learning rate is read from ``param_groups[0]["lr"]`` on every
    step.
    """

    sbs_setting: list[bool]
    lr: float
    beta1: float
    beta2: float
    eps: float
    maximize: bool

    def __init__(
        self,
        params,
        sbs_setting: list[bool],
        lr: float = 1e-3,
        beta1: float = 0.9,
        beta2: float = 0.999,
        eps: float = 1e-8,
        maximize: bool = False,
    ) -> None:
        """Validate the hyperparameters and register the parameters.

        Args:
            params: Iterable of tensors to optimize. Generators (e.g.
                ``model.parameters()``) are accepted.
            sbs_setting: One flag per parameter; ``True`` selects the
                multiplicative update for that parameter.
            lr: Learning rate, must be > 0.
            beta1: Decay of the first moment estimate, in (0, 1).
            beta2: Decay of the second moment estimate, in (0, 1).
            eps: Term added to the denominator, must be > 0.
            maximize: If True the sign of the gradient is flipped.
        """

        assert lr > 0.0

        assert eps > 0.0

        assert beta1 > 0.0
        assert beta1 < 1.0

        assert beta2 > 0.0
        assert beta2 < 1.0

        # Materialize the iterable: a generator has no len() and would
        # already be exhausted when step() iterates over self.params.
        params = list(params)

        assert len(sbs_setting) == len(params)

        self.sbs_setting = sbs_setting
        self.params = params

        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.maximize = maximize

        defaults = dict(
            lr=lr,
            beta1=beta1,
            beta2=beta2,
            eps=eps,
            maximize=maximize,
        )
        super().__init__(params, defaults)

    def step(self):
        """Perform one update for all parameters that have a gradient."""

        params_with_grad = []
        grads = []
        exp_avgs = []
        exp_avg_sqs = []
        state_steps = []
        sbs_setting = []

        assert len(self.param_groups) == 1

        for index, p in enumerate(self.params):

            if p.grad is not None:

                params_with_grad.append(p)
                grads.append(p.grad)
                sbs_setting.append(self.sbs_setting[index])

                state = self.state[p]

                # Lazy state initialization
                if len(state) == 0:

                    state["step"] = torch.tensor(0.0)

                    # Exponential moving average of gradient values
                    state["exp_avg"] = torch.zeros_like(
                        p, memory_format=torch.preserve_format
                    )

                    # Exponential moving average of squared gradient values
                    state["exp_avg_sq"] = torch.zeros_like(
                        p, memory_format=torch.preserve_format
                    )

                exp_avgs.append(state["exp_avg"])
                exp_avg_sqs.append(state["exp_avg_sq"])
                state_steps.append(state["step"])

        self.adam(
            params_with_grad,
            grads,
            sbs_setting,
            exp_avgs,
            exp_avg_sqs,
            state_steps,
            beta1=self.beta1,
            beta2=self.beta2,
            # Read from the param group so that changes made to the group's
            # "lr" entry after construction are used.
            lr=self.param_groups[0]["lr"],
            eps=self.eps,
            maximize=self.maximize,
        )

    def adam(
        self,
        params: list[torch.Tensor],
        grads: list[torch.Tensor],
        sbs_setting: list[bool],
        exp_avgs: list[torch.Tensor],
        exp_avg_sqs: list[torch.Tensor],
        state_steps: list[torch.Tensor],
        beta1: float,
        beta2: float,
        lr: float,
        eps: float,
        maximize: bool,
    ) -> None:
        """Apply the bias corrected Adam update in place.

        All lists are aligned by index. The moment buffers and step counters
        are modified in place, so the optimizer state is updated as well.
        """

        with torch.no_grad():

            for i, param in enumerate(params):

                if maximize is False:
                    grad = grads[i]
                else:
                    grad = -grads[i]

                exp_avg = exp_avgs[i]
                exp_avg_sq = exp_avg_sqs[i]
                step_t = state_steps[i]

                # increase step
                step_t += 1

                # Decay the first and second moment running average coefficient
                exp_avg *= beta1
                exp_avg += (1.0 - beta1) * grad

                exp_avg_sq *= beta2
                exp_avg_sq += (1.0 - beta2) * grad**2

                # Bias correction of the first moment is folded into the
                # step size, that of the second moment into the denominator.
                step_size: float = lr / (1.0 - beta1 ** float(step_t))

                denom = (
                    exp_avg_sq.sqrt() / math.sqrt(1.0 - beta2 ** float(step_t))
                ) + eps

                if sbs_setting[i] is False:
                    param -= step_size * (exp_avg / denom)
                else:
                    # Multiplicative update: the sign of param is preserved.
                    delta = torch.exp(-step_size * (exp_avg / denom))
                    # Diagnostic output: smallest / largest relative change
                    # and the learning rate in use.
                    print(
                        f"{float(delta.min()) - 1.0:.4e} {float(delta.max()) - 1.0:.4e} {lr:.4e}"
                    )
                    param *= delta

View file

@ -0,0 +1,714 @@
#include "HDynamicCNNManyIP.h"
#include <omp.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <cassert>
#include <iostream>
// Constructor: performs no work.
HDynamicCNNManyIP::HDynamicCNNManyIP() = default;
// Destructor: nothing to release.
HDynamicCNNManyIP::~HDynamicCNNManyIP() = default;
// Entry point exposed to Python: checks the raw tensor descriptors, derives
// the element strides and runs the h-dynamic for every pattern of the batch.
//
// Parameters
//   *_pointer_addr       Raw addresses, reinterpreted as float* (h,
//                        epsilon_xy, epsilon_t, weights, init_vector) or
//                        int64_t* (input).
//   h_dim_0..3           Extents of h; dims 1..3 define its strides.
//   epsilon_xy_dim_0..2  Extents of epsilon_xy; dims 1..2 define its strides.
//   epsilon_t_dim_0      Extent of epsilon_t (only validated).
//   weights_dim_0..1     Extents of the weights; dim 1 is the row stride and
//                        must equal init_vector_dim_0.
//   input_dim_0..3       Extents of the spike input: dim 0 is the number of
//                        patterns, dim 1 the number of spikes, dims 2/3 the
//                        x/y extent that is iterated.
//   init_vector_dim_0    Length of the initial h vector (h_dim).
//   number_of_processes  > 0: CPU path using that many OpenMP threads, one
//                        pattern per loop iteration. Otherwise: CUDA path.
//   forgetting_offset    Forwarded unchanged and, divided by h_dim, as the
//                        per-element offset.
//   gpu_tuning_factor    Only forwarded to gpu_update().
//
// Returns true. Violated preconditions trip an assert (active unless the
// translation unit is built with NDEBUG).
//
// NOTE(review): on the CUDA path the addresses presumably have to be device
// pointers -- confirm against the Python caller.
bool HDynamicCNNManyIP::update_entrypoint(
    int64_t h_pointer_addr,
    int64_t h_dim_0,
    int64_t h_dim_1,
    int64_t h_dim_2,
    int64_t h_dim_3,
    int64_t epsilon_xy_pointer_addr,
    int64_t epsilon_xy_dim_0,
    int64_t epsilon_xy_dim_1,
    int64_t epsilon_xy_dim_2,
    int64_t epsilon_t_pointer_addr,
    int64_t epsilon_t_dim_0,
    int64_t weights_pointer_addr,
    int64_t weights_dim_0,
    int64_t weights_dim_1,
    int64_t input_pointer_addr,
    int64_t input_dim_0,
    int64_t input_dim_1,
    int64_t input_dim_2,
    int64_t input_dim_3,
    int64_t init_vector_pointer_addr,
    int64_t init_vector_dim_0,
    int64_t number_of_processes,
    float forgetting_offset,
    int64_t gpu_tuning_factor)
{
    size_t number_of_pattern = input_dim_0;

    size_t h_dim = init_vector_dim_0;
    float* h_init_ptr = (float*)init_vector_pointer_addr;
    assert((h_init_ptr != nullptr));
    assert((h_dim > 0));

    float* h_pointer = (float*)h_pointer_addr;
    assert((h_pointer != nullptr));
    assert((h_dim_0 > 0));
    assert((h_dim_1 > 0));
    assert((h_dim_2 > 0));
    assert((h_dim_3 > 0));

    size_t h_dim_c0 = h_dim_1 * h_dim_2 * h_dim_3;
    size_t h_dim_c1 = h_dim_2 * h_dim_3;
    size_t h_dim_c2 = h_dim_3;

    float* epsilon_xy_pointer = (float*)epsilon_xy_pointer_addr;
    assert((epsilon_xy_pointer != nullptr));
    assert((epsilon_xy_dim_0 > 0));
    assert((epsilon_xy_dim_1 > 0));
    // dim_2 enters both strides below, so it is validated like the others.
    assert((epsilon_xy_dim_2 > 0));

    size_t epsilon_xy_dim_c0 = epsilon_xy_dim_2 * epsilon_xy_dim_1;
    size_t epsilon_xy_dim_c1 = epsilon_xy_dim_2;

    float* epsilon_t_pointer = (float*)epsilon_t_pointer_addr;
    assert((epsilon_t_pointer != nullptr));
    assert((epsilon_t_dim_0 > 0));

    float* weights_pointer = (float*)weights_pointer_addr;
    assert((weights_pointer != nullptr));
    assert((weights_dim_0 > 0));
    assert((weights_dim_1 > 0));

    size_t weights_dim_c0 = weights_dim_1;

    int64_t* input_pointer = (int64_t*)input_pointer_addr;
    assert((input_pointer != nullptr));
    assert((input_dim_0 > 0));
    assert((input_dim_1 > 0));
    assert((input_dim_2 > 0));
    assert((input_dim_3 > 0));

    size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3;
    size_t input_dim_c1 = input_dim_2 * input_dim_3;
    size_t input_dim_c2 = input_dim_3;

    // Every weight row is multiplied element-wise with an h vector.
    assert((h_dim == weights_dim_1));

    size_t number_of_spikes = input_dim_1;
    size_t dim_x = input_dim_2;
    size_t dim_y = input_dim_3;

    float forgetting_offset_local = forgetting_offset / static_cast<float>(h_dim);

    // --------------------
    if (number_of_processes > 0)
    {
        // CPU: the patterns are independent, so they are spread over threads.
        omp_set_num_threads(static_cast<int>(number_of_processes));

#pragma omp parallel for
        for (size_t pattern_id = 0; pattern_id < number_of_pattern; pattern_id++)
        {
            update(
                h_init_ptr,
                h_pointer,
                h_dim_c0,
                h_dim_c1,
                h_dim_c2,
                h_dim,
                epsilon_xy_pointer,
                epsilon_xy_dim_c0,
                epsilon_xy_dim_c1,
                epsilon_t_pointer,
                weights_pointer,
                weights_dim_c0,
                input_pointer,
                input_dim_c0,
                input_dim_c1,
                input_dim_c2,
                number_of_spikes,
                dim_x,
                dim_y,
                forgetting_offset,
                forgetting_offset_local,
                pattern_id);
        }
    }
    else
    {
        // GPU: one call handles all patterns.
        gpu_update(
            h_init_ptr,
            h_pointer,
            h_dim_c0,
            h_dim_c1,
            h_dim_c2,
            h_dim,
            epsilon_xy_pointer,
            epsilon_xy_dim_c0,
            epsilon_xy_dim_c1,
            epsilon_t_pointer,
            weights_pointer,
            weights_dim_c0,
            input_pointer,
            input_dim_c0,
            input_dim_c1,
            input_dim_c2,
            number_of_spikes,
            dim_x,
            dim_y,
            forgetting_offset,
            forgetting_offset_local,
            number_of_pattern,
            gpu_tuning_factor);
    }

    return true;
}
// CPU update of one pattern: visits every (x, y) position of the pattern and
// runs update_one_ip() on the h / input / epsilon_xy data at that position.
// Always returns true.
bool HDynamicCNNManyIP::update(
    float* h_init_ptr,
    float* h_pointer,
    size_t h_dim_c0,
    size_t h_dim_c1,
    size_t h_dim_c2,
    size_t h_dim,
    float* epsilon_xy_pointer,
    size_t epsilon_xy_dim_c0,
    size_t epsilon_xy_dim_c1,
    float* epsilon_t_pointer,
    float* weights_pointer,
    size_t weights_dim_c0,
    int64_t* input_pointer,
    size_t input_dim_c0,
    size_t input_dim_c1,
    size_t input_dim_c2,
    size_t number_of_spikes,
    size_t dim_x,
    size_t dim_y,
    float forgetting_offset,
    float forgetting_offset_local,
    size_t pattern_id)
{
    // Start of this pattern inside the h and input buffers.
    float* const h_pattern = h_pointer + pattern_id * h_dim_c0;
    int64_t* const input_pattern = input_pointer + pattern_id * input_dim_c0;

    for (size_t x = 0; x < dim_x; ++x)
    {
        for (size_t y = 0; y < dim_y; ++y)
        {
            update_one_ip(
                h_init_ptr,
                h_pattern + x * h_dim_c2 + y,
                h_dim_c1,
                h_dim,
                weights_pointer,
                weights_dim_c0,
                input_pattern + x * input_dim_c2 + y,
                input_dim_c1,
                epsilon_xy_pointer + x * epsilon_xy_dim_c1 + y,
                epsilon_xy_dim_c0,
                epsilon_t_pointer,
                number_of_spikes,
                forgetting_offset,
                forgetting_offset_local);
        }
    }

    return true;
}
// Runs the h-dynamic for one (pattern, x, y) position on the CPU.
//
// Starting from a copy of h_init_ptr, every non-negative spike index read
// from input_pointer (stride input_dim_c1) updates a working copy of h using
// the weight row of that spike. The working copy is kept un-normalized and is
// divided by the accumulated scale factor epsilon_scale only when that factor
// grows too large and once at the end. The result is written to h_pointer
// with stride h_dim_c1.
//
// h_pointer, input_pointer and epsilon_xy_pointer are expected to already
// point at the element belonging to this position.
void HDynamicCNNManyIP::update_one_ip(
float* h_init_ptr,
float* h_pointer,
size_t h_dim_c1,
size_t h_dim,
float* weights_pointer,
size_t weights_dim_c0,
int64_t* input_pointer,
size_t input_dim_c1,
float* epsilon_xy_pointer,
size_t epsilon_xy_dim_c0,
float* epsilon_t_pointer,
size_t number_of_spikes,
float forgetting_offset,
float forgetting_offset_local)
{
// Scratch buffers; both are released at the end of the function.
float* h_temp = new float[h_dim];
float* h_subsegment = new float[h_dim];
memcpy(h_subsegment, h_init_ptr, sizeof(float) * h_dim);
size_t counter_spike;
size_t counter;
float h_temp_sum;
float temp_value;
float epsilon_subsegment;
float epsilon_scale = 1.0;
int64_t* spike;
float* w_ptr;
for (counter_spike = 0; counter_spike < number_of_spikes; counter_spike++)
{
// Renormalize the working copy before the scale factor gets too large.
if (epsilon_scale > 1E10)
{
temp_value = 1.0 / epsilon_scale;
#pragma omp simd
for (counter = 0; counter < h_dim; counter++)
{
h_subsegment[counter] *= temp_value;
}
epsilon_scale = 1.0;
}
spike = input_pointer + counter_spike * input_dim_c1;
// Negative entries are skipped.
if (*spike >= 0)
{
// Effective epsilon: spatial factor of this spike times the factor of
// this spike slot.
epsilon_subsegment =
epsilon_xy_pointer[*spike *epsilon_xy_dim_c0] * epsilon_t_pointer[counter_spike];
w_ptr = weights_pointer + *spike * weights_dim_c0;
// h_temp = h_subsegment * w (element-wise)
memcpy(h_temp, h_subsegment, sizeof(float) * h_dim);
#pragma omp simd
for (counter = 0; counter < h_dim; counter++)
{
h_temp[counter] *= w_ptr[counter];
}
h_temp_sum = 0.0;
#pragma omp simd reduction(+ : h_temp_sum)
for (counter = 0; counter < h_dim; counter++)
{
h_temp_sum += h_temp[counter];
}
// A (near) zero sum would blow up the division below, so such spikes
// leave h untouched.
if (h_temp_sum > 1E-10)
{
temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum;
#pragma omp simd
for (counter = 0; counter < h_dim; counter++)
{
h_temp[counter] *= temp_value;
}
#pragma omp simd
for (counter = 0; counter < h_dim; counter++)
{
h_subsegment[counter] += h_temp[counter];
}
if (forgetting_offset_local > 0.0)
{
// Adds the same constant to every element; the scale factor grows
// by the additional forgetting_offset term.
temp_value =
epsilon_scale * epsilon_subsegment * forgetting_offset_local;
#pragma omp simd
for (counter = 0; counter < h_dim; counter++)
{
h_subsegment[counter] += temp_value;
}
epsilon_scale *=
1.0 + epsilon_subsegment * (1.0 + forgetting_offset);
}
else
{
epsilon_scale *= 1.0 + epsilon_subsegment * 1.0;
}
}
}
}
// Undo the accumulated scale while writing the result (strided store).
temp_value = 1.0 / epsilon_scale;
#pragma omp simd
for (counter = 0; counter < h_dim; counter++)
{
h_pointer[counter * h_dim_c1] =
h_subsegment[counter] * temp_value;
}
delete[] h_temp;
delete[] h_subsegment;
return;
};
// Device-side version of the h-dynamic for one (pattern, x, y) position.
//
// Performs the same sequence of steps as HDynamicCNNManyIP::update_one_ip,
// but uses the caller-provided scratch buffers h_temp and h_subsegment (each
// must hold h_dim floats) instead of allocating its own. The result is
// written to h_pointer with stride h_dim_c1.
__device__ void gpu_update_one_ip(
float* __restrict__ h_init_ptr,
float* __restrict__ h_pointer,
size_t h_dim_c1,
size_t h_dim,
float* __restrict__ weights_pointer,
size_t weights_dim_c0,
int64_t* input_pointer,
size_t input_dim_c1,
float* __restrict__ epsilon_xy_pointer,
size_t epsilon_xy_dim_c0,
float* __restrict__ epsilon_t_pointer,
size_t number_of_spikes,
float forgetting_offset,
float forgetting_offset_local,
float* __restrict__ h_temp,
float* __restrict__ h_subsegment
)
{
size_t counter_spike;
size_t counter;
float h_temp_sum;
float temp_value;
float epsilon_subsegment;
float epsilon_scale = 1.0;
int64_t* spike;
float* w_ptr;
// The scratch buffers are passed in by the caller rather than allocated here:
// float* h_temp = new float[h_dim];
// float* h_subsegment = new float[h_dim];
// Initialize the sub-segment
for (counter = 0; counter < h_dim; counter++)
{
h_subsegment[counter] = h_init_ptr[counter];
}
for (counter_spike = 0; counter_spike < number_of_spikes; counter_spike++)
{
// Renormalize the working copy before the scale factor gets too large.
if (epsilon_scale > 1E10)
{
temp_value = 1.0 / epsilon_scale;
for (counter = 0; counter < h_dim; counter++)
{
h_subsegment[counter] *= temp_value;
}
epsilon_scale = 1.0;
}
spike = input_pointer + counter_spike * input_dim_c1;
// Negative entries are skipped.
if (*spike >= 0)
{
epsilon_subsegment =
epsilon_xy_pointer[*spike *epsilon_xy_dim_c0] * epsilon_t_pointer[counter_spike];
w_ptr = weights_pointer + *spike * weights_dim_c0;
// h_temp = h_subsegment * w (element-wise)
for (counter = 0; counter < h_dim; counter++)
{
h_temp[counter] = h_subsegment[counter] * w_ptr[counter];
}
h_temp_sum = 0.0;
for (counter = 0; counter < h_dim; counter++)
{
h_temp_sum += h_temp[counter];
}
// A (near) zero sum would blow up the division below, so such spikes
// leave h untouched.
if (h_temp_sum > 1E-10)
{
temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum;
for (counter = 0; counter < h_dim; counter++)
{
h_temp[counter] *= temp_value;
}
for (counter = 0; counter < h_dim; counter++)
{
h_subsegment[counter] += h_temp[counter];
}
if (forgetting_offset_local > 0.0)
{
// Adds the same constant to every element; the scale factor grows
// by the additional forgetting_offset term.
temp_value =
epsilon_scale * epsilon_subsegment * forgetting_offset_local;
for (counter = 0; counter < h_dim; counter++)
{
h_subsegment[counter] += temp_value;
}
epsilon_scale *=
1.0 + epsilon_subsegment * (1.0 + forgetting_offset);
}
else
{
epsilon_scale *= 1.0 + epsilon_subsegment * 1.0;
}
}
}
}
// Undo the accumulated scale while writing the result (strided store).
temp_value = 1.0 / epsilon_scale;
for (counter = 0; counter < h_dim; counter++)
{
h_pointer[counter * h_dim_c1] =
h_subsegment[counter] * temp_value;
}
// delete[] h_temp;
// delete[] h_subsegment;
return;
};
// CUDA kernel: one thread per (pattern, x, y) task.
//
// The flat thread index is split into pattern_id and the (x, y) position.
// The thread then runs gpu_update_one_ip() on the matching slices of h,
// input and epsilon_xy, using its own h_dim-sized slice of temp_memory_a and
// temp_memory_b as scratch space.
//
//   dim_xy                must equal dim_x * dim_y (the caller passes it so).
//   max_threadable_tasks  number of valid tasks; surplus threads do nothing.
//   temp_memory_a/b       at least max_threadable_tasks * h_dim floats each.
//
// NOTE(review): despite its name this kernel performs the h-dynamic update,
// not spike generation; the name is kept because the launch site uses it.
__global__ void kernel_spike_generation(
    float* __restrict__ h_init_ptr,
    float* __restrict__ h_pointer,
    size_t h_dim_c0,
    size_t h_dim_c1,
    size_t h_dim_c2,
    size_t h_dim,
    float* __restrict__ weights_pointer,
    size_t weights_dim_c0,
    int64_t* __restrict__ input_pointer,
    size_t input_dim_c0,
    size_t input_dim_c1,
    size_t input_dim_c2,
    float* __restrict__ epsilon_xy_pointer,
    size_t epsilon_xy_dim_c0,
    size_t epsilon_xy_dim_c1,
    float* __restrict__ epsilon_t_pointer,
    size_t number_of_spikes,
    float forgetting_offset,
    float forgetting_offset_local,
    size_t dim_x,
    size_t dim_y,
    size_t dim_xy,
    size_t max_threadable_tasks,
    float* __restrict__ temp_memory_a,
    float* __restrict__ temp_memory_b
)
{
    // Widen before multiplying: blockIdx.x * blockDim.x is otherwise a 32-bit
    // product and the former `int` index wrapped for very large task counts.
    const size_t idx =
        threadIdx.x + static_cast<size_t>(blockIdx.x) * blockDim.x;

    if (idx >= max_threadable_tasks)
    {
        return;
    }

    // Per-thread scratch space.
    float* temp_memory_ptr_a = temp_memory_a + idx * h_dim;
    float* temp_memory_ptr_b = temp_memory_b + idx * h_dim;

    // idx = pattern_id * dim_xy + position_x * dim_y + position_y
    const size_t pattern_id = idx / dim_xy;
    const size_t position_xy = idx - (pattern_id * dim_xy);
    const size_t position_x = position_xy / dim_y;
    const size_t position_y = position_xy - (position_x * dim_y);

    float* epsilon_xy_ptr = epsilon_xy_pointer +
        position_x * epsilon_xy_dim_c1 + position_y;

    float* h_ptr = h_pointer +
        pattern_id * h_dim_c0 + position_x * h_dim_c2 + position_y;

    int64_t* input_ptr = input_pointer +
        pattern_id * input_dim_c0 + position_x * input_dim_c2 + position_y;

    gpu_update_one_ip(
        h_init_ptr,
        h_ptr,
        h_dim_c1,
        h_dim,
        weights_pointer,
        weights_dim_c0,
        input_ptr,
        input_dim_c1,
        epsilon_xy_ptr,
        epsilon_xy_dim_c0,
        epsilon_t_pointer,
        number_of_spikes,
        forgetting_offset,
        forgetting_offset_local,
        temp_memory_ptr_a,
        temp_memory_ptr_b
    );
}
// Let's face it... We need a better way to parallelize it...
bool HDynamicCNNManyIP::gpu_update(
float* h_init_ptr,
float* h_pointer,
size_t h_dim_c0,
size_t h_dim_c1,
size_t h_dim_c2,
size_t h_dim,
float* epsilon_xy_pointer,
size_t epsilon_xy_dim_c0,
size_t epsilon_xy_dim_c1,
float* epsilon_t_pointer,
float* weights_pointer,
size_t weights_dim_c0,
int64_t* input_pointer,
size_t input_dim_c0,
size_t input_dim_c1,
size_t input_dim_c2,
size_t number_of_spikes,
size_t dim_x,
size_t dim_y,
float forgetting_offset,
float forgetting_offset_local,
size_t number_of_pattern,
size_t gpu_tuning_factor)
{
cudaError_t status;
assert((dim_x < 65535));
assert((dim_y < 65535));
// // //////////////////////////////////////
// // Get infos about the device
// // //////////////////////////////////////
// int device;
// cudaDeviceProp prop;
// status = cudaGetDevice(&device);
// assert((status == cudaSuccess));
// // std::cout << "Device ID: " << device << std::endl;
// status = cudaGetDeviceProperties(&prop, device);
// assert((status == cudaSuccess));
// // std::cout << "Device name: " << prop.name << std::endl;
// int _cuda_heap_size_in_mb = 16;
// status = cudaDeviceSetLimit(cudaLimitMallocHeapSize, _cuda_heap_size_in_mb * (1 << 20));
// assert((status == cudaSuccess));
// size_t pValue;
// cudaDeviceGetLimit(&pValue, cudaLimitMallocHeapSize);
// std::cout << pValue << " " << (pValue/(2*4*h_dim)) << std::endl;
// exit(1);
// //////////////////////////////////////
// Calculate the distribution on the GPU
// //////////////////////////////////////
int min_grid_size;
int block_size;
int grid_size;
size_t dynamic_s_mem_size = 0;
size_t max_threadable_tasks = number_of_pattern * dim_x * dim_y;
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html?highlight=blocksize#occupancy-calculator
status = cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size,
(void*)kernel_spike_generation,
dynamic_s_mem_size, max_threadable_tasks);
assert((status == cudaSuccess));
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications
// Maximum dimensionality of grid of thread blocks: 3
// Maximum x -dimension of a grid of thread blocks: (2^31)-1
// Maximum y- or z-dimension of a grid of thread blocks: 65535
// Reduce the automatic block size with our guess
if ((gpu_tuning_factor > 0) && (gpu_tuning_factor < block_size))
{
block_size = int(gpu_tuning_factor);
}
// Round up according to array size
// (I will separate x and y into other grid dimentsions soon)
// grid_size = (number_of_pattern + block_size - 1) / block_size;
grid_size = (max_threadable_tasks + block_size - 1) / block_size;
// std::cout << min_grid_size << std::endl;
// std::cout << grid_size << std::endl;
// std::cout << block_size << std::endl;
// std::cout << max_threadable_tasks << std::endl;
//dim3 grid(grid_size, dim_x, dim_y);
float* temp_memory_a = nullptr;
status = cudaMalloc((void**)&temp_memory_a, h_dim * max_threadable_tasks * sizeof(float));
assert((status == cudaSuccess));
float* temp_memory_b = nullptr;
status = cudaMalloc((void**)&temp_memory_b, h_dim * max_threadable_tasks * sizeof(float));
assert((status == cudaSuccess));
//kernel_spike_generation<<<grid, block_size >>>(
kernel_spike_generation<<<grid_size, block_size >>>(
h_init_ptr,
h_pointer,
h_dim_c0,
h_dim_c1,
h_dim_c2,
h_dim,
weights_pointer,
weights_dim_c0,
input_pointer,
input_dim_c0,
input_dim_c1,
input_dim_c2,
epsilon_xy_pointer,
epsilon_xy_dim_c0,
epsilon_xy_dim_c1,
epsilon_t_pointer,
number_of_spikes,
forgetting_offset,
forgetting_offset_local,
dim_x,
dim_y,
(dim_x * dim_y),
//number_of_pattern
max_threadable_tasks,
temp_memory_a,
temp_memory_b
);
status = cudaDeviceSynchronize();
assert((status == cudaSuccess));
status = cudaFree(temp_memory_a);
assert((status == cudaSuccess));
status = cudaFree(temp_memory_b);
assert((status == cudaSuccess));
return true;
};

View file

@ -0,0 +1,111 @@
#ifndef SRC_HDYNAMICCNNMANYIP_H_
#define SRC_HDYNAMICCNNMANYIP_H_
#include <unistd.h>
#include <cctype>
#include <iostream>
// Computes the h-dynamic for a batch of patterns, either on the CPU (OpenMP)
// or on the GPU (CUDA). The class has no data members; all data is passed in
// through the arguments of update_entrypoint().
class HDynamicCNNManyIP
{
public:
HDynamicCNNManyIP();
~HDynamicCNNManyIP();
// Public entry point. Tensors are described by a raw address plus their
// extents. number_of_processes > 0 selects the CPU implementation with that
// many threads, any other value selects the GPU implementation.
bool update_entrypoint(
int64_t h_pointer_addr,
int64_t h_dim_0,
int64_t h_dim_1,
int64_t h_dim_2,
int64_t h_dim_3,
int64_t epsilon_xy_pointer_addr,
int64_t epsilon_xy_dim_0,
int64_t epsilon_xy_dim_1,
int64_t epsilon_xy_dim_2,
int64_t epsilon_t_pointer_addr,
int64_t epsilon_t_dim_0,
int64_t weights_pointer_addr,
int64_t weights_dim_0,
int64_t weights_dim_1,
int64_t input_pointer_addr,
int64_t input_dim_0,
int64_t input_dim_1,
int64_t input_dim_2,
int64_t input_dim_3,
int64_t init_vector_pointer_addr,
int64_t init_vector_dim_0,
int64_t number_of_processes,
float forgetting_offset,
int64_t gpu_tuning_factor);
private:
// CPU: processes all (x, y) positions of the pattern pattern_id.
// The *_dim_c* arguments are element strides.
bool update(
float* h_init_ptr,
float* h_pointer,
size_t h_dim_c0,
size_t h_dim_c1,
size_t h_dim_c2,
size_t h_dim,
float* epsilon_xy_pointer,
size_t epsilon_xy_dim_c0,
size_t epsilon_xy_dim_c1,
float* epsilon_t_pointer,
float* weights_pointer,
size_t weights_dim_c0,
int64_t* input_pointer,
size_t input_dim_c0,
size_t input_dim_c1,
size_t input_dim_c2,
size_t number_of_spikes,
size_t dim_x,
size_t dim_y,
float forgetting_offset,
float forgetting_offset_local,
size_t pattern_id);
// CPU: h-dynamic for a single (pattern, x, y) position. The pointers must
// already address the element of that position.
void update_one_ip(
float* h_init_ptr,
float* h_pointer,
size_t h_dim_c1,
size_t h_dim,
float* weights_pointer,
size_t weights_dim_c0,
int64_t* input_pointer,
size_t input_dim_c1,
float* epsilon_xy_pointer,
size_t epsilon_xy_dim_c0,
float* epsilon_t_pointer,
size_t number_of_spikes,
float forgetting_offset,
float forgetting_offset_local);
// GPU: processes all patterns with one kernel launch. gpu_tuning_factor > 0
// caps the CUDA block size.
bool gpu_update(
float* h_init_ptr,
float* h_pointer,
size_t h_dim_c0,
size_t h_dim_c1,
size_t h_dim_c2,
size_t h_dim,
float* epsilon_xy_pointer,
size_t epsilon_xy_dim_c0,
size_t epsilon_xy_dim_c1,
float* epsilon_t_pointer,
float* weights_pointer,
size_t weights_dim_c0,
int64_t* input_pointer,
size_t input_dim_c0,
size_t input_dim_c1,
size_t input_dim_c2,
size_t number_of_spikes,
size_t dim_x,
size_t dim_y,
float forgetting_offset,
float forgetting_offset_local,
size_t number_of_pattern,
size_t gpu_tuning_factor);
};
#endif /* SRC_HDYNAMICCNNMANYIP_H_ */

67
network/CPP_Cuda/Makefile Normal file
View file

@ -0,0 +1,67 @@
# Builds the three pybind11 extension modules with nvcc (host compiler: clang++).
#
# Change to your python bin directory (tested with Python 3.10.4)
PYBIN=~/P3.10GPU/bin/
NVCC=/usr/local/cuda-12/bin/nvcc -allow-unsupported-compiler
CC=/usr/lib64/ccache/clang++

PYBIND11INCLUDE=`$(PYBIN)python3 -m pybind11 --includes`
PARAMETERS_O= -O3 -std=c++14 $(PYBIND11INCLUDE) -ccbin=$(CC) \
	-Xcompiler "-fPIC -Wall -fopenmp=libomp"

PARAMETERS_Linker=-Xcompiler "-shared -lm -lomp -lstdc++ -Wall"

# Platform specific file name suffix of Python extension modules
PYPOSTFIX=`$(PYBIN)python3-config --extension-suffix`

# Neither target creates a file of its own name; without this declaration a
# stray file called "all" or "clean" would silently disable the target.
.PHONY: all clean

all: PyHDynamicCNNManyIP \
	PySpikeGeneration2DManyIP \
	PyMultiApp

#######################

HDynamicCNNManyIP.o: HDynamicCNNManyIP.h HDynamicCNNManyIP.cu
	$(NVCC) $(PARAMETERS_O) -c HDynamicCNNManyIP.cu -o HDynamicCNNManyIP.o

PyHDynamicCNNManyIP.o: HDynamicCNNManyIP.h PyHDynamicCNNManyIP.cpp
	$(NVCC) $(PARAMETERS_O) -c PyHDynamicCNNManyIP.cpp -o PyHDynamicCNNManyIP.o

PyHDynamicCNNManyIP: HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o
	$(NVCC) $(PARAMETERS_Linker) -o PyHDynamicCNNManyIP HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o
	cp PyHDynamicCNNManyIP PyHDynamicCNNManyIP$(PYPOSTFIX)
	$(PYBIN)python3 pybind11_auto_pyi.py

#######################

SpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h SpikeGeneration2DManyIP.cu
	$(NVCC) $(PARAMETERS_O) -c SpikeGeneration2DManyIP.cu -o SpikeGeneration2DManyIP.o

PySpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h PySpikeGeneration2DManyIP.cpp
	$(NVCC) $(PARAMETERS_O) -c PySpikeGeneration2DManyIP.cpp -o PySpikeGeneration2DManyIP.o

PySpikeGeneration2DManyIP: SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o
	$(NVCC) $(PARAMETERS_Linker) -o PySpikeGeneration2DManyIP SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o
	cp PySpikeGeneration2DManyIP PySpikeGeneration2DManyIP$(PYPOSTFIX)
	$(PYBIN)python3 pybind11_auto_pyi.py

#######################

# MultiApp.cu textually includes the approximation sources, hence the
# additional prerequisites.
MultiApp.o: MultiApp.h MultiApp.cu approximation_multiplication_function.cpp \
	gpu_approximation_multiplication_function.cu error_term.cpp gpu_error_term.cu
	$(NVCC) $(PARAMETERS_O) -c MultiApp.cu -o MultiApp.o

PyMultiApp.o: MultiApp.h PyMultiApp.cpp
	$(NVCC) $(PARAMETERS_O) -c PyMultiApp.cpp -o PyMultiApp.o

PyMultiApp: MultiApp.o PyMultiApp.o
	$(NVCC) $(PARAMETERS_Linker) -o PyMultiApp MultiApp.o PyMultiApp.o
	cp PyMultiApp PyMultiApp$(PYPOSTFIX)
	$(PYBIN)python3 pybind11_auto_pyi.py

#######################

clean:
	rm -f PyHDynamicCNNManyIP
	rm -f PySpikeGeneration2DManyIP
	rm -f PyMultiApp
	rm -f *.o
	rm -f *.so

View file

@ -0,0 +1,313 @@
#include "MultiApp.h"
#include <omp.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <vector>
#include "approximation_multiplication_function.cpp"
#include "gpu_approximation_multiplication_function.cu"
// Constructor: performs no work.
MultiApp::MultiApp() = default;
// Destructor: nothing to release.
MultiApp::~MultiApp() = default;
// CPU path for a single pattern (id_pattern).
//
// For every (x, y) position and every output feature the input-channel
// vector at that position is gathered, multiplied element-wise with the
// weight row of the feature through approximation_multiplication_function()
// (which leaves the products in the gathered buffer) and summed up into one
// output value. Always returns true.
bool MultiApp::update(float* np_input_pointer,
                      float* np_weight_pointer,
                      float* np_output_pointer, int64_t pattern_dim,
                      int64_t feature_dim, int64_t x_dim, int64_t y_dim,
                      int64_t input_channel_dim, int64_t id_pattern,
                      bool approximation_enable, int64_t number_of_trunc_bits,
                      int64_t number_of_frac_bits)
{
    assert((id_pattern >= 0));
    assert((id_pattern < pattern_dim));

    const uint64_t pattern_size = input_channel_dim;

    // Work buffers handed to the approximate multiplier, one entry per
    // input channel.
    std::vector<float> ap_h_vector(pattern_size);
    std::vector<uint32_t> ap_x_vector(pattern_size);
    std::vector<uint32_t> ap_y_vector(pattern_size);
    std::vector<uint32_t> ap_x_exponent_vector(pattern_size);
    std::vector<uint32_t> ap_y_exponent_vector(pattern_size);
    std::vector<uint32_t> ap_h_exponent_vector(pattern_size);
    std::vector<uint64_t> ap_res_vector(pattern_size);
    std::vector<uint32_t> sign_temp_vector(pattern_size);

    float* ap_h_ptr = ap_h_vector.data();
    uint32_t* ap_x_ptr = ap_x_vector.data();
    uint32_t* ap_y_ptr = ap_y_vector.data();
    uint32_t* ap_x_exponent_ptr = ap_x_exponent_vector.data();
    uint32_t* ap_y_exponent_ptr = ap_y_exponent_vector.data();
    uint32_t* ap_h_exponent_ptr = ap_h_exponent_vector.data();
    uint64_t* ap_res_ptr = ap_res_vector.data();
    uint32_t* sign_temp_ptr = sign_temp_vector.data();

    // Bit mask with the lowest number_of_trunc_bits bits set.
    uint32_t ap_mask = static_cast<uint64_t>(pow(2, number_of_trunc_bits)) - 1;

    // Start of this pattern inside the input and output buffers.
    const uint64_t input_pattern_size = input_channel_dim * x_dim * y_dim;
    const uint64_t output_pattern_size = feature_dim * x_dim * y_dim;

    float* const input_pattern =
        np_input_pointer + id_pattern * input_pattern_size;
    float* const output_pattern =
        np_output_pointer + id_pattern * output_pattern_size;

    // Stride between two consecutive channels / features.
    const uint64_t pattern_c_2 = x_dim * y_dim;

    for (uint64_t pos_x = 0; pos_x < x_dim; pos_x++)
    {
        for (uint64_t pos_y = 0; pos_y < y_dim; pos_y++)
        {
            const uint64_t pos_xy = pos_y + pos_x * y_dim;
            float* const input_ptr = input_pattern + pos_xy;

            for (uint64_t feature = 0; feature < feature_dim; feature++)
            {
                float* const output_ptr =
                    output_pattern + (feature * pattern_c_2 + pos_xy);
                float* const w_ptr =
                    np_weight_pointer + feature * input_channel_dim;

                // Gather the channel vector of this position.
                uint64_t channel;
#pragma omp simd
                for (channel = 0; channel < pattern_size; channel++)
                {
                    ap_h_ptr[channel] = input_ptr[channel * pattern_c_2];
                }

                approximation_multiplication_function(
                    ap_h_ptr, w_ptr, pattern_size, number_of_trunc_bits,
                    number_of_frac_bits, ap_x_ptr, ap_y_ptr, ap_x_exponent_ptr,
                    ap_y_exponent_ptr, ap_h_exponent_ptr, ap_mask, ap_res_ptr,
                    sign_temp_ptr, approximation_enable);

                float temp_sum = 0.0;
#pragma omp simd reduction(+ : temp_sum)
                for (channel = 0; channel < pattern_size; channel++)
                {
                    temp_sum += ap_h_ptr[channel];
                }

                *output_ptr = temp_sum;
            }
        }
    }

    return true;
}
// Python-facing entry point: turns the raw addresses into float pointers,
// checks the extents and processes all patterns either on the CPU
// (number_of_processes > 0, that many OpenMP threads, one pattern per loop
// iteration) or through update_gpu(). Always returns true.
bool MultiApp::update_with_init_vector_multi_pattern(
    int64_t np_input_pointer_addr, int64_t np_weight_pointer_addr,
    int64_t np_output_pointer_addr, int64_t pattern_dim, int64_t feature_dim,
    int64_t x_dim, int64_t y_dim, int64_t input_channel_dim,
    int64_t number_of_processes, bool approximation_enable,
    int64_t number_of_trunc_bits, int64_t number_of_frac)
{
    float* const input = (float*)np_input_pointer_addr;
    float* const weight = (float*)np_weight_pointer_addr;
    float* const output = (float*)np_output_pointer_addr;

    assert((input != nullptr));
    assert((output != nullptr));
    assert((weight != nullptr));

    assert((pattern_dim > 0));
    assert((feature_dim > 0));
    assert((x_dim > 0));
    assert((y_dim > 0));
    assert((input_channel_dim > 0));

    if (number_of_processes <= 0)
    {
        // GPU: one call covers every pattern.
        update_gpu(input, weight, output,
                   pattern_dim, feature_dim, x_dim, y_dim,
                   input_channel_dim, approximation_enable,
                   number_of_trunc_bits, number_of_frac);
        return true;
    }

    // CPU: patterns are independent of each other.
    omp_set_num_threads(number_of_processes);
    // For debugging: Only one thread
    // omp_set_num_threads(1);

    const int64_t number_of_pattern = pattern_dim;

#pragma omp parallel for
    for (int64_t pattern_id = 0; pattern_id < number_of_pattern; pattern_id++)
    {
        update(input, weight, output,
               pattern_dim, feature_dim, x_dim, y_dim,
               input_channel_dim, pattern_id, approximation_enable,
               number_of_trunc_bits, number_of_frac);
    }

    return true;
}
// CUDA kernel: each thread computes one output element.
//
// The x index of the thread encodes (pattern_id, feature_id); blockIdx.y and
// blockIdx.z are used as the x and y position. The thread sums the
// approximate products of the feature's weight row with the input-channel
// vector at that position and stores the sum in output_pointer.
//
//   max_threadable_tasks  number of valid (pattern, feature) pairs; surplus
//                         threads do nothing.
//   input_index_scale     element stride between two input channels.
__global__ void kernel_approx_multiplication(float* __restrict__ input_pointer, float* __restrict__ weight_pointer,
    float* __restrict__ output_pointer, uint64_t pattern_dim,
    uint64_t feature_dim, uint64_t x_dim, uint64_t y_dim,
    uint64_t input_channel_dim, size_t max_threadable_tasks,
    uint64_t input_index_scale, uint64_t number_of_frac_bits,
    bool approximation_enable, uint64_t number_of_trunc_bits,
    uint32_t ap_mask)
{
    // Widen before multiplying: the product is otherwise formed in 32 bits
    // and the former `int` indices truncated large offsets.
    const size_t idx =
        threadIdx.x + static_cast<size_t>(blockIdx.x) * blockDim.x;

    if (idx >= max_threadable_tasks)
    {
        return;
    }

    const uint64_t pattern_id = idx / feature_dim;
    const uint64_t feature_id = idx - (pattern_id * feature_dim);
    const uint64_t x_id = blockIdx.y;
    const uint64_t y_id = blockIdx.z;

    const float* weight_pointer_sub = weight_pointer + feature_id * input_channel_dim;
    const float* input_pointer_sub = input_pointer +
        pattern_id * input_channel_dim * x_dim * y_dim + x_id * y_dim + y_id;
    float* output_pointer_sub = output_pointer +
        pattern_id * feature_dim * x_dim * y_dim +
        feature_id * x_dim * y_dim + x_id * y_dim + y_id;

    // Accumulate locally and store once instead of updating global memory in
    // every iteration; the additions happen in the same order as before.
    float sum = 0.0;

    for (size_t counter = 0; counter < input_channel_dim; counter++)
    {
        sum += gpu_approximation_multiplication_function(
            weight_pointer_sub[counter],
            input_pointer_sub[counter * input_index_scale],
            number_of_frac_bits, approximation_enable,
            number_of_trunc_bits, ap_mask);
    }

    *output_pointer_sub = sum;
}
bool MultiApp::update_gpu(float* np_input_pointer,
float* np_weight_pointer,
float* np_output_pointer, uint64_t pattern_dim,
uint64_t feature_dim, uint64_t x_dim, uint64_t y_dim,
uint64_t input_channel_dim,
bool approximation_enable, uint64_t number_of_trunc_bits,
uint64_t number_of_frac_bits)
{
uint32_t ap_mask = static_cast<uint64_t>(pow(2, number_of_trunc_bits)) - 1;
// std::cout << approximation_enable << std::endl;
// std::cout << number_of_trunc_bits << std::endl;
// std::cout << number_of_frac_bits << std::endl;
cudaError_t status;
assert((x_dim < 65535));
assert((y_dim < 65535));
// //////////////////////////////////////
// Get infos about the device
// //////////////////////////////////////
int device;
cudaDeviceProp prop;
status = cudaGetDevice(&device);
assert((status == cudaSuccess));
// std::cout << "Device ID: " << device << std::endl;
status = cudaGetDeviceProperties(&prop, device);
assert((status == cudaSuccess));
// std::cout << "Device name: " << prop.name << std::endl;
// //////////////////////////////////////
// Calculate the distribution on the GPU
// //////////////////////////////////////
int min_grid_size;
int block_size;
int grid_size;
size_t dynamic_s_mem_size = 0;
size_t max_threadable_tasks = pattern_dim * feature_dim * x_dim * y_dim;
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html?highlight=blocksize#occupancy-calculator
status = cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size,
(void*)kernel_approx_multiplication,
dynamic_s_mem_size, max_threadable_tasks);
assert((status == cudaSuccess));
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications
// Maximum dimensionality of grid of thread blocks: 3
// Maximum x -dimension of a grid of thread blocks: (2^31)-1
// Maximum y- or z-dimension of a grid of thread blocks: 65535
// Round up according to array size
grid_size = ((pattern_dim * feature_dim) + block_size - 1) / block_size;
// std::cout << min_grid_size << std::endl;
// std::cout << grid_size << std::endl;
// std::cout << block_size << std::endl;
// std::cout << max_threadable_tasks << std::endl;
dim3 grid(grid_size, x_dim, y_dim);
kernel_approx_multiplication<<<grid, block_size>>>(np_input_pointer,
np_weight_pointer,
np_output_pointer,
pattern_dim,
feature_dim,
x_dim,
y_dim,
input_channel_dim,
(pattern_dim * feature_dim),
(x_dim * y_dim),
number_of_frac_bits,
approximation_enable,
number_of_trunc_bits,
ap_mask);
cudaDeviceSynchronize();
return true;
};

View file

@ -0,0 +1,39 @@
#ifndef SRC_MultiApp_H_
#define SRC_MultiApp_H_
#include <unistd.h>
#include <cctype>
#include <iostream>
// Multiplies input-channel vectors with weight rows through an (optionally)
// approximate multiplier and sums the products, on the CPU (OpenMP) or the
// GPU (CUDA).
class MultiApp
{
public:
MultiApp();
~MultiApp();
// CPU: processes the single pattern id_pattern
// (requires 0 <= id_pattern < pattern_dim).
bool update(float *np_input_pointer, float *np_weight_pointer,
float *np_output_pointer, int64_t pattern_dim,
int64_t feature_dim, int64_t x_dim, int64_t y_dim,
int64_t input_channel_dim, int64_t id_pattern,
bool approximation_enable, int64_t number_of_trunc_bits,
int64_t number_of_frac);
// GPU: processes all patterns with one kernel launch.
bool update_gpu(float *input_pointer, float *weight_pointer,
float *output_pointer, uint64_t pattern_dim,
uint64_t feature_dim, uint64_t x_dim, uint64_t y_dim,
uint64_t input_channel_dim,
bool approximation_enable, uint64_t number_of_trunc_bits,
uint64_t number_of_frac);
// Entry point bound to Python. The buffers are passed as raw addresses.
// number_of_processes > 0 selects update() with that many threads, any
// other value selects update_gpu().
bool update_with_init_vector_multi_pattern(
int64_t np_input_pointer_addr, int64_t np_weight_pointer_addr,
int64_t np_output_pointer_addr, int64_t pattern_dim, int64_t feature_dim,
int64_t x_dim, int64_t y_dim, int64_t input_channel_dim,
int64_t number_of_processes, bool approximation_enable,
int64_t number_of_trunc_bits, int64_t number_of_frac);
private:
};
#endif /* SRC_MultiApp_H_ */

View file

@ -0,0 +1,14 @@
#include <pybind11/pybind11.h>
#include "HDynamicCNNManyIP.h"
namespace py = pybind11;
// Python module "PyHDynamicCNNManyIP": exposes the class HDynamicCNNManyIP
// with its default constructor and update_entrypoint() under the Python
// method name "update".
PYBIND11_MODULE(PyHDynamicCNNManyIP, m)
{
    m.doc() = "HDynamicCNNManyIP Module";

    py::class_<HDynamicCNNManyIP> binding(m, "HDynamicCNNManyIP");
    binding.def(py::init<>());
    binding.def("update", &HDynamicCNNManyIP::update_entrypoint);
}

View file

@ -0,0 +1,18 @@
#
# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!
#
"""HDynamicCNNManyIP Module"""
from __future__ import annotations
import PyHDynamicCNNManyIP
import typing
__all__ = [
"HDynamicCNNManyIP"
]
# Stub of the pybind11 class. `update` is bound to the C++ method
# HDynamicCNNManyIP::update_entrypoint; its positional arguments follow that
# signature:
#   arg0       h_pointer_addr
#   arg1-4     h_dim_0 .. h_dim_3
#   arg5       epsilon_xy_pointer_addr
#   arg6-8     epsilon_xy_dim_0 .. epsilon_xy_dim_2
#   arg9       epsilon_t_pointer_addr
#   arg10      epsilon_t_dim_0
#   arg11      weights_pointer_addr
#   arg12-13   weights_dim_0 .. weights_dim_1
#   arg14      input_pointer_addr
#   arg15-18   input_dim_0 .. input_dim_3
#   arg19      init_vector_pointer_addr
#   arg20      init_vector_dim_0
#   arg21      number_of_processes (> 0: CPU threads, otherwise GPU)
#   arg22      forgetting_offset
#   arg23      gpu_tuning_factor
class HDynamicCNNManyIP():
def __init__(self) -> None: ...
def update(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int, arg16: int, arg17: int, arg18: int, arg19: int, arg20: int, arg21: int, arg22: float, arg23: int) -> bool: ...
pass

View file

@ -0,0 +1,14 @@
#include <pybind11/pybind11.h>
#include "MultiApp.h"
namespace py = pybind11;
// Python module "PyMultiApp": exposes the class MultiApp with its default
// constructor and the method update_with_init_vector_multi_pattern().
PYBIND11_MODULE(PyMultiApp, m)
{
    m.doc() = "MultiApp Module";

    py::class_<MultiApp> binding(m, "MultiApp");
    binding.def(py::init<>());
    binding.def("update_with_init_vector_multi_pattern",
                &MultiApp::update_with_init_vector_multi_pattern);
}

View file

@ -0,0 +1,18 @@
#
# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!
#
"""MultiApp Module"""
from __future__ import annotations
import PyMultiApp
import typing
__all__ = [
"MultiApp"
]
# Stub of the pybind11 class. The positional arguments of
# `update_with_init_vector_multi_pattern` follow the C++ signature:
#   arg0   np_input_pointer_addr
#   arg1   np_weight_pointer_addr
#   arg2   np_output_pointer_addr
#   arg3   pattern_dim
#   arg4   feature_dim
#   arg5   x_dim
#   arg6   y_dim
#   arg7   input_channel_dim
#   arg8   number_of_processes (> 0: CPU threads, otherwise GPU)
#   arg9   approximation_enable
#   arg10  number_of_trunc_bits
#   arg11  number_of_frac
class MultiApp():
def __init__(self) -> None: ...
def update_with_init_vector_multi_pattern(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: bool, arg10: int, arg11: int) -> bool: ...
pass

View file

@ -0,0 +1,15 @@
#include <pybind11/pybind11.h>
#include "SpikeGeneration2DManyIP.h"
namespace py = pybind11;
// Python module "PySpikeGeneration2DManyIP": exposes the class
// SpikeGeneration2DManyIP with its default constructor and
// spike_generation_entrypoint() under the Python method name
// "spike_generation".
PYBIND11_MODULE(PySpikeGeneration2DManyIP, m)
{
    m.doc() = "SpikeGeneration2DManyIP Module";

    py::class_<SpikeGeneration2DManyIP> binding(m, "SpikeGeneration2DManyIP");
    binding.def(py::init<>());
    binding.def("spike_generation",
                &SpikeGeneration2DManyIP::spike_generation_entrypoint);
}

View file

@ -14,5 +14,5 @@ __all__ = [
class SpikeGeneration2DManyIP(): class SpikeGeneration2DManyIP():
def __init__(self) -> None: ... def __init__(self) -> None: ...
def spike_generation_multi_pattern(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int) -> bool: ... def spike_generation(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int) -> bool: ...
pass pass

View file

@ -0,0 +1,390 @@
#include "SpikeGeneration2DManyIP.h"
#include <omp.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <cassert>
#include <iostream>
// Nothing to initialise: the constructor body is intentionally empty.
SpikeGeneration2DManyIP::SpikeGeneration2DManyIP() = default;
// Nothing to release: the destructor body is intentionally empty.
SpikeGeneration2DManyIP::~SpikeGeneration2DManyIP() = default;
// Python-facing entry point.
//
// Receives three 4-D buffers as raw addresses plus their four extents,
// derives the element strides, and then either
//   * runs the CPU implementation, one OpenMP task per pattern
//     (number_of_cpu_processes > 0), or
//   * runs the CUDA implementation (number_of_cpu_processes <= 0).
//
// Parameters
//   input_*          float buffer that is searched along dimension 1
//   random_values_*  float buffer holding one random value per spike
//   output_*         int64 buffer receiving one index per spike
//   number_of_cpu_processes
//                    number of OpenMP threads; values <= 0 select the GPU
//
// Returns true on the CPU path; on the GPU path the result of
// gpu_spike_generation() is passed through.
//
// NOTE(review): the buffers are presumably contiguous, row-major tensors of
// shape [pattern, h | spike, x, y] -- the strides below assume this.
bool SpikeGeneration2DManyIP::spike_generation_entrypoint(
    int64_t input_pointer_addr, int64_t input_dim_0,
    int64_t input_dim_1, int64_t input_dim_2, int64_t input_dim_3,
    int64_t random_values_pointer_addr, int64_t random_values_dim_0,
    int64_t random_values_dim_1, int64_t random_values_dim_2,
    int64_t random_values_dim_3, int64_t output_pointer_addr,
    int64_t output_dim_0, int64_t output_dim_1, int64_t output_dim_2,
    int64_t output_dim_3, int64_t number_of_cpu_processes)
{
    float* input_pointer = (float*)input_pointer_addr;
    float* random_values_pointer = (float*)random_values_pointer_addr;
    int64_t* output_pointer = (int64_t*)output_pointer_addr;

    // Input
    assert((input_pointer != nullptr));
    assert((input_dim_0 > 0));
    assert((input_dim_1 > 0));
    assert((input_dim_2 > 0));
    assert((input_dim_3 > 0));

    // Random
    assert((random_values_pointer != nullptr));
    assert((random_values_dim_0 > 0));
    assert((random_values_dim_1 > 0));
    assert((random_values_dim_2 > 0));
    assert((random_values_dim_3 > 0));

    // Output
    assert((output_pointer != nullptr));
    assert((output_dim_0 > 0));
    assert((output_dim_1 > 0));
    assert((output_dim_2 > 0));
    assert((output_dim_3 > 0));

    // Element strides of dimensions 0, 1 and 2 (dimension 3 has stride 1).
    // Input
    size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3;
    size_t input_dim_c1 = input_dim_2 * input_dim_3;
    size_t input_dim_c2 = input_dim_3;

    // Random
    size_t random_values_dim_c0 =
        random_values_dim_1 * random_values_dim_2 * random_values_dim_3;
    size_t random_values_dim_c1 =
        random_values_dim_2 * random_values_dim_3;
    size_t random_values_dim_c2 = random_values_dim_3;

    // Output
    size_t output_dim_c0 =
        output_dim_1 * output_dim_2 * output_dim_3;
    size_t output_dim_c1 = output_dim_2 * output_dim_3;
    size_t output_dim_c2 = output_dim_3;

    size_t number_of_pattern = input_dim_0;
    size_t h_dim = input_dim_1;
    size_t spike_dim = output_dim_1;
    size_t x_dim = output_dim_2;
    // The y extent is the LAST output dimension. It used to be read from
    // output_dim_2 as well, which only worked for square (x == y) maps: for
    // x > y the workers ran past the end of a row, for x < y part of the
    // output was never written.
    size_t y_dim = output_dim_3;

    if (number_of_cpu_processes > 0)
    {
        omp_set_num_threads(number_of_cpu_processes);
        // DEBUG:
        // omp_set_num_threads(1);

        // Patterns are independent of each other, so each one can be
        // processed by its own thread.
        size_t pattern_id;
#pragma omp parallel for
        for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++)
        {
            spike_generation(
                input_pointer,
                input_dim_c0,
                input_dim_c1,
                input_dim_c2,
                random_values_pointer,
                random_values_dim_c0,
                random_values_dim_c1,
                random_values_dim_c2,
                output_pointer,
                output_dim_c0,
                output_dim_c1,
                output_dim_c2,
                x_dim,
                y_dim,
                spike_dim,
                h_dim,
                pattern_id);
        }
        return true;
    }

    // GPU path: hand the whole batch to the device and report its status
    // instead of unconditionally claiming success.
    return gpu_spike_generation(
        input_pointer,
        input_dim_c0,
        input_dim_c1,
        input_dim_c2,
        random_values_pointer,
        random_values_dim_c0,
        random_values_dim_c1,
        random_values_dim_c2,
        output_pointer,
        output_dim_c0,
        output_dim_c1,
        output_dim_c2,
        x_dim,
        y_dim,
        spike_dim,
        h_dim,
        number_of_pattern);
}
// CPU worker for a single pattern.
//
// For every (x, y) position and every spike slot the random value stored for
// that slot is looked up in the input column at the same position (length
// h_dim, stride input_dim_c1) via lower_bound(); the resulting index is
// written to the output.
//
// *_dim_c0 / _c1 / _c2 are the element strides of dimensions 0, 1 and 2 of
// the respective buffer. Always returns true.
//
// NOTE(review): the binary search only makes sense if the input column is
// sorted ascending (presumably a cumulative distribution) -- confirm with
// the caller.
bool SpikeGeneration2DManyIP::spike_generation(
    float* input_pointer,
    size_t input_dim_c0,
    size_t input_dim_c1,
    size_t input_dim_c2,
    float* random_values_pointer,
    size_t random_values_dim_c0,
    size_t random_values_dim_c1,
    size_t random_values_dim_c2,
    int64_t* output_pointer,
    size_t output_dim_c0,
    size_t output_dim_c1,
    size_t output_dim_c2,
    size_t x_dim,
    size_t y_dim,
    size_t spike_dim,
    size_t h_dim,
    size_t pattern_id)
{
    // Start of the selected pattern inside each buffer.
    float* const input_pattern = input_pointer + pattern_id * input_dim_c0;
    int64_t* const output_pattern = output_pointer + pattern_id * output_dim_c0;
    float* const random_pattern =
        random_values_pointer + pattern_id * random_values_dim_c0;

    for (size_t ix = 0; ix < x_dim; ++ix)
    {
        for (size_t iy = 0; iy < y_dim; ++iy)
        {
            // Element [pattern, 0, ix, iy] of each buffer; dimension 1 is
            // walked below with the corresponding _c1 stride.
            float* const column = input_pattern + ix * input_dim_c2 + iy;
            int64_t* const spikes = output_pattern + ix * output_dim_c2 + iy;
            float* const randoms =
                random_pattern + ix * random_values_dim_c2 + iy;

            for (size_t slot = 0; slot < spike_dim; ++slot)
            {
                spikes[slot * output_dim_c1] =
                    lower_bound(column,
                                h_dim,
                                input_dim_c1,
                                randoms[slot * random_values_dim_c1]);
            }
        }
    }

    return true;
}
// Strided binary search (algorithmic idea taken from libc++'s
// std::lower_bound).
//
// Looks at the data_length elements data_ptr[0], data_ptr[stride],
// data_ptr[2 * stride], ... and returns the index (in elements, not in
// floats) of the first one that is NOT smaller than compare_to_value.
// Returns data_length if every element is smaller.
size_t SpikeGeneration2DManyIP::lower_bound(float* data_ptr,
                                            size_t data_length,
                                            size_t data_ptr_stride,
                                            float compare_to_value)
{
    size_t first = 0;
    size_t remaining = data_length;

    while (remaining > 0)
    {
        const size_t step = remaining / 2;
        const size_t probe = first + step;

        if (data_ptr[probe * data_ptr_stride] < compare_to_value)
        {
            // Answer lies strictly to the right of the probe.
            first = probe + 1;
            remaining -= step + 1;
        }
        else
        {
            // Probe itself may be the answer: keep the left half.
            remaining = step;
        }
    }

    return first;
}
// Device-side twin of SpikeGeneration2DManyIP::lower_bound().
//
// Strided binary search over data_length elements spaced data_ptr_stride
// floats apart. Returns the index of the first element that is NOT smaller
// than compare_to_value, or data_length if there is none.
__device__ size_t gpu_lower_bound(float* __restrict__ data_ptr,
                                  size_t data_length,
                                  size_t data_ptr_stride,
                                  float compare_to_value)
{
    size_t first = 0;
    size_t remaining = data_length;

    while (remaining > 0)
    {
        const size_t step = remaining / 2;
        const size_t probe = first + step;

        if (data_ptr[probe * data_ptr_stride] < compare_to_value)
        {
            // Answer lies strictly to the right of the probe.
            first = probe + 1;
            remaining -= step + 1;
        }
        else
        {
            // Probe itself may be the answer: keep the left half.
            remaining = step;
        }
    }

    return first;
}
// CUDA kernel: one thread produces one output element.
//
// Work decomposition (must match the launch in gpu_spike_generation):
//   grid/block x : flattened (pattern, spike slot) index
//   blockIdx.y   : x position
//   blockIdx.z   : y position
//
// max_threadable_tasks is the number of valid flattened (pattern, spike)
// indices; surplus threads of the last block do nothing.
// *_dim_c0 / _c1 / _c2 are the element strides of dimensions 0, 1 and 2 of
// the respective buffer.
__global__ void kernel_spike_generation(
    float* __restrict__ input_pointer,
    size_t input_dim_c0,
    size_t input_dim_c1,
    size_t input_dim_c2,
    float* __restrict__ random_values_pointer,
    size_t random_values_dim_c0,
    size_t random_values_dim_c1,
    size_t random_values_dim_c2,
    int64_t* __restrict__ output_pointer,
    size_t output_dim_c0,
    size_t output_dim_c1,
    size_t output_dim_c2,
    size_t x_dim,
    size_t y_dim,
    size_t spike_dim,
    size_t h_dim,
    size_t max_threadable_tasks)
{
    // Build the index in 64 bit. The previous `int` version multiplied two
    // 32-bit values (which can wrap for very large grids) and then compared
    // a signed value against the unsigned task count.
    const size_t idx =
        threadIdx.x + static_cast<size_t>(blockIdx.x) * blockDim.x;

    if (idx < max_threadable_tasks)
    {
        // Split the flat index into (pattern, spike slot).
        size_t pattern_id = idx / spike_dim;
        size_t position_spike = idx - (pattern_id * spike_dim);

        size_t position_x = blockIdx.y;
        size_t position_y = blockIdx.z;

        // Column [pattern, 0, x, y] of the input; searched along
        // dimension 1 with stride input_dim_c1.
        float* p_ptr = input_pointer + pattern_id * input_dim_c0 +
                       position_x * input_dim_c2 + position_y;

        int64_t* out_ptr = output_pointer + pattern_id * output_dim_c0 +
                           position_x * output_dim_c2 + position_y
                           + position_spike * output_dim_c1;

        float* rand_ptr = random_values_pointer +
                          pattern_id * random_values_dim_c0 +
                          position_x * random_values_dim_c2 + position_y
                          + position_spike * random_values_dim_c1;

        *out_ptr = gpu_lower_bound(p_ptr,
                                   h_dim,
                                   input_dim_c1,
                                   *rand_ptr);
    }
}
// Host-side launcher for kernel_spike_generation.
//
// Chooses a block size via the CUDA occupancy helper, launches a 3-D grid
// (x: flattened pattern/spike index, y: x position, z: y position) and
// waits for the kernel to finish.
//
// *_dim_c0 / _c1 / _c2 are the element strides of dimensions 0, 1 and 2 of
// the respective buffer.
//
// Returns true on success. Returns false (after writing a message to
// std::cerr) if the launch or the kernel execution reports a CUDA error;
// previously such errors were ignored and true was returned regardless.
//
// NOTE(review): the pointers are passed to the kernel unchanged, so they
// presumably already refer to device-accessible memory -- confirm with the
// caller.
bool SpikeGeneration2DManyIP::gpu_spike_generation(
    float* input_pointer,
    size_t input_dim_c0,
    size_t input_dim_c1,
    size_t input_dim_c2,
    float* random_values_pointer,
    size_t random_values_dim_c0,
    size_t random_values_dim_c1,
    size_t random_values_dim_c2,
    int64_t* output_pointer,
    size_t output_dim_c0,
    size_t output_dim_c1,
    size_t output_dim_c2,
    size_t x_dim,
    size_t y_dim,
    size_t spike_dim,
    size_t h_dim,
    size_t number_of_pattern)
{
    cudaError_t status;

    // x and y are mapped onto the y and z dimension of the grid, which are
    // limited to 65535 blocks each.
    assert((x_dim < 65535));
    assert((y_dim < 65535));

    // //////////////////////////////////////
    // Calculate the distribution on the GPU
    // //////////////////////////////////////
    int min_grid_size;
    int block_size;
    int grid_size;

    size_t dynamic_s_mem_size = 0;
    size_t max_threadable_tasks = number_of_pattern * spike_dim * x_dim * y_dim;

    // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html?highlight=blocksize#occupancy-calculator
    status = cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size,
                                                (void*)kernel_spike_generation,
                                                dynamic_s_mem_size, max_threadable_tasks);
    assert((status == cudaSuccess));
    if (status != cudaSuccess)
    {
        // Only reachable when asserts are compiled out.
        std::cerr << "Error: cudaOccupancyMaxPotentialBlockSize failed: "
                  << cudaGetErrorString(status) << std::endl;
        return false;
    }

    // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications
    // Maximum dimensionality of grid of thread blocks: 3
    // Maximum x -dimension of a grid of thread blocks: (2^31)-1
    // Maximum y- or z-dimension of a grid of thread blocks: 65535

    // Round up so that every (pattern, spike) pair gets a thread; x and y
    // are covered by the second and third grid dimension.
    grid_size = ((number_of_pattern * spike_dim) + block_size - 1) / block_size;

    dim3 grid(grid_size, x_dim, y_dim);

    kernel_spike_generation<<<grid, block_size>>>(
        input_pointer,
        input_dim_c0,
        input_dim_c1,
        input_dim_c2,
        random_values_pointer,
        random_values_dim_c0,
        random_values_dim_c1,
        random_values_dim_c2,
        output_pointer,
        output_dim_c0,
        output_dim_c1,
        output_dim_c2,
        x_dim,
        y_dim,
        spike_dim,
        h_dim,
        (number_of_pattern * spike_dim));

    // An invalid launch configuration is reported here, not by the <<<>>>
    // statement itself.
    status = cudaGetLastError();
    if (status != cudaSuccess)
    {
        std::cerr << "Error: launch of kernel_spike_generation failed: "
                  << cudaGetErrorString(status) << std::endl;
        return false;
    }

    // Errors raised while the kernel runs surface at the next
    // synchronisation point.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess)
    {
        std::cerr << "Error: kernel_spike_generation failed: "
                  << cudaGetErrorString(status) << std::endl;
        return false;
    }

    return true;
}

View file

@ -0,0 +1,68 @@
#ifndef SRC_SPIKEGENERATION2DMANYIP_H_
#define SRC_SPIKEGENERATION2DMANYIP_H_
#include <unistd.h>
#include <cctype>
#include <iostream>
// Spike generation for many inference populations on a 2-D grid.
//
// The class declares no data members; it only groups the entry point that is
// exported to Python with its CPU and GPU implementations.
class SpikeGeneration2DManyIP
{
public:
    SpikeGeneration2DManyIP();
    ~SpikeGeneration2DManyIP();

    // Entry point called from Python. Every buffer is described by its raw
    // address followed by its four extents; number_of_cpu_processes is the
    // last argument.
    bool spike_generation_entrypoint(
        int64_t input_pointer_addr,
        int64_t input_dim_0, int64_t input_dim_1,
        int64_t input_dim_2, int64_t input_dim_3,
        int64_t random_values_pointer_addr,
        int64_t random_values_dim_0, int64_t random_values_dim_1,
        int64_t random_values_dim_2, int64_t random_values_dim_3,
        int64_t output_pointer_addr,
        int64_t output_dim_0, int64_t output_dim_1,
        int64_t output_dim_2, int64_t output_dim_3,
        int64_t number_of_cpu_processes);

    // CPU implementation for the single pattern `pattern_id`.
    // The *_dim_c0/_c1/_c2 arguments are strides of the respective buffer.
    bool spike_generation(
        float* input_pointer,
        size_t input_dim_c0, size_t input_dim_c1, size_t input_dim_c2,
        float* random_values_pointer,
        size_t random_values_dim_c0, size_t random_values_dim_c1,
        size_t random_values_dim_c2,
        int64_t* output_pointer,
        size_t output_dim_c0, size_t output_dim_c1, size_t output_dim_c2,
        size_t x_dim, size_t y_dim,
        size_t spike_dim, size_t h_dim,
        size_t pattern_id);

    // GPU implementation for all `number_of_pattern` patterns at once.
    // Takes the same buffer/stride description as spike_generation().
    bool gpu_spike_generation(
        float* input_pointer,
        size_t input_dim_c0, size_t input_dim_c1, size_t input_dim_c2,
        float* random_values_pointer,
        size_t random_values_dim_c0, size_t random_values_dim_c1,
        size_t random_values_dim_c2,
        int64_t* output_pointer,
        size_t output_dim_c0, size_t output_dim_c1, size_t output_dim_c2,
        size_t x_dim, size_t y_dim,
        size_t spike_dim, size_t h_dim,
        size_t number_of_pattern);

private:
    // Strided binary search used by the CPU implementation.
    size_t lower_bound(float* data_ptr,
                       size_t data_length,
                       size_t data_ptr_stride,
                       float compare_to_value);
};
#endif /* SRC_SPIKEGENERATION2DMANYIP_H_ */

View file

@ -0,0 +1,138 @@
#include <unistd.h>
#include <bitset>
#include <cassert>
#include <cctype>
#include "error_term.cpp"
// Best way to plot the bits
// std::cout << std::bitset<32>(ap_y_ptr[1]) << "\n";
// The result needs to be written back into h_pointer (which contains h)
// Don't write to w_pointer.
// Element-wise (optionally approximated) multiplication h[i] = h[i] * w[i]
// carried out on the integer representation of the floats.
//
// Steps per element: split both operands into sign, exponent and a mantissa
// of number_of_frac_bits bits (with the implicit leading 1 made explicit),
// multiply the mantissas as integers, optionally subtract error_term(),
// convert the product back to float and patch in the combined exponent and
// sign.
//
// Parameters
//   h_pointer             in/out: first operand, overwritten with the result
//   w_pointer             in: second operand (never written)
//   pattern_length        number of elements; values <= 0 are a no-op
//   number_of_trunc_bits  forwarded to error_term()
//   number_of_frac_bits   mantissa width used for the integer product
//   ap_*_ptr, sign_temp_ptr
//                         caller-provided scratch buffers with room for
//                         pattern_length elements each
//   ap_mask               forwarded to error_term()
//   approximation_enable  false: plain fixed-point product, no error term
//
// NOTE(review): number_of_frac_bits is presumably expected in [1, 32];
// outside that range the shift by (32 - number_of_frac_bits) is undefined.
// NOTE(review): h_pointer/w_pointer are read through uint32_t* aliases,
// which formally violates strict aliasing; left unchanged here.
void approximation_multiplication_function(
    float *h_pointer, float *w_pointer, int64_t pattern_length,
    uint64_t number_of_trunc_bits, uint64_t number_of_frac_bits,
    uint32_t *ap_x_ptr, uint32_t *ap_y_ptr, uint32_t *ap_x_exponent_ptr,
    uint32_t *ap_y_exponent_ptr, uint32_t *ap_h_exponent_ptr, uint32_t ap_mask,
    uint64_t *ap_res_ptr, uint32_t *sign_temp_ptr, bool approximation_enable) {
  // A negative length would turn into a huge unsigned loop bound below.
  if (pattern_length <= 0) {
    return;
  }
  const uint64_t length = static_cast<uint64_t>(pattern_length);

  uint64_t counter;

  uint32_t *w_pointer_mod = (uint32_t *)w_pointer;
  uint32_t *h_pointer_mod = (uint32_t *)h_pointer;

  // Calculate the new sign
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    sign_temp_ptr[counter] = (w_pointer_mod[counter] & 0x80000000) ^
                             (h_pointer_mod[counter] & 0x80000000);
  }

  // Extract the exponent (drop the sign bit, keep the 8 exponent bits)
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    ap_x_exponent_ptr[counter] = (h_pointer_mod[counter] << 1) >> 24;
  }
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    ap_y_exponent_ptr[counter] = (w_pointer_mod[counter] << 1) >> 24;
  }

  // Cast and "normalize": move the mantissa to the top of the word, set the
  // implicit leading 1 and keep the upper number_of_frac_bits bits.
  uint64_t shift_value = 32 - number_of_frac_bits;
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    ap_x_ptr[counter] =
        ((h_pointer_mod[counter] << 8) | 0x80000000) >> shift_value;
  }

#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    ap_y_ptr[counter] =
        ((w_pointer_mod[counter] << 8) | 0x80000000) >> shift_value;
  }

  // A float zero has no implicit leading 1: force its mantissa to zero.
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    if (h_pointer[counter] == 0) {
      ap_x_ptr[counter] = 0;
    }
  }

#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    if (w_pointer[counter] == 0) {
      ap_y_ptr[counter] = 0;
    }
  }

  // res = x*y
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    ap_res_ptr[counter] = static_cast<uint64_t>(ap_x_ptr[counter]) *
                          static_cast<uint64_t>(ap_y_ptr[counter]);
  }

  if (approximation_enable == true) {
    // Go through the vector values
    for (counter = 0; counter < length; counter++) {
      const uint32_t error_value = error_term(
          ap_y_ptr[counter], ap_x_ptr[counter], ap_mask, number_of_trunc_bits);
      // Saturate at zero instead of wrapping around.
      if (error_value > ap_res_ptr[counter]) {
        ap_res_ptr[counter] = 0;
      } else {
        ap_res_ptr[counter] -= error_value;
      }
    }
  }

  // Cast from int to float
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    h_pointer[counter] = static_cast<float>(ap_res_ptr[counter]);
  }

#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    ap_h_exponent_ptr[counter] = (h_pointer_mod[counter] << 1) >> 24;
  }

  // Divide by 2^(2 * number_of_frac_bits)
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    ap_h_exponent_ptr[counter] -= 2 * number_of_frac_bits;
  }

  // Combine the three exponents. The sum lives in a loop-local variable: a
  // variable declared outside the loop is shared between SIMD lanes and
  // must not be written inside an `omp simd` loop.
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    const uint32_t exponent_sum = ap_x_exponent_ptr[counter] +
                                  ap_y_exponent_ptr[counter] +
                                  ap_h_exponent_ptr[counter];
    if (exponent_sum > 252) {
      ap_h_exponent_ptr[counter] = exponent_sum - 252;
    } else {
      // Here I try to catch the case that the new exponent is too small
      ap_h_exponent_ptr[counter] = 0;
    }
  }

  // Remove the old exponent
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    h_pointer_mod[counter] = (h_pointer_mod[counter] << 9) >> 9;
  }

  // Install the new exponent
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    h_pointer_mod[counter] += ap_h_exponent_ptr[counter] << 23;
  }

  // Add the sign back
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    h_pointer_mod[counter] += sign_temp_ptr[counter];
  }

  // A zero product must stay exactly zero. Its float exponent field is 0,
  // so the unsigned subtraction of 2 * number_of_frac_bits above wraps
  // around and the reassembled bit pattern can be a non-zero number. The GPU
  // implementation (gpu_approximation_multiplication_function) returns 0.0
  // in this case; do the same here.
#pragma omp simd
  for (counter = 0; counter < length; counter++) {
    if (ap_res_ptr[counter] == 0) {
      h_pointer_mod[counter] = 0;
    }
  }

  return;
}

View file

@ -0,0 +1,28 @@
#include <unistd.h>
#include <cassert>
#include <cctype>
// Computes the correction that the approximate multiplier subtracts from
// the exact product.
//
// Walks over the lowest number_of_trunc_bits bits of `a` (LSB first). For
// every set bit the current value of `b` -- masked with ap_mask and shifted
// left by the bit position -- is masked with ap_mask once more and added to
// the result.
//
// Returns the accumulated sum (0 when number_of_trunc_bits is 0).
uint32_t error_term(uint32_t a, uint32_t b, uint32_t ap_mask,
                    uint32_t number_of_trunc_bits) {
  uint32_t accumulated = 0;
  uint32_t multiplier_bits = a;
  uint32_t shifted_operand = b & ap_mask;

  for (uint32_t bit = 0; bit < number_of_trunc_bits; ++bit) {
    const bool bit_is_set = (multiplier_bits & 1u) != 0u;
    if (bit_is_set) {
      accumulated += shifted_operand & ap_mask;
    }
    multiplier_bits >>= 1;
    shifted_operand <<= 1;
  }

  return accumulated;
}

View file

@ -0,0 +1,102 @@
#include "gpu_error_term.cu"
// Device function: (optionally approximated) product of `weight` and `input`
// computed on the integer representation of the two floats.
//
// Steps: split both operands into sign, exponent and a mantissa of
// number_of_frac_bits bits (implicit leading 1 made explicit), multiply the
// mantissas as integers, optionally subtract gpu_error_term(), convert the
// product back to float and patch in the combined exponent and sign.
//
// Parameters
//   weight, input          the two factors
//   number_of_frac_bits    mantissa width used for the integer product
//   approximation_enable   false: plain fixed-point product, no error term
//   number_of_trunc_bits   forwarded to gpu_error_term()
//   ap_mask                forwarded to gpu_error_term()
//
// Returns the product; exactly 0.0 when the integer product is zero.
//
// NOTE(review): number_of_frac_bits is presumably expected in [1, 32];
// outside that range the shift by (32 - number_of_frac_bits) is undefined.
__device__ float gpu_approximation_multiplication_function(
float weight,
float input,
uint64_t number_of_frac_bits,
bool approximation_enable,
uint64_t number_of_trunc_bits,
uint32_t ap_mask)
{
// Work on local copies so that their bit patterns can be read through
// uint32_t pointers.
float weight_copy = weight;
float input_copy = input;
uint32_t *weight_pointer_mod = (uint32_t *)&weight_copy;
uint32_t *input_pointer_mod = (uint32_t *)&input_copy;
// Calculate the new sign
// (XOR of the two sign bits, kept in bit 31)
uint32_t sign_temp = (*weight_pointer_mod & 0x80000000) ^
(*input_pointer_mod & 0x80000000);
// Extract the exponent
// (shift the sign bit out, then keep the 8 exponent bits)
uint32_t ap_input_exponent = (*input_pointer_mod << 1) >> 24;
uint32_t ap_weight_exponent = (*weight_pointer_mod << 1) >> 24;
// Cast and "normalize"
// Move the mantissa to the top of the word, set the implicit leading 1 and
// keep the upper number_of_frac_bits bits.
uint64_t shift_value = 32 - number_of_frac_bits;
uint32_t ap_input_mantissa =
((*input_pointer_mod << 8) | 0x80000000) >> shift_value;
uint32_t ap_weight_mantissa =
((*weight_pointer_mod << 8) | 0x80000000) >> shift_value;
// A float zero has no implicit leading 1: force its mantissa to zero.
if (input == 0)
{
ap_input_mantissa = 0;
}
if (weight == 0)
{
ap_weight_mantissa = 0;
}
// res = x*y
uint64_t ap_result = static_cast<uint64_t>(ap_input_mantissa) * static_cast<uint64_t>(ap_weight_mantissa);
uint32_t temp;
// --------------------------------------------
// Approx
// --------------------------------------------
if (approximation_enable == true)
{
// Subtract the error term, saturating at zero instead of wrapping around.
temp = gpu_error_term(ap_weight_mantissa, ap_input_mantissa, ap_mask,
number_of_trunc_bits);
if (temp > ap_result)
{
ap_result = 0;
}
else
{
ap_result -= temp;
}
}
// Cast from int to float
float output = static_cast<float>(ap_result);
if (ap_result == 0)
{
// A zero product is returned as-is; the exponent arithmetic below would
// wrap around for an exponent field of 0.
output = 0.0;
}
else
{
uint32_t *output_pointer_mod = (uint32_t *)&output;
uint32_t ap_output_exponent = (*output_pointer_mod << 1) >> 24;
// Undo the 2^(2 * number_of_frac_bits) scaling of the integer product.
ap_output_exponent -= 2 * number_of_frac_bits;
// Sum of the three biased exponents; 252 removes the surplus bias
// (presumably 2 * 127 minus 2 for the leading-1 position of the two
// mantissas -- TODO confirm).
temp = ap_input_exponent + ap_weight_exponent + ap_output_exponent;
if (temp > 252)
{
ap_output_exponent = temp - 252;
}
else
{
// Here I try to catch the case that the new exponent is too small
ap_output_exponent = 0;
}
// Remove the old exponent
// (this also clears the sign bit, which is 0 for an unsigned product)
*output_pointer_mod = (*output_pointer_mod << 9) >> 9;
// Install the new exponent
*output_pointer_mod += ap_output_exponent << 23;
// Add the sign back
*output_pointer_mod += sign_temp;
}
return output;
};

View file

@ -0,0 +1,28 @@
// Device-side computation of the correction that the approximate multiplier
// subtracts from the exact mantissa product.
//
// Walks over the lowest number_of_trunc_bits bits of ap_weight_mantissa
// (LSB first). For every set bit the current value of the input mantissa --
// masked with ap_mask and shifted left by the bit position -- is masked with
// ap_mask once more and added to the result.
//
// Returns the accumulated sum (0 when number_of_trunc_bits is 0).
__device__ uint32_t gpu_error_term(uint32_t ap_weight_mantissa,
                                   uint32_t ap_input_mantissa,
                                   uint32_t ap_mask,
                                   uint32_t number_of_trunc_bits)
{
    uint32_t accumulated = 0;
    uint32_t weight_bits = ap_weight_mantissa;
    uint32_t shifted_input = ap_input_mantissa & ap_mask;

    for (uint32_t bit = 0; bit < number_of_trunc_bits; ++bit)
    {
        const bool bit_is_set = (weight_bits & 1u) != 0u;
        if (bit_is_set)
        {
            accumulated += shifted_input & ap_mask;
        }
        weight_bits >>= 1;
        shifted_input <<= 1;
    }

    return accumulated;
}

View file

@ -1,7 +1,3 @@
# Derived from code by Aaron.Ma
# which was posted here:
# https://blog.csdn.net/sinat_40922660/article/details/123850832
# %% # %%
# pip install pybind11-stubgen # pip install pybind11-stubgen
from pybind11_stubgen import ModuleStubsGenerator # type: ignore from pybind11_stubgen import ModuleStubsGenerator # type: ignore

View file

@ -0,0 +1,339 @@
#include "HDynamicCNNManyIP.h"
#include <omp.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <cassert>
#include <iostream>
// Nothing to initialise: the constructor body is intentionally empty.
HDynamicCNNManyIP::HDynamicCNNManyIP() = default;
// Nothing to release: the destructor body is intentionally empty.
HDynamicCNNManyIP::~HDynamicCNNManyIP() = default;
// Python-facing entry point of the h-dynamic.
//
// Every buffer arrives as a raw address plus its extents. After checking
// pointers and extents with assert() the element strides are derived and
// update() is run once per pattern, distributed over number_of_processes
// OpenMP threads.
//
// Returns true on success. If number_of_processes <= 0 an error message is
// printed to std::cout and false is returned.
//
// gpu_tuning_factor is accepted but not used by this implementation.
bool HDynamicCNNManyIP::update_entrypoint(
    int64_t h_pointer_addr,
    int64_t h_dim_0,
    int64_t h_dim_1,
    int64_t h_dim_2,
    int64_t h_dim_3,
    int64_t epsilon_xy_pointer_addr,
    int64_t epsilon_xy_dim_0,
    int64_t epsilon_xy_dim_1,
    int64_t epsilon_xy_dim_2,
    int64_t epsilon_t_pointer_addr,
    int64_t epsilon_t_dim_0,
    int64_t weights_pointer_addr,
    int64_t weights_dim_0,
    int64_t weights_dim_1,
    int64_t input_pointer_addr,
    int64_t input_dim_0,
    int64_t input_dim_1,
    int64_t input_dim_2,
    int64_t input_dim_3,
    int64_t init_vector_pointer_addr,
    int64_t init_vector_dim_0,
    int64_t number_of_processes,
    float forgetting_offset,
    int64_t gpu_tuning_factor)
{
    const size_t pattern_count = input_dim_0;
    const size_t h_dim = init_vector_dim_0;

    // --- initial h vector -------------------------------------------------
    float* const init_h = (float*)init_vector_pointer_addr;
    assert((init_h != nullptr));
    assert((h_dim > 0));

    // --- h (output) ---------------------------------------------------------
    float* const h_data = (float*)h_pointer_addr;
    assert((h_data != nullptr));
    assert((h_dim_0 > 0));
    assert((h_dim_1 > 0));
    assert((h_dim_2 > 0));
    assert((h_dim_3 > 0));
    const size_t h_stride_0 = h_dim_1 * h_dim_2 * h_dim_3;
    const size_t h_stride_1 = h_dim_2 * h_dim_3;
    const size_t h_stride_2 = h_dim_3;

    // --- epsilon xy ---------------------------------------------------------
    float* const eps_xy = (float*)epsilon_xy_pointer_addr;
    assert((eps_xy != nullptr));
    assert((epsilon_xy_dim_0 > 0));
    assert((epsilon_xy_dim_1 > 0));
    const size_t eps_xy_stride_0 = epsilon_xy_dim_2 * epsilon_xy_dim_1;
    const size_t eps_xy_stride_1 = epsilon_xy_dim_2;

    // --- epsilon t ----------------------------------------------------------
    float* const eps_t = (float*)epsilon_t_pointer_addr;
    assert((eps_t != nullptr));
    assert((epsilon_t_dim_0 > 0));

    // --- weights ------------------------------------------------------------
    float* const weights = (float*)weights_pointer_addr;
    assert((weights != nullptr));
    assert((weights_dim_0 > 0));
    assert((weights_dim_1 > 0));
    const size_t weights_stride_0 = weights_dim_1;

    // --- input (spike indices) ----------------------------------------------
    int64_t* const spikes = (int64_t*)input_pointer_addr;
    assert((spikes != nullptr));
    assert((input_dim_0 > 0));
    assert((input_dim_1 > 0));
    assert((input_dim_2 > 0));
    assert((input_dim_3 > 0));
    const size_t input_stride_0 = input_dim_1 * input_dim_2 * input_dim_3;
    const size_t input_stride_1 = input_dim_2 * input_dim_3;
    const size_t input_stride_2 = input_dim_3;

    // One weight row must have as many entries as the h vector.
    assert((h_dim == weights_dim_1));

    const size_t spike_count = input_dim_1;
    const size_t x_extent = input_dim_2;
    const size_t y_extent = input_dim_3;

    // Forgetting offset per h element.
    const float forgetting_offset_per_element =
        forgetting_offset / static_cast<float>(h_dim);

    // --------------------
    if (number_of_processes <= 0)
    {
        std::cout << "Error: number_of_processes <= 0" << std::endl;
        return false;
    }

    omp_set_num_threads(number_of_processes);

#pragma omp parallel for
    for (size_t pattern_id = 0; pattern_id < pattern_count; pattern_id++)
    {
        update(
            init_h,
            h_data,
            h_stride_0,
            h_stride_1,
            h_stride_2,
            h_dim,
            eps_xy,
            eps_xy_stride_0,
            eps_xy_stride_1,
            eps_t,
            weights,
            weights_stride_0,
            spikes,
            input_stride_0,
            input_stride_1,
            input_stride_2,
            spike_count,
            x_extent,
            y_extent,
            forgetting_offset,
            forgetting_offset_per_element,
            pattern_id);
    }

    return true;
}
// Processes one pattern: calls update_one_ip() for every (x, y) position.
//
// The *_dim_c0 / _c1 / _c2 arguments are the element strides of dimensions
// 0, 1 and 2 of the respective buffer. h and input are offset by pattern and
// position; epsilon_xy is offset by position only. Always returns true.
bool HDynamicCNNManyIP::update(
    float* h_init_ptr,
    float* h_pointer,
    size_t h_dim_c0,
    size_t h_dim_c1,
    size_t h_dim_c2,
    size_t h_dim,
    float* epsilon_xy_pointer,
    size_t epsilon_xy_dim_c0,
    size_t epsilon_xy_dim_c1,
    float* epsilon_t_pointer,
    float* weights_pointer,
    size_t weights_dim_c0,
    int64_t* input_pointer,
    size_t input_dim_c0,
    size_t input_dim_c1,
    size_t input_dim_c2,
    size_t number_of_spikes,
    size_t dim_x,
    size_t dim_y,
    float forgetting_offset,
    float forgetting_offset_local,
    size_t pattern_id)
{
    // Start of the selected pattern in the two per-pattern buffers.
    float* const h_pattern = h_pointer + pattern_id * h_dim_c0;
    int64_t* const input_pattern = input_pointer + pattern_id * input_dim_c0;

    for (size_t ix = 0; ix < dim_x; ++ix)
    {
        for (size_t iy = 0; iy < dim_y; ++iy)
        {
            float* const epsilon_xy_here =
                epsilon_xy_pointer + ix * epsilon_xy_dim_c1 + iy;
            float* const h_here = h_pattern + ix * h_dim_c2 + iy;
            int64_t* const input_here = input_pattern + ix * input_dim_c2 + iy;

            update_one_ip(
                h_init_ptr,
                h_here,
                h_dim_c1,
                h_dim,
                weights_pointer,
                weights_dim_c0,
                input_here,
                input_dim_c1,
                epsilon_xy_here,
                epsilon_xy_dim_c0,
                epsilon_t_pointer,
                number_of_spikes,
                forgetting_offset,
                forgetting_offset_local);
        }
    }

    return true;
}
// Runs the h-dynamic for a single position of a single pattern.
//
// Starting from a copy of h_init_ptr (h_dim floats), the spikes are processed
// one after the other. The running vector is kept in an un-normalised form:
// instead of dividing it after every spike, the accumulated factor is tracked
// in epsilon_scale and divided out once at the end (and whenever it grows
// beyond 1E10).
//
// Parameters
//   h_init_ptr               start vector, h_dim contiguous floats (read only)
//   h_pointer, h_dim_c1      destination; element i is written to
//                            h_pointer[i * h_dim_c1]
//   weights_pointer,
//   weights_dim_c0           weight row of spike s starts at
//                            weights_pointer + s * weights_dim_c0
//   input_pointer,
//   input_dim_c1             spike index k is read from
//                            input_pointer[k * input_dim_c1]; negative values
//                            are skipped
//   epsilon_xy_pointer,
//   epsilon_xy_dim_c0        per-spike-index factor, read at
//                            epsilon_xy_pointer[s * epsilon_xy_dim_c0]
//   epsilon_t_pointer        per-spike-position factor, read at [k]
//   number_of_spikes         number of spike positions to process
//   forgetting_offset,
//   forgetting_offset_local  offset and offset per h element; the offset is
//                            only applied when forgetting_offset_local > 0
//
// NOTE(review): spike indices are not range-checked against the weight /
// epsilon_xy extents here -- the caller presumably guarantees valid values.
void HDynamicCNNManyIP::update_one_ip(
float* h_init_ptr,
float* h_pointer,
size_t h_dim_c1,
size_t h_dim,
float* weights_pointer,
size_t weights_dim_c0,
int64_t* input_pointer,
size_t input_dim_c1,
float* epsilon_xy_pointer,
size_t epsilon_xy_dim_c0,
float* epsilon_t_pointer,
size_t number_of_spikes,
float forgetting_offset,
float forgetting_offset_local)
{
// Scratch buffers; both are released at the end of the function.
float* h_temp = new float[h_dim];
float* h_subsegment = new float[h_dim];
// Working copy of the start vector.
memcpy(h_subsegment, h_init_ptr, sizeof(float) * h_dim);
size_t counter_spike;
size_t counter;
float h_temp_sum;
float temp_value;
float epsilon_subsegment;
// Factor by which h_subsegment is currently too large.
float epsilon_scale = 1.0;
int64_t* spike;
float* w_ptr;
for (counter_spike = 0; counter_spike < number_of_spikes; counter_spike++)
{
// Fold the accumulated scale back into the vector before it gets too
// large for float arithmetic.
if (epsilon_scale > 1E10)
{
temp_value = 1.0 / epsilon_scale;
#pragma omp simd
for (counter = 0; counter < h_dim; counter++)
{
h_subsegment[counter] *= temp_value;
}
epsilon_scale = 1.0;
}
spike = input_pointer + counter_spike * input_dim_c1;
// Negative spike indices are ignored.
if (*spike >= 0)
{
// Effective epsilon of this spike: position-dependent part times
// time-dependent part.
epsilon_subsegment =
epsilon_xy_pointer[*spike *epsilon_xy_dim_c0] * epsilon_t_pointer[counter_spike];
w_ptr = weights_pointer + *spike * weights_dim_c0;
// h_temp = h_subsegment * w (element-wise)
memcpy(h_temp, h_subsegment, sizeof(float) * h_dim);
#pragma omp simd
for (counter = 0; counter < h_dim; counter++)
{
h_temp[counter] *= w_ptr[counter];
}
h_temp_sum = 0.0;
#pragma omp simd reduction(+ : h_temp_sum)
for (counter = 0; counter < h_dim; counter++)
{
h_temp_sum += h_temp[counter];
}
// Skip the update when the normalisation sum is (almost) zero; this also
// avoids the division below.
if (h_temp_sum > 1E-10)
{
// Normalise h_temp and weight it with epsilon (in the scaled domain).
temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum;
#pragma omp simd
for (counter = 0; counter < h_dim; counter++)
{
h_temp[counter] *= temp_value;
}
#pragma omp simd
for (counter = 0; counter < h_dim; counter++)
{
h_subsegment[counter] += h_temp[counter];
}
if (forgetting_offset_local > 0.0)
{
// Add the same offset to every element of h.
temp_value =
epsilon_scale * epsilon_subsegment * forgetting_offset_local;
#pragma omp simd
for (counter = 0; counter < h_dim; counter++)
{
h_subsegment[counter] += temp_value;
}
epsilon_scale *=
1.0 + epsilon_subsegment * (1.0 + forgetting_offset);
}
else
{
epsilon_scale *= 1.0 + epsilon_subsegment * 1.0;
}
}
}
}
// Divide the accumulated scale out and store the result with the stride of
// the destination buffer.
temp_value = 1.0 / epsilon_scale;
#pragma omp simd
for (counter = 0; counter < h_dim; counter++)
{
h_pointer[counter * h_dim_c1] =
h_subsegment[counter] * temp_value;
}
delete[] h_temp;
delete[] h_subsegment;
return;
};

View file

@ -0,0 +1,85 @@
#ifndef SRC_HDYNAMICCNNMANYIP_H_
#define SRC_HDYNAMICCNNMANYIP_H_
#include <unistd.h>
#include <cctype>
#include <iostream>
// h-dynamic for many inference populations arranged on a 2-D grid.
//
// The class declares no data members; it groups the entry point that is
// exported to Python with its private helpers.
class HDynamicCNNManyIP
{
public:
    HDynamicCNNManyIP();
    ~HDynamicCNNManyIP();

    // Entry point called from Python. Every buffer is described by its raw
    // address followed by its extents.
    bool update_entrypoint(
        int64_t h_pointer_addr,
        int64_t h_dim_0, int64_t h_dim_1, int64_t h_dim_2, int64_t h_dim_3,
        int64_t epsilon_xy_pointer_addr,
        int64_t epsilon_xy_dim_0, int64_t epsilon_xy_dim_1,
        int64_t epsilon_xy_dim_2,
        int64_t epsilon_t_pointer_addr,
        int64_t epsilon_t_dim_0,
        int64_t weights_pointer_addr,
        int64_t weights_dim_0, int64_t weights_dim_1,
        int64_t input_pointer_addr,
        int64_t input_dim_0, int64_t input_dim_1,
        int64_t input_dim_2, int64_t input_dim_3,
        int64_t init_vector_pointer_addr,
        int64_t init_vector_dim_0,
        int64_t number_of_processes,
        float forgetting_offset,
        int64_t gpu_tuning_factor);

private:
    // Handles the single pattern `pattern_id` (all x/y positions).
    // The *_dim_c0/_c1/_c2 arguments are strides of the respective buffer.
    bool update(
        float* h_init_ptr,
        float* h_pointer,
        size_t h_dim_c0, size_t h_dim_c1, size_t h_dim_c2,
        size_t h_dim,
        float* epsilon_xy_pointer,
        size_t epsilon_xy_dim_c0, size_t epsilon_xy_dim_c1,
        float* epsilon_t_pointer,
        float* weights_pointer,
        size_t weights_dim_c0,
        int64_t* input_pointer,
        size_t input_dim_c0, size_t input_dim_c1, size_t input_dim_c2,
        size_t number_of_spikes,
        size_t dim_x, size_t dim_y,
        float forgetting_offset,
        float forgetting_offset_local,
        size_t pattern_id);

    // Handles one position of one pattern; the pointers already refer to
    // that position.
    void update_one_ip(
        float* h_init_ptr,
        float* h_pointer,
        size_t h_dim_c1,
        size_t h_dim,
        float* weights_pointer,
        size_t weights_dim_c0,
        int64_t* input_pointer,
        size_t input_dim_c1,
        float* epsilon_xy_pointer,
        size_t epsilon_xy_dim_c0,
        float* epsilon_t_pointer,
        size_t number_of_spikes,
        float forgetting_offset,
        float forgetting_offset_local);
};
#endif /* SRC_HDYNAMICCNNMANYIP_H_ */

View file

@ -0,0 +1,64 @@
# Builds the three pybind11 extension modules (CPU versions) and regenerates
# their .pyi stubs.
#
# Change to your python bin directory (tested with Python 3.10.4)
PYBIN=~/P3.10GPU/bin/
CC=/usr/lib64/ccache/clang++

# Include flags for pybind11 / Python; the backticks are evaluated by the
# shell each time a recipe runs.
PYBIND11INCLUDE=`$(PYBIN)python3 -m pybind11 --includes`
PARAMETERS_O= -O3 -std=c++14 $(PYBIND11INCLUDE) -fPIC -Wall -fopenmp=libomp
PARAMETERS_Linker=-shared -lm -lomp -lstdc++ -Wall

# File name suffix Python expects for extension modules.
PYPOSTFIX=`$(PYBIN)python3-config --extension-suffix`

# `all` and `clean` name actions, not files: without this declaration a file
# or directory called "all" or "clean" would silently disable the target.
.PHONY: all clean

all: PyHDynamicCNNManyIP \
	PySpikeGeneration2DManyIP \
	PyMultiApp

#######################

HDynamicCNNManyIP.o: HDynamicCNNManyIP.h HDynamicCNNManyIP.cpp
	$(CC) $(PARAMETERS_O) -c HDynamicCNNManyIP.cpp -o HDynamicCNNManyIP.o

PyHDynamicCNNManyIP.o: HDynamicCNNManyIP.h PyHDynamicCNNManyIP.cpp
	$(CC) $(PARAMETERS_O) -c PyHDynamicCNNManyIP.cpp -o PyHDynamicCNNManyIP.o

PyHDynamicCNNManyIP: HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o
	$(CC) $(PARAMETERS_Linker) -o PyHDynamicCNNManyIP HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o
	cp PyHDynamicCNNManyIP PyHDynamicCNNManyIP$(PYPOSTFIX)
	$(PYBIN)python3 pybind11_auto_pyi.py

#######################

SpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h SpikeGeneration2DManyIP.cpp
	$(CC) $(PARAMETERS_O) -c SpikeGeneration2DManyIP.cpp -o SpikeGeneration2DManyIP.o

PySpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h PySpikeGeneration2DManyIP.cpp
	$(CC) $(PARAMETERS_O) -c PySpikeGeneration2DManyIP.cpp -o PySpikeGeneration2DManyIP.o

PySpikeGeneration2DManyIP: SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o
	$(CC) $(PARAMETERS_Linker) -o PySpikeGeneration2DManyIP SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o
	cp PySpikeGeneration2DManyIP PySpikeGeneration2DManyIP$(PYPOSTFIX)
	$(PYBIN)python3 pybind11_auto_pyi.py

#######################

# MultiApp.cpp #includes the two .cpp files listed here, so they are
# prerequisites although they are never compiled on their own.
MultiApp.o: MultiApp.h MultiApp.cpp approximation_multiplication_function.cpp \
	error_term.cpp
	$(CC) $(PARAMETERS_O) -c MultiApp.cpp -o MultiApp.o

PyMultiApp.o: MultiApp.h PyMultiApp.cpp
	$(CC) $(PARAMETERS_O) -c PyMultiApp.cpp -o PyMultiApp.o

PyMultiApp: MultiApp.o PyMultiApp.o
	$(CC) $(PARAMETERS_Linker) -o PyMultiApp MultiApp.o PyMultiApp.o
	cp PyMultiApp PyMultiApp$(PYPOSTFIX)
	$(PYBIN)python3 pybind11_auto_pyi.py

#######################

clean:
	rm -f PyHDynamicCNNManyIP
	rm -f PySpikeGeneration2DManyIP
	rm -f PyMultiApp
	rm -f *.o
	rm -f *.so

View file

@ -0,0 +1,187 @@
#include "MultiApp.h"
#include <omp.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <vector>
#include "approximation_multiplication_function.cpp"
// Nothing to initialise: the constructor body is intentionally empty.
MultiApp::MultiApp() = default;
// Nothing to release: the destructor body is intentionally empty.
MultiApp::~MultiApp() = default;
// Computes the output of one pattern.
//
// For every (x, y) position and every output feature the input channel
// vector at that position is multiplied element-wise with the weight row of
// the feature via approximation_multiplication_function() and the products
// are summed up into one output value.
//
// Indexing used below:
//   input  [id_pattern][channel][x][y]
//   weight [feature][channel]
//   output [id_pattern][feature][x][y]
//
// id_pattern must lie in [0, pattern_dim) (checked with assert).
// Always returns true.
bool MultiApp::update(float* np_input_pointer,
                      float* np_weight_pointer,
                      float* np_output_pointer, int64_t pattern_dim,
                      int64_t feature_dim, int64_t x_dim, int64_t y_dim,
                      int64_t input_channel_dim, int64_t id_pattern,
                      bool approximation_enable, int64_t number_of_trunc_bits,
                      int64_t number_of_frac_bits)
{
    assert((id_pattern >= 0));
    assert((id_pattern < pattern_dim));

    const uint64_t pattern_size = input_channel_dim;

    // Scratch buffers for approximation_multiplication_function(), one
    // element per input channel. They are allocated once per call and reused
    // for every position / feature.
    std::vector<float> ap_h_vector(pattern_size);
    float* ap_h_ptr = ap_h_vector.data();

    std::vector<uint32_t> ap_x_vector(pattern_size);
    uint32_t* ap_x_ptr = ap_x_vector.data();

    std::vector<uint32_t> ap_y_vector(pattern_size);
    uint32_t* ap_y_ptr = ap_y_vector.data();

    std::vector<uint32_t> ap_x_exponent_vector(pattern_size);
    uint32_t* ap_x_exponent_ptr = ap_x_exponent_vector.data();

    std::vector<uint32_t> ap_y_exponent_vector(pattern_size);
    uint32_t* ap_y_exponent_ptr = ap_y_exponent_vector.data();

    std::vector<uint32_t> ap_h_exponent_vector(pattern_size);
    uint32_t* ap_h_exponent_ptr = ap_h_exponent_vector.data();

    std::vector<uint64_t> ap_res_vector(pattern_size);
    uint64_t* ap_res_ptr = ap_res_vector.data();

    // Bit mask with the lowest number_of_trunc_bits bits set.
    uint32_t ap_mask = static_cast<uint64_t>(pow(2, number_of_trunc_bits)) - 1;

    std::vector<uint32_t> sign_temp_vector(pattern_size);
    uint32_t* sign_temp_ptr = sign_temp_vector.data();

    // Start of the selected pattern in the input and in the output buffer.
    const uint64_t input_pattern_size = input_channel_dim * x_dim * y_dim;
    const uint64_t output_pattern_size = feature_dim * x_dim * y_dim;

    float* const input_pattern =
        np_input_pointer + id_pattern * input_pattern_size;
    float* const output_pattern =
        np_output_pointer + id_pattern * output_pattern_size;

    // Stride between two channels / two features.
    const uint64_t pattern_c_2 = x_dim * y_dim;

    for (uint64_t ix = 0; ix < x_dim; ix++)
    {
        for (uint64_t iy = 0; iy < y_dim; iy++)
        {
            const uint64_t pos_xy = iy + ix * y_dim;

            for (uint64_t feature = 0; feature < feature_dim; feature++)
            {
                const uint64_t pos_xy_if = feature * pattern_c_2 + pos_xy;

                float* const input_ptr = input_pattern + pos_xy;
                float* const output_ptr = output_pattern + pos_xy_if;
                float* const w_ptr =
                    np_weight_pointer + feature * input_channel_dim;

                // Gather the channel vector of this position; the multiply
                // routine overwrites it with the products.
                uint64_t counter;
#pragma omp simd
                for (counter = 0; counter < pattern_size; counter++)
                {
                    ap_h_ptr[counter] = input_ptr[counter * pattern_c_2];
                }

                approximation_multiplication_function(
                    ap_h_ptr, w_ptr, pattern_size, number_of_trunc_bits,
                    number_of_frac_bits, ap_x_ptr, ap_y_ptr, ap_x_exponent_ptr,
                    ap_y_exponent_ptr, ap_h_exponent_ptr, ap_mask, ap_res_ptr,
                    sign_temp_ptr, approximation_enable);

                float temp_sum = 0.0;
#pragma omp simd reduction(+ : temp_sum)
                for (counter = 0; counter < pattern_size; counter++)
                {
                    temp_sum += ap_h_ptr[counter];
                }

                output_ptr[0] = temp_sum;
            }
        }
    }

    return true;
}
// Python-facing entry point: runs MultiApp::update() once per pattern,
// distributing the patterns over OpenMP threads.
//
// The three *_addr arguments are raw memory addresses that are reinterpreted
// as float buffers. All dimensions must be > 0 (checked with assert).
// Returns false (after printing an error) when number_of_processes <= 0,
// true otherwise.
bool MultiApp::update_with_init_vector_multi_pattern(
    int64_t np_input_pointer_addr, int64_t np_weight_pointer_addr,
    int64_t np_output_pointer_addr, int64_t pattern_dim, int64_t feature_dim,
    int64_t x_dim, int64_t y_dim, int64_t input_channel_dim,
    int64_t number_of_processes, bool approximation_enable,
    int64_t number_of_trunc_bits, int64_t number_of_frac)
{
    float* const input_base = reinterpret_cast<float*>(np_input_pointer_addr);
    float* const weight_base = reinterpret_cast<float*>(np_weight_pointer_addr);
    float* const output_base = reinterpret_cast<float*>(np_output_pointer_addr);

    // Sanity checks on the buffers and the tensor geometry.
    assert((input_base != nullptr));
    assert((output_base != nullptr));
    assert((weight_base != nullptr));

    assert((pattern_dim > 0));
    assert((feature_dim > 0));
    assert((x_dim > 0));
    assert((y_dim > 0));
    assert((input_channel_dim > 0));

    // Guard clause: a non-positive thread count is reported and rejected.
    if (number_of_processes <= 0)
    {
        std::cout << "Error: number_of_processes <= 0" << std::endl;
        return false;
    }

    // Debugging hint: force a single thread here to serialise the patterns.
    omp_set_num_threads(number_of_processes);

    // Every pattern is handled by its own call to update().
#pragma omp parallel for
    for (int64_t id_pattern = 0; id_pattern < pattern_dim; id_pattern++)
    {
        update(input_base, weight_base, output_base, pattern_dim, feature_dim,
               x_dim, y_dim, input_channel_dim, id_pattern,
               approximation_enable, number_of_trunc_bits, number_of_frac);
    }

    return true;
};

View file

@ -0,0 +1,32 @@
#ifndef SRC_MultiApp_H_
#define SRC_MultiApp_H_
#include <unistd.h>
#include <cctype>
#include <iostream>
// Convolution-style multiply/accumulate over float buffers; the
// approximation_enable / number_of_trunc_bits / number_of_frac arguments are
// forwarded to the multiplication routine.
class MultiApp
{
public:
MultiApp();
~MultiApp();
// Processes a single pattern (index id_pattern) of the input buffer and
// writes the corresponding part of the output buffer.
bool update(float* np_input_pointer, float* np_weight_pointer,
float* np_output_pointer, int64_t pattern_dim,
int64_t feature_dim, int64_t x_dim, int64_t y_dim,
int64_t input_channel_dim, int64_t id_pattern,
bool approximation_enable, int64_t number_of_trunc_bits,
int64_t number_of_frac);
// Entry point exposed to Python. The *_addr arguments are raw memory
// addresses that the implementation casts to float*; the work is split
// over number_of_processes OpenMP threads, one update() call per pattern.
// Returns false when number_of_processes <= 0.
bool update_with_init_vector_multi_pattern(
int64_t np_input_pointer_addr, int64_t np_weight_pointer_addr,
int64_t np_output_pointer_addr, int64_t pattern_dim, int64_t feature_dim,
int64_t x_dim, int64_t y_dim, int64_t input_channel_dim,
int64_t number_of_processes, bool approximation_enable,
int64_t number_of_trunc_bits, int64_t number_of_frac);
private:
};
#endif /* SRC_MultiApp_H_ */

View file

@ -0,0 +1,14 @@
#include <pybind11/pybind11.h>
#include "HDynamicCNNManyIP.h"
namespace py = pybind11;
// Defines the Python extension module "PyHDynamicCNNManyIP", which exposes
// the C++ class HDynamicCNNManyIP to Python.
PYBIND11_MODULE(PyHDynamicCNNManyIP, m)
{
m.doc() = "HDynamicCNNManyIP Module";
py::class_<HDynamicCNNManyIP>(m, "HDynamicCNNManyIP")
// Only the default constructor is exported.
.def(py::init<>())
// The Python method "update" is bound to the C++ member update_entrypoint.
.def("update",
&HDynamicCNNManyIP::update_entrypoint);
}

View file

@ -0,0 +1,18 @@
#
# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!
#
"""HDynamicCNNManyIP Module"""
from __future__ import annotations
import PyHDynamicCNNManyIP
import typing
__all__ = [
"HDynamicCNNManyIP"
]
# Type stub for the pybind11 class exported by the PyHDynamicCNNManyIP module.
class HDynamicCNNManyIP():
def __init__(self) -> None: ...
# Positional-only binding: 24 arguments (all int except arg22, a float);
# returns a bool.
def update(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int, arg16: int, arg17: int, arg18: int, arg19: int, arg20: int, arg21: int, arg22: float, arg23: int) -> bool: ...
pass

View file

@ -0,0 +1,14 @@
#include <pybind11/pybind11.h>
#include "MultiApp.h"
namespace py = pybind11;
// Defines the Python extension module "PyMultiApp", which exposes the C++
// class MultiApp to Python.
PYBIND11_MODULE(PyMultiApp, m) {
m.doc() = "MultiApp Module";
py::class_<MultiApp>(m, "MultiApp")
// Only the default constructor is exported.
.def(py::init<>())
// The Python method keeps the name of the C++ member it is bound to.
.def("update_with_init_vector_multi_pattern",
&MultiApp::update_with_init_vector_multi_pattern);
}

View file

@ -0,0 +1,18 @@
#
# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!
#
"""MultiApp Module"""
from __future__ import annotations
import PyMultiApp
import typing
__all__ = [
"MultiApp"
]
# Type stub for the pybind11 class exported by the PyMultiApp module.
class MultiApp():
def __init__(self) -> None: ...
# Positional-only binding: 12 arguments (all int except arg9, a bool);
# returns a bool.
def update_with_init_vector_multi_pattern(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: bool, arg10: int, arg11: int) -> bool: ...
pass

View file

@ -0,0 +1,15 @@
#include <pybind11/pybind11.h>
#include "SpikeGeneration2DManyIP.h"
namespace py = pybind11;
// Defines the Python extension module "PySpikeGeneration2DManyIP", which
// exposes the C++ class SpikeGeneration2DManyIP to Python.
PYBIND11_MODULE(PySpikeGeneration2DManyIP, m)
{
m.doc() = "SpikeGeneration2DManyIP Module";
py::class_<SpikeGeneration2DManyIP>(m, "SpikeGeneration2DManyIP")
// Only the default constructor is exported.
.def(py::init<>())
// The Python method "spike_generation" is bound to the C++ member
// spike_generation_entrypoint (not to the per-pattern worker).
.def("spike_generation",
&SpikeGeneration2DManyIP::spike_generation_entrypoint);
}

View file

@ -0,0 +1,18 @@
#
# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!
#
"""SpikeGeneration2DManyIP Module"""
from __future__ import annotations
import PySpikeGeneration2DManyIP
import typing
__all__ = [
"SpikeGeneration2DManyIP"
]
# Type stub for the pybind11 class exported by the PySpikeGeneration2DManyIP
# module.
class SpikeGeneration2DManyIP():
def __init__(self) -> None: ...
# Positional-only binding: 16 int arguments; returns a bool.
def spike_generation(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int) -> bool: ...
pass

View file

@ -0,0 +1,205 @@
#include "SpikeGeneration2DManyIP.h"
#include <omp.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <cassert>
#include <iostream>
// Constructor: the body is empty, no state is initialised here.
SpikeGeneration2DManyIP::SpikeGeneration2DManyIP()
{
};
// Destructor: the body is empty, nothing is released here.
SpikeGeneration2DManyIP::~SpikeGeneration2DManyIP()
{
};
// Python-facing entry point for the spike generation.
//
// The three *_pointer_addr arguments are raw memory addresses that are
// reinterpreted as buffers (input and random values: float, output: int64_t).
// Each buffer is described by four dimensions; from them the element strides
// of the first three axes are derived and handed to spike_generation(), which
// is called once per pattern (index along dimension 0 of the input) from an
// OpenMP parallel loop.
//
// Returns false (after printing an error) when number_of_cpu_processes <= 0,
// true otherwise. All pointers must be non-null and all dimensions > 0
// (checked with assert).
bool SpikeGeneration2DManyIP::spike_generation_entrypoint(
    int64_t input_pointer_addr, int64_t input_dim_0,
    int64_t input_dim_1, int64_t input_dim_2, int64_t input_dim_3,
    int64_t random_values_pointer_addr, int64_t random_values_dim_0,
    int64_t random_values_dim_1, int64_t random_values_dim_2,
    int64_t random_values_dim_3, int64_t output_pointer_addr,
    int64_t output_dim_0, int64_t output_dim_1, int64_t output_dim_2,
    int64_t output_dim_3, int64_t number_of_cpu_processes)
{
    float* input_pointer = (float*)input_pointer_addr;
    float* random_values_pointer = (float*)random_values_pointer_addr;
    int64_t* output_pointer = (int64_t*)output_pointer_addr;

    // Input
    assert((input_pointer != nullptr));
    assert((input_dim_0 > 0));
    assert((input_dim_1 > 0));
    assert((input_dim_2 > 0));
    assert((input_dim_3 > 0));

    // Random
    assert((random_values_pointer != nullptr));
    assert((random_values_dim_0 > 0));
    assert((random_values_dim_1 > 0));
    assert((random_values_dim_2 > 0));
    assert((random_values_dim_3 > 0));

    // Output
    assert((output_pointer != nullptr));
    assert((output_dim_0 > 0));
    assert((output_dim_1 > 0));
    assert((output_dim_2 > 0));
    assert((output_dim_3 > 0));

    // Element strides of axes 0, 1 and 2 (axis 3 has stride 1).
    // Input
    size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3;
    size_t input_dim_c1 = input_dim_2 * input_dim_3;
    size_t input_dim_c2 = input_dim_3;

    // Random
    size_t random_values_dim_c0 =
        random_values_dim_1 * random_values_dim_2 * random_values_dim_3;
    size_t random_values_dim_c1 =
        random_values_dim_2 * random_values_dim_3;
    size_t random_values_dim_c2 = random_values_dim_3;

    // Output
    size_t output_dim_c0 =
        output_dim_1 * output_dim_2 * output_dim_3;
    size_t output_dim_c1 = output_dim_2 * output_dim_3;
    size_t output_dim_c2 = output_dim_3;

    size_t number_of_pattern = input_dim_0;
    size_t h_dim = input_dim_1;
    size_t spike_dim = output_dim_1;
    size_t x_dim = output_dim_2;
    // The worker walks y with stride 1 and x with stride output_dim_3, so the
    // y extent is the size of axis 3. (It was previously taken from axis 2,
    // which is only correct for square outputs.)
    size_t y_dim = output_dim_3;

    if (number_of_cpu_processes > 0)
    {
        omp_set_num_threads(number_of_cpu_processes);
        // Debugging hint: force a single thread here to serialise the patterns.

        size_t pattern_id;
#pragma omp parallel for
        for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++)
        {
            spike_generation(
                input_pointer,
                input_dim_c0,
                input_dim_c1,
                input_dim_c2,
                random_values_pointer,
                random_values_dim_c0,
                random_values_dim_c1,
                random_values_dim_c2,
                output_pointer,
                output_dim_c0,
                output_dim_c1,
                output_dim_c2,
                x_dim,
                y_dim,
                spike_dim,
                h_dim,
                pattern_id);
        }
    }
    else
    {
        std::cout << "Error: number_of_processes <= 0" << std::endl;
        return false;
    }

    return true;
};
// Generates the spikes of one pattern.
//
// For every (x, y) position and every spike index, the random value at that
// position is looked up with lower_bound() in the h_dim input values of the
// same position (stride input_dim_c1), and the resulting index is stored in
// the output buffer. The *_dim_c0 / _c1 / _c2 arguments are the element
// strides of axes 0, 1 and 2 of the respective buffer.
// Always returns true.
bool SpikeGeneration2DManyIP::spike_generation(
    float* input_pointer,
    size_t input_dim_c0,
    size_t input_dim_c1,
    size_t input_dim_c2,
    float* random_values_pointer,
    size_t random_values_dim_c0,
    size_t random_values_dim_c1,
    size_t random_values_dim_c2,
    int64_t* output_pointer,
    size_t output_dim_c0,
    size_t output_dim_c1,
    size_t output_dim_c2,
    size_t x_dim,
    size_t y_dim,
    size_t spike_dim,
    size_t h_dim,
    size_t pattern_id)
{
    // Start of this pattern inside each of the three buffers.
    float* const pattern_input = input_pointer + pattern_id * input_dim_c0;
    int64_t* const pattern_output = output_pointer + pattern_id * output_dim_c0;
    float* const pattern_random =
        random_values_pointer + pattern_id * random_values_dim_c0;

    for (size_t ix = 0; ix < x_dim; ++ix)
    {
        for (size_t iy = 0; iy < y_dim; ++iy)
        {
            // First element (axis-1 index 0) of this spatial position.
            float* input_column = pattern_input + ix * input_dim_c2 + iy;
            int64_t* output_column = pattern_output + ix * output_dim_c2 + iy;
            float* random_column =
                pattern_random + ix * random_values_dim_c2 + iy;

            for (size_t spike = 0; spike < spike_dim; ++spike)
            {
                const float random_value =
                    random_column[spike * random_values_dim_c1];
                output_column[spike * output_dim_c1] = lower_bound(
                    input_column, h_dim, input_dim_c1, random_value);
            }
        }
    }

    return true;
};
// algorithmic idea stolen from libc++
// Binary search over a strided float sequence.
//
// Looks at the data_length elements data_ptr[0], data_ptr[stride],
// data_ptr[2 * stride], ... and returns the index of the first element that
// is NOT smaller than compare_to_value (data_length if every element is
// smaller). The search halves the candidate range in every step, so the
// elements are expected to be sorted in ascending order.
size_t SpikeGeneration2DManyIP::lower_bound(float* data_ptr,
                                            size_t data_length,
                                            size_t data_ptr_stride,
                                            float compare_to_value)
{
    size_t first = 0;
    size_t remaining = data_length;

    while (remaining > 0)
    {
        const size_t step = remaining / 2;
        const size_t probe = first + step;

        if (data_ptr[probe * data_ptr_stride] < compare_to_value)
        {
            // Answer lies strictly to the right of the probed element.
            first = probe + 1;
            remaining -= step + 1;
        }
        else
        {
            // Answer is the probed element or something to its left.
            remaining = step;
        }
    }

    return first;
};

View file

@ -0,0 +1,49 @@
#ifndef SRC_SPIKEGENERATION2DMANYIP_H_
#define SRC_SPIKEGENERATION2DMANYIP_H_
#include <unistd.h>
#include <cctype>
#include <iostream>
// Produces integer spike indices by searching random values in strided float
// input data (see lower_bound below).
class SpikeGeneration2DManyIP
{
public:
SpikeGeneration2DManyIP();
~SpikeGeneration2DManyIP();
// Entry point exposed to Python. The *_pointer_addr arguments are raw memory
// addresses (the implementation casts input/random values to float* and the
// output to int64_t*); each buffer is described by four dimensions.
// number_of_cpu_processes is the OpenMP thread count; the implementation
// returns false when it is <= 0.
bool spike_generation_entrypoint(
int64_t input_pointer_addr, int64_t input_dim_0,
int64_t input_dim_1, int64_t input_dim_2, int64_t input_dim_3,
int64_t random_values_pointer_addr, int64_t random_values_dim_0,
int64_t random_values_dim_1, int64_t random_values_dim_2,
int64_t random_values_dim_3, int64_t output_pointer_addr,
int64_t output_dim_0, int64_t output_dim_1, int64_t output_dim_2,
int64_t output_dim_3, int64_t number_of_cpu_processes);
// Worker for a single pattern (pattern_id). The *_dim_c0 / _c1 / _c2
// arguments are element strides of axes 0, 1 and 2 of the respective buffer;
// x_dim, y_dim and spike_dim are the loop extents and h_dim is the number of
// input values searched per position.
bool spike_generation(
float* input_pointer,
size_t input_dim_c0,
size_t input_dim_c1,
size_t input_dim_c2,
float* random_values_pointer,
size_t random_values_dim_c0,
size_t random_values_dim_c1,
size_t random_values_dim_c2,
int64_t* output_pointer,
size_t output_dim_c0,
size_t output_dim_c1,
size_t output_dim_c2,
size_t x_dim,
size_t y_dim,
size_t spike_dim,
size_t h_dim,
size_t pattern_id);
private:
// Strided binary search: index of the first of data_length elements (spaced
// data_ptr_stride apart) that is not smaller than compare_to_value.
size_t lower_bound(float* data_ptr, size_t data_length,
size_t data_ptr_stride,
float compare_to_value);
};
#endif /* SRC_SPIKEGENERATION2DMANYIP_H_ */

View file

@ -0,0 +1,138 @@
#include <unistd.h>
#include <bitset>
#include <cassert>
#include <cctype>
#include "error_term.cpp"
// Element-wise multiplication h[i] = h[i] * w[i] carried out on the bit
// patterns of the floats, optionally with an error term subtracted from the
// integer mantissa product (approximation_enable).
//
// h_pointer        in/out: first factor, overwritten with the result.
// w_pointer        in:     second factor; must not be written.
// pattern_length   number of elements in every array.
// number_of_trunc_bits / ap_mask
//                  forwarded to error_term() when the approximation is on.
// number_of_frac_bits
//                  number of mantissa bits kept for the integer product.
// ap_x_ptr, ap_y_ptr, ap_x_exponent_ptr, ap_y_exponent_ptr,
// ap_h_exponent_ptr, ap_res_ptr, sign_temp_ptr
//                  caller-provided scratch arrays of pattern_length elements.
//
// Debugging hint, the best way to print the bits:
// std::cout << std::bitset<32>(ap_y_ptr[1]) << "\n";
//
// NOTE(review): the float buffers are accessed through uint32_t* casts, which
// relies on the compiler tolerating this kind of type punning -- confirm the
// build flags.
void approximation_multiplication_function(
float *h_pointer, float *w_pointer, int64_t pattern_length,
uint64_t number_of_trunc_bits, uint64_t number_of_frac_bits,
uint32_t *ap_x_ptr, uint32_t *ap_y_ptr, uint32_t *ap_x_exponent_ptr,
uint32_t *ap_y_exponent_ptr, uint32_t *ap_h_exponent_ptr, uint32_t ap_mask,
uint64_t *ap_res_ptr, uint32_t *sign_temp_ptr, bool approximation_enable) {
uint64_t counter;
// Integer views onto the bit patterns of the two float arrays.
uint32_t *w_pointer_mod = (uint32_t *)w_pointer;
uint32_t *h_pointer_mod = (uint32_t *)h_pointer;
// Calculate the new sign: XOR of the two sign bits (bit 31)
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
sign_temp_ptr[counter] = (w_pointer_mod[counter] & 0x80000000) ^
(h_pointer_mod[counter] & 0x80000000);
}
// Extract the exponent: drop the sign bit, keep the following 8 bits
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
ap_x_exponent_ptr[counter] = (h_pointer_mod[counter] << 1) >> 24;
}
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
ap_y_exponent_ptr[counter] = (w_pointer_mod[counter] << 1) >> 24;
}
// Cast and "normalize": move the 23 fraction bits to the top, set an
// explicit leading one in bit 31 and keep the upper number_of_frac_bits bits
uint64_t shift_value = 32 - number_of_frac_bits;
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
ap_x_ptr[counter] =
((h_pointer_mod[counter] << 8) | 0x80000000) >> shift_value;
}
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
ap_y_ptr[counter] =
((w_pointer_mod[counter] << 8) | 0x80000000) >> shift_value;
}
// A float zero must give a zero mantissa (the step above always sets the
// leading one, also for 0.0)
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
if (h_pointer[counter] == 0) {
ap_x_ptr[counter] = 0;
}
}
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
if (w_pointer[counter] == 0) {
ap_y_ptr[counter] = 0;
}
}
// res = x*y (64 bit product of the two integer mantissas)
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
ap_res_ptr[counter] = static_cast<uint64_t>(ap_x_ptr[counter]) * static_cast<uint64_t>(ap_y_ptr[counter]);
}
uint32_t temp;
if (approximation_enable == true){
// Go through the vector values and subtract the error term from the exact
// product, clamping at zero instead of wrapping around
for (counter = 0; counter < pattern_length; counter++) {
temp = error_term(ap_y_ptr[counter], ap_x_ptr[counter], ap_mask,
number_of_trunc_bits);
if (temp > ap_res_ptr[counter]) {
ap_res_ptr[counter] = 0;
} else {
ap_res_ptr[counter] -= temp;
}
}
}
// Cast from int to float; from here on h_pointer holds the result mantissa
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
h_pointer[counter] = static_cast<float>(ap_res_ptr[counter]);
}
// Exponent field of the converted product
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
ap_h_exponent_ptr[counter] = (h_pointer_mod[counter] << 1) >> 24;
}
// Divide by 2^number_of_frac_bits once per factor, i.e. lower the exponent
// by 2 * number_of_frac_bits (unsigned arithmetic, may wrap temporarily)
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
ap_h_exponent_ptr[counter] -= 2 * number_of_frac_bits;
}
// Combine the three exponents.
// NOTE(review): 252 presumably is 2 * 127 (the two exponent biases) minus 2
// for the leading-one position of the two mantissas -- TODO confirm.
// NOTE(review): temp is declared outside this simd loop and written in every
// iteration; confirm the compiler treats it as private per lane.
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
temp = ap_x_exponent_ptr[counter] + ap_y_exponent_ptr[counter] +
ap_h_exponent_ptr[counter];
if (temp > 252) {
ap_h_exponent_ptr[counter] = temp - 252;
} else {
// Here I try to catch the case that the new exponent is too small
ap_h_exponent_ptr[counter] = 0;
}
}
// Remove the old exponent (and sign): keep only the low 23 bits
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
h_pointer_mod[counter] = (h_pointer_mod[counter] << 9) >> 9;
}
// Install the new exponent into bits 23 and above
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
h_pointer_mod[counter] += ap_h_exponent_ptr[counter] << 23;
}
// Add the sign back
#pragma omp simd
for (counter = 0; counter < pattern_length; counter++) {
h_pointer_mod[counter] += sign_temp_ptr[counter];
}
return;
}

View file

@ -0,0 +1,28 @@
#include <unistd.h>
#include <cassert>
#include <cctype>
// Computes the error term for a pair of integer operands.
//
// b is masked with ap_mask once up front. Then, for each of the lowest
// number_of_trunc_bits bits of a (least significant first), the current
// left-shifted copy of b -- masked with ap_mask again -- is added to the
// result whenever that bit of a is set. Returns the accumulated sum
// (0 when number_of_trunc_bits is 0).
uint32_t error_term(uint32_t a, uint32_t b, uint32_t ap_mask,
                    uint32_t number_of_trunc_bits) {
  uint32_t accumulated = 0;
  uint32_t remaining_a_bits = a;
  uint32_t shifted_b = b & ap_mask;

  for (uint32_t bit = 0; bit < number_of_trunc_bits; ++bit) {
    const bool bit_is_set = (remaining_a_bits & 1u) != 0u;
    if (bit_is_set) {
      accumulated += shifted_b & ap_mask;
    }
    // Advance to the next bit of a and the next shifted copy of b.
    remaining_a_bits >>= 1;
    shifted_b <<= 1;
  }

  return accumulated;
}

View file

@ -0,0 +1,23 @@
# %%
# pip install pybind11-stubgen
from pybind11_stubgen import ModuleStubsGenerator # type: ignore
import glob
def process(module_name: str) -> None:
    """Write the type stub ``<module_name>.pyi`` for an importable module.

    The stub is produced by ``ModuleStubsGenerator`` and is prefixed with a
    "do not edit" banner. No ``setup.py`` is requested from the generator.
    """
    generator = ModuleStubsGenerator(module_name)
    generator.parse()
    generator.write_setup_py = False

    stub_path = module_name + ".pyi"
    with open(stub_path, "w") as stub_file:
        # Banner first, then the generated lines joined without a trailing
        # newline.
        stub_file.write("#\n# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!\n#\n\n")
        stub_lines = generator.to_lines()
        stub_file.write("\n".join(stub_lines))
# Generate a stub for every shared object found in the current working
# directory.
Files = glob.glob("*.so")
for fid in Files:
# The module name is the file name up to (not including) its first dot.
Idx: int = fid.find(".")
module_name: str = fid[:Idx]
print("Processing: " + module_name)
process(module_name)

View file

@ -0,0 +1,318 @@
import torch
import math
from network.CPP.PyMultiApp import MultiApp
class Conv2dApproximation(torch.nn.Module):
in_channels: int | None = None
out_channels: int | None = None
kernel_size: list[int] | None = None
stride: list[int] = [1, 1]
padding: list[int] = [0, 0]
dilation: list[int] = [1, 1]
use_bias: bool = False
approximation_enable: bool = False
number_of_trunc_bits: int = -1
number_of_frac: int = -1
number_of_processes: int = 1
weights: torch.nn.parameter.Parameter
bias: torch.nn.parameter.Parameter | None
device: torch.device
dtype: torch.dtype
def __init__(
self,
in_channels: int,
out_channels: int,
kernel_size: list[int],
stride: list[int] = [1, 1],
padding: list[int] = [0, 0],
dilation: list[int] = [1, 1],
bias: bool = True,
approximation_enable: bool = False,
number_of_trunc_bits: int = -1,
number_of_frac: int = -1,
number_of_processes: int = 1,
device: torch.device | None = None,
dtype: torch.dtype | None = None,
) -> None:
super().__init__()
assert device is not None
self.device = device
assert dtype is not None
self.dtype = dtype
assert len(kernel_size) == 2
assert len(stride) == 2
assert len(padding) == 2
assert len(dilation) == 2
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.dilation = dilation
self.use_bias = bias
self.number_of_processes = number_of_processes
self.approximation_enable = approximation_enable
self.number_of_trunc_bits = number_of_trunc_bits
self.number_of_frac = number_of_frac
if self.use_bias is True:
self.bias: torch.nn.parameter.Parameter | None = (
torch.nn.parameter.Parameter(
torch.empty(
(out_channels),
dtype=self.dtype,
device=self.device,
)
)
)
else:
self.bias = None
self.weights: torch.nn.parameter.Parameter = torch.nn.parameter.Parameter(
torch.empty(
(out_channels, in_channels, *kernel_size),
dtype=self.dtype,
device=self.device,
)
)
self.functional_multi = FunctionalMultiConv2d.apply
self.reset_parameters()
def reset_parameters(self) -> None:
# Stolen from original torch conv2 code
torch.nn.init.kaiming_uniform_(self.weights, a=math.sqrt(5))
if self.bias is not None:
fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weights)
if fan_in != 0:
bound = 1 / math.sqrt(fan_in)
torch.nn.init.uniform_(self.bias, -bound, bound)
def calculate_output_size(self, value: torch.Tensor) -> None:
coordinates_0, coordinates_1 = self._get_coordinates(value)
self.output_size: torch.Tensor = torch.tensor(
[
coordinates_0.shape[1],
coordinates_1.shape[1],
],
dtype=torch.int64,
)
self.output_size.requires_grad_(False)
def _get_coordinates(
self, value: torch.Tensor
) -> tuple[torch.Tensor, torch.Tensor]:
"""Function converts parameter in coordinates
for the convolution window"""
assert value is not None
assert torch.is_tensor(value) is True
assert value.dim() == 1
assert torch.numel(value) == 2
assert value.dtype == torch.int64
assert value[0] > 0
assert value[1] > 0
assert self.kernel_size is not None
assert len(self.kernel_size) == 2
assert len(self.stride) == 2
assert len(self.dilation) == 2
assert len(self.padding) == 2
unfold_0: torch.nn.Unfold = torch.nn.Unfold(
kernel_size=(int(self.kernel_size[0]), 1),
dilation=int(self.dilation[0]),
padding=int(self.padding[0]),
stride=int(self.stride[0]),
)
unfold_1: torch.nn.Unfold = torch.nn.Unfold(
kernel_size=(1, int(self.kernel_size[1])),
dilation=int(self.dilation[1]),
padding=int(self.padding[1]),
stride=int(self.stride[1]),
)
coordinates_0: torch.Tensor = (
unfold_0(
torch.unsqueeze(
torch.unsqueeze(
torch.unsqueeze(
torch.arange(0, int(value[0]), dtype=torch.float32),
1,
),
0,
),
0,
)
)
.squeeze(0)
.type(torch.int64)
)
coordinates_1: torch.Tensor = (
unfold_1(
torch.unsqueeze(
torch.unsqueeze(
torch.unsqueeze(
torch.arange(0, int(value[1]), dtype=torch.float32),
0,
),
0,
),
0,
)
)
.squeeze(0)
.type(torch.int64)
)
return coordinates_0, coordinates_1
def forward(self, input: torch.Tensor) -> torch.Tensor:
assert input.dim() == 4
assert self.kernel_size is not None
input_size = torch.Tensor([int(input.shape[-2]), int(input.shape[-1])]).type(
dtype=torch.int64
)
self.calculate_output_size(input_size)
input_fold = torch.nn.functional.fold(
torch.nn.functional.unfold(
input.requires_grad_(True),
tuple(self.kernel_size),
tuple(self.dilation),
tuple(self.padding),
tuple(self.stride),
),
output_size=(int(self.output_size[0]), int(self.output_size[1])),
kernel_size=(1, 1),
dilation=(1, 1),
padding=(0, 0),
stride=(1, 1),
)
weights_fold = torch.nn.functional.unfold(
self.weights.requires_grad_(True),
tuple(self.kernel_size),
tuple(self.dilation),
tuple(self.padding),
tuple(self.stride),
).squeeze(-1)
if input.device == torch.device("cpu"):
number_of_cpu_processes: int = int(self.number_of_processes)
else:
number_of_cpu_processes = -1
# Here...
parameter_list = torch.tensor(
[
int(self.approximation_enable), # 0
int(self.number_of_trunc_bits), # 1
int(self.number_of_frac), # 2
int(number_of_cpu_processes), # 3
],
dtype=torch.int64,
)
output = self.functional_multi(input_fold, weights_fold, parameter_list)
if self.bias is not None:
output += self.bias.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
return output
class FunctionalMultiConv2d(torch.autograd.Function):
    """Autograd wrapper around the ``MultiApp`` C++ multiplication kernel.

    The forward pass is computed by the extension; the backward pass uses the
    gradients of an ordinary channel-wise weighted sum.
    """

    @staticmethod
    def forward(  # type: ignore
        ctx,
        input: torch.Tensor,
        weights: torch.Tensor,
        parameter_list: torch.Tensor,
    ) -> torch.Tensor:
        """Compute the output with the C++ kernel.

        Args:
            input: Contiguous float32 tensor with four dimensions; dimension 1
                must match dimension 1 of ``weights``.
            weights: Contiguous float32 tensor with two dimensions.
            parameter_list: Four integers: approximation_enable,
                number_of_trunc_bits, number_of_frac, number_of_processes.

        Returns:
            Tensor of shape ``(input.shape[0], weights.shape[0],
            input.shape[2], input.shape[3])``.

        Raises:
            RuntimeError: If the kernel reports failure.
        """
        assert input.ndim == 4
        assert input.dtype is torch.float32
        assert input.is_contiguous() is True

        assert weights.ndim == 2
        assert weights.dtype is torch.float32
        assert weights.is_contiguous() is True

        assert input.shape[1] == weights.shape[1]

        approximation_enable = bool(parameter_list[0])
        number_of_trunc_bits = int(parameter_list[1])
        number_of_frac = int(parameter_list[2])
        number_of_processes = int(parameter_list[3])

        assert input.device == weights.device

        output = torch.empty(
            (input.shape[0], weights.shape[0], input.shape[2], input.shape[3]),
            dtype=weights.dtype,
            device=weights.device,
            requires_grad=True,
        )
        assert output.is_contiguous() is True

        # The kernel works directly on the raw memory of the three tensors.
        multiplier: MultiApp = MultiApp()
        success = multiplier.update_with_init_vector_multi_pattern(
            input.data_ptr(),
            weights.data_ptr(),
            output.data_ptr(),
            int(output.shape[0]),  # pattern
            int(output.shape[1]),  # feature channel
            int(output.shape[2]),  # x
            int(output.shape[3]),  # y
            int(input.shape[1]),  # input channel
            int(number_of_processes),
            bool(approximation_enable),
            int(number_of_trunc_bits),
            int(number_of_frac),
        )
        # On failure the kernel has not filled `output`, which would otherwise
        # be returned as uninitialised memory.
        if not success:
            raise RuntimeError(
                "MultiApp.update_with_init_vector_multi_pattern failed "
                f"(number_of_processes={number_of_processes})"
            )

        ctx.save_for_backward(
            input.detach(),
            weights.detach(),
        )

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradients for input, weights and parameter_list.

        ``parameter_list`` is not differentiable, so its gradient is ``None``.
        """
        (input, weights) = ctx.saved_tensors

        # (N, F, 1, X, Y) * (1, F, C, 1, 1) summed over F -> (N, C, X, Y)
        grad_input = (
            grad_output.unsqueeze(2) * weights.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
        ).sum(1)

        # (N, F, 1, X, Y) * (N, 1, C, X, Y) summed over N, Y, X -> (F, C)
        grad_weights = (
            (grad_output.unsqueeze(2) * input.unsqueeze(1)).sum(0).sum(-1).sum(-1)
        )

        grad_parameter_list = None

        return (grad_input, grad_weights, grad_parameter_list)

View file

@ -1,39 +1,8 @@
# MIT License
# Copyright 2022 University of Bremen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# David Rotermund ( davrot@uni-bremen.de )
#
#
# Release history:
# ================
# 1.0.0 -- 01.05.2022: first release
#
#
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
import torch import torch
import numpy as np import numpy as np
import torchvision as tv # type: ignore import torchvision as tv # type: ignore
from Parameter import Config from network.Parameter import Config
class DatasetMaster(torch.utils.data.Dataset, ABC): class DatasetMaster(torch.utils.data.Dataset, ABC):
@ -114,8 +83,7 @@ class DatasetMNIST(DatasetMaster):
def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
"""0. The test image comes in """0. The test image comes in
1. is center cropped 1. is center cropped
2. on/off filteres 2. returned.
3. returned.
This is a 1 channel version (e.g. one gray channel). This is a 1 channel version (e.g. one gray channel).
""" """
@ -134,19 +102,14 @@ class DatasetMNIST(DatasetMaster):
# Preprocess the input data # Preprocess the input data
pattern = scripted_transforms(pattern) pattern = scripted_transforms(pattern)
# => On/Off gray = pattern[:, 0:1, :, :] + 1e-20
my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0])
gray: torch.Tensor = my_on_off_filter(
pattern[:, 0:1, :, :],
)
return gray return gray
def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
"""0. The training image comes in """0. The training image comes in
1. is cropped from a random position 1. is cropped from a random position
2. on/off filteres 2. returned.
3. returned.
This is a 1 channel version (e.g. one gray channel). This is a 1 channel version (e.g. one gray channel).
""" """
@ -165,11 +128,7 @@ class DatasetMNIST(DatasetMaster):
# Preprocess the input data # Preprocess the input data
pattern = scripted_transforms(pattern) pattern = scripted_transforms(pattern)
# => On/Off gray = pattern[:, 0:1, :, :] + 1e-20
my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0])
gray: torch.Tensor = my_on_off_filter(
pattern[:, 0:1, :, :],
)
return gray return gray
@ -204,8 +163,7 @@ class DatasetFashionMNIST(DatasetMaster):
def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
"""0. The test image comes in """0. The test image comes in
1. is center cropped 1. is center cropped
2. on/off filteres 2. returned.
3. returned.
This is a 1 channel version (e.g. one gray channel). This is a 1 channel version (e.g. one gray channel).
""" """
@ -224,19 +182,14 @@ class DatasetFashionMNIST(DatasetMaster):
# Preprocess the input data # Preprocess the input data
pattern = scripted_transforms(pattern) pattern = scripted_transforms(pattern)
# => On/Off gray = pattern[:, 0:1, :, :] + 1e-20
my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0])
gray: torch.Tensor = my_on_off_filter(
pattern[:, 0:1, :, :],
)
return gray return gray
def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
"""0. The training image comes in """0. The training image comes in
1. is cropped from a random position 1. is cropped from a random position
2. on/off filteres 2. returned.
3. returned.
This is a 1 channel version (e.g. one gray channel). This is a 1 channel version (e.g. one gray channel).
""" """
@ -262,11 +215,7 @@ class DatasetFashionMNIST(DatasetMaster):
# Preprocess the input data # Preprocess the input data
pattern = scripted_transforms(pattern) pattern = scripted_transforms(pattern)
# => On/Off gray = pattern[:, 0:1, :, :] + 1e-20
my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0])
gray: torch.Tensor = my_on_off_filter(
pattern[:, 0:1, :, :],
)
return gray return gray
@ -300,8 +249,7 @@ class DatasetCIFAR(DatasetMaster):
def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor:
"""0. The test image comes in """0. The test image comes in
1. is center cropped 1. is center cropped
2. on/off filteres 2. returned.
3. returned.
This is a 3 channel version (e.g. r,g,b channels). This is a 3 channel version (e.g. r,g,b channels).
""" """
@ -320,20 +268,9 @@ class DatasetCIFAR(DatasetMaster):
# Preprocess the input data # Preprocess the input data
pattern = scripted_transforms(pattern) pattern = scripted_transforms(pattern)
# => On/Off r = pattern[:, 0:1, :, :] + 1e-20
g = pattern[:, 1:2, :, :] + 1e-20
my_on_off_filter_r: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) b = pattern[:, 2:3, :, :] + 1e-20
my_on_off_filter_g: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[1])
my_on_off_filter_b: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[2])
r: torch.Tensor = my_on_off_filter_r(
pattern[:, 0:1, :, :],
)
g: torch.Tensor = my_on_off_filter_g(
pattern[:, 1:2, :, :],
)
b: torch.Tensor = my_on_off_filter_b(
pattern[:, 2:3, :, :],
)
new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1) new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1)
return new_tensor return new_tensor
@ -343,8 +280,7 @@ class DatasetCIFAR(DatasetMaster):
1. is cropped from a random position 1. is cropped from a random position
2. is randomly horizontally flipped 2. is randomly horizontally flipped
3. is randomly color jitteres 3. is randomly color jitteres
4. on/off filteres 4. returned.
5. returned.
This is a 3 channel version (e.g. r,g,b channels). This is a 3 channel version (e.g. r,g,b channels).
""" """
@ -369,54 +305,13 @@ class DatasetCIFAR(DatasetMaster):
# Preprocess the input data # Preprocess the input data
pattern = scripted_transforms(pattern) pattern = scripted_transforms(pattern)
# => On/Off r = pattern[:, 0:1, :, :] + 1e-20
my_on_off_filter_r: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) g = pattern[:, 1:2, :, :] + 1e-20
my_on_off_filter_g: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[1]) b = pattern[:, 2:3, :, :] + 1e-20
my_on_off_filter_b: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[2])
r: torch.Tensor = my_on_off_filter_r(
pattern[:, 0:1, :, :],
)
g: torch.Tensor = my_on_off_filter_g(
pattern[:, 1:2, :, :],
)
b: torch.Tensor = my_on_off_filter_b(
pattern[:, 2:3, :, :],
)
new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1) new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1)
return new_tensor return new_tensor
class OnOffFilter(torch.nn.Module):
    """Split a single-channel tensor into an "off" and an "on" channel.

    The input is shifted by ``p`` and scaled by 2. The first output channel
    holds the magnitude of the negative part, the second one the non-negative
    part; the respective other entries are zero.
    """

    def __init__(self, p: float = 0.5) -> None:
        super().__init__()
        self.p: float = p

    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
        """Return the two-channel on/off representation of ``tensor``.

        ``tensor`` must have exactly one entry along dimension 1; the result
        has two.
        """
        assert tensor.shape[1] == 1

        centered = 2.0 * (tensor - self.p)
        zeros = torch.zeros_like(centered)

        off_channel: torch.Tensor = torch.where(centered < 0.0, -centered, zeros)
        on_channel: torch.Tensor = torch.where(centered >= 0.0, centered, zeros)

        return torch.cat((off_channel, on_channel), dim=1)

    def __repr__(self) -> str:
        return type(self).__name__ + "(p={0})".format(self.p)
if __name__ == "__main__": if __name__ == "__main__":
pass pass

View file

@ -1,34 +1,3 @@
# MIT License
# Copyright 2022 University of Bremen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# David Rotermund ( davrot@uni-bremen.de )
#
#
# Release history:
# ================
# 1.0.0 -- 01.05.2022: first release
#
#
# %% # %%
from dataclasses import dataclass, field from dataclasses import dataclass, field
import numpy as np import numpy as np
@ -41,18 +10,14 @@ class Network:
"""Parameters of the network. The details about """Parameters of the network. The details about
its layers and the number of output neurons.""" its layers and the number of output neurons."""
number_of_output_neurons: int = field(default=0) layer_type: list[str] = field(default_factory=list)
forward_neuron_numbers: list[list[int]] = field(default_factory=list) forward_neuron_numbers: list[list[int]] = field(default_factory=list)
is_pooling_layer: list[bool] = field(default_factory=list)
forward_kernel_size: list[list[int]] = field(default_factory=list) forward_kernel_size: list[list[int]] = field(default_factory=list)
strides: list[list[int]] = field(default_factory=list) strides: list[list[int]] = field(default_factory=list)
dilation: list[list[int]] = field(default_factory=list) dilation: list[list[int]] = field(default_factory=list)
padding: list[list[int]] = field(default_factory=list) padding: list[list[int]] = field(default_factory=list)
w_trainable: list[bool] = field(default_factory=list) number_of_output_neurons: int = field(default=0)
eps_xy_trainable: list[bool] = field(default_factory=list)
eps_xy_mean: list[bool] = field(default_factory=list)
@dataclass @dataclass
@ -61,35 +26,35 @@ class LearningParameters:
learning_active: bool = field(default=True) learning_active: bool = field(default=True)
loss_mode: int = field(default=0)
loss_coeffs_mse: float = field(default=0.5) loss_coeffs_mse: float = field(default=0.5)
loss_coeffs_kldiv: float = field(default=1.0) loss_coeffs_kldiv: float = field(default=1.0)
optimizer_name: str = field(default="Adam") optimizer_name: str = field(default="Adam")
learning_rate_gamma_w: float = field(default=-1.0) learning_rate_gamma_w: float = field(default=-1.0)
learning_rate_gamma_eps_xy: float = field(default=-1.0)
learning_rate_threshold_w: float = field(default=0.00001) learning_rate_threshold_w: float = field(default=0.00001)
learning_rate_threshold_eps_xy: float = field(default=0.00001)
lr_schedule_name: str = field(default="ReduceLROnPlateau") lr_schedule_name: str = field(default="ReduceLROnPlateau")
lr_scheduler_use_performance: bool = field(default=True) lr_scheduler_use_performance: bool = field(default=False)
lr_scheduler_factor_w: float = field(default=0.75) lr_scheduler_factor_w: float = field(default=0.75)
lr_scheduler_patience_w: int = field(default=-1) lr_scheduler_patience_w: int = field(default=-1)
lr_scheduler_tau_w: int = field(default=10) lr_scheduler_tau_w: int = field(default=10)
lr_scheduler_factor_eps_xy: float = field(default=0.75)
lr_scheduler_patience_eps_xy: int = field(default=-1)
lr_scheduler_tau_eps_xy: int = field(default=10)
number_of_batches_for_one_update: int = field(default=1) number_of_batches_for_one_update: int = field(default=1)
overload_path: str = field(default="./Previous") overload_path: str = field(default="Previous")
weight_noise_amplitude: float = field(default=0.01) weight_noise_range: list[float] = field(default_factory=list)
eps_xy_intitial: float = field(default=0.1) eps_xy_intitial: float = field(default=0.1)
test_every_x_learning_steps: int = field(default=50) # disable_scale_grade: bool = field(default=False)
test_during_learning: bool = field(default=True) # kepp_last_grad_scale: bool = field(default=True)
alpha_number_of_iterations: int = field(default=0) sbs_skip_gradient_calculation: list[bool] = field(default_factory=list)
adapt_learning_rate_after_minibatch: bool = field(default=True)
w_trainable: list[bool] = field(default_factory=list)
@dataclass @dataclass
@ -105,8 +70,6 @@ class Augmentation:
jitter_saturation: float = field(default=0.1) jitter_saturation: float = field(default=0.1)
jitter_hue: float = field(default=0.15) jitter_hue: float = field(default=0.15)
use_on_off_filter: bool = field(default=True)
@dataclass @dataclass
class ImageStatistics: class ImageStatistics:
@ -117,6 +80,14 @@ class ImageStatistics:
the_size: list[int] = field(default_factory=list) the_size: list[int] = field(default_factory=list)
@dataclass
class ApproximationSetting:
# Approximation CONV2D Layer
approximation_enable: list[bool] = field(default_factory=list)
number_of_trunc_bits: list[int] = field(default_factory=list)
number_of_frac_bits: list[int] = field(default_factory=list)
@dataclass @dataclass
class Config: class Config:
"""Master config class.""" """Master config class."""
@ -126,25 +97,49 @@ class Config:
learning_parameters: LearningParameters = field(default_factory=LearningParameters) learning_parameters: LearningParameters = field(default_factory=LearningParameters)
augmentation: Augmentation = field(default_factory=Augmentation) augmentation: Augmentation = field(default_factory=Augmentation)
image_statistics: ImageStatistics = field(default_factory=ImageStatistics) image_statistics: ImageStatistics = field(default_factory=ImageStatistics)
approximation_setting: ApproximationSetting = field(
default_factory=ApproximationSetting
)
# For labeling simulations
# (not actively used)
simulation_id: int = field(default=0)
stage_id: int = field(default=-1)
# Size of one sub-mini-batch
# (the number of pattern processed at the same time)
batch_size: int = field(default=500) batch_size: int = field(default=500)
# The data set
# Identifier for Dataset.oy
data_mode: str = field(default="") data_mode: str = field(default="")
# The path to the data set
data_path: str = field(default="")
learning_step: int = field(default=0) # The epochs identifier
learning_step_max: int = field(default=10000) epoch_id: int = field(default=0)
# Maximum number of epochs
epoch_id_max: int = field(default=10000)
# Number of cpu threads
number_of_cpu_processes: int = field(default=-1) number_of_cpu_processes: int = field(default=-1)
# Adjust the number of pattern processed in
# one step to the amount of core or with HT threads
# of the cpu
enable_cpu_thread_balacing: bool = field(default=True)
number_of_spikes: int = field(default=0) # Path for storing information
weight_path: str = field(default="Parameters")
log_path: str = field(default="Log")
# Other SbS Settings
number_of_spikes: list[int] = field(default_factory=list)
cooldown_after_number_of_spikes: int = field(default=-1) cooldown_after_number_of_spikes: int = field(default=-1)
weight_path: str = field(default="./Weights/")
eps_xy_path: str = field(default="./EpsXY/")
data_path: str = field(default="./")
results_path: str = field(default="./Results")
reduction_cooldown: float = field(default=25.0) reduction_cooldown: float = field(default=25.0)
epsilon_0: float = field(default=1.0) epsilon_0: float = field(default=1.0)
forgetting_offset: float = field(default=-1.0)
def __post_init__(self) -> None: def __post_init__(self) -> None:
"""Post init determines the number of cores. """Post init determines the number of cores.
@ -159,10 +154,8 @@ class Config:
self.number_of_cpu_processes = number_of_cpu_processes_temp self.number_of_cpu_processes = number_of_cpu_processes_temp
os.makedirs(self.weight_path, exist_ok=True) os.makedirs(self.weight_path, exist_ok=True)
os.makedirs(self.eps_xy_path, exist_ok=True)
os.makedirs(self.data_path, exist_ok=True)
os.makedirs(self.results_path, exist_ok=True)
if self.enable_cpu_thread_balacing is True:
self.batch_size = ( self.batch_size = (
self.batch_size // self.number_of_cpu_processes self.batch_size // self.number_of_cpu_processes
) * self.number_of_cpu_processes ) * self.number_of_cpu_processes
@ -170,14 +163,18 @@ class Config:
self.batch_size = np.max((self.batch_size, self.number_of_cpu_processes)) self.batch_size = np.max((self.batch_size, self.number_of_cpu_processes))
self.batch_size = int(self.batch_size) self.batch_size = int(self.batch_size)
def get_epsilon_t(self): def get_epsilon_t(self, number_of_spikes: int):
"""Generates the time series of the basic epsilon.""" """Generates the time series of the basic epsilon."""
np_epsilon_t: np.ndarray = np.ones((self.number_of_spikes), dtype=np.float32) t = np.arange(0, number_of_spikes, dtype=np.float32) + 1
if (self.cooldown_after_number_of_spikes < self.number_of_spikes) and ( np_epsilon_t: np.ndarray = t ** (
-1.0 / 2.0
) # np.ones((number_of_spikes), dtype=np.float32)
if (self.cooldown_after_number_of_spikes < number_of_spikes) and (
self.cooldown_after_number_of_spikes >= 0 self.cooldown_after_number_of_spikes >= 0
): ):
np_epsilon_t[ np_epsilon_t[
self.cooldown_after_number_of_spikes : self.number_of_spikes self.cooldown_after_number_of_spikes : number_of_spikes
] /= self.reduction_cooldown ] /= self.reduction_cooldown
return torch.tensor(np_epsilon_t) return torch.tensor(np_epsilon_t)

714
network/SbS.py Normal file
View file

@ -0,0 +1,714 @@
import torch
from network.CPP.PySpikeGeneration2DManyIP import SpikeGeneration2DManyIP
from network.CPP.PyHDynamicCNNManyIP import HDynamicCNNManyIP
from network.calculate_output_size import calculate_output_size
class SbS(torch.nn.Module):
_epsilon_xy: torch.Tensor | None = None
_epsilon_0: float
_epsilon_t: torch.Tensor | None = None
_weights: torch.nn.parameter.Parameter
_weights_exists: bool = False
_kernel_size: list[int]
_stride: list[int]
_dilation: list[int]
_padding: list[int]
_output_size: torch.Tensor
_number_of_spikes: int
_number_of_cpu_processes: int
_number_of_neurons: int
_number_of_input_neurons: int
_epsilon_xy_intitial: float
_h_initial: torch.Tensor | None = None
_w_trainable: bool
# _last_grad_scale: torch.nn.parameter.Parameter
# _keep_last_grad_scale: bool
# _disable_scale_grade: bool
_forgetting_offset: torch.Tensor | None = None
_weight_noise_range: list[float]
_skip_gradient_calculation: bool
_is_pooling_layer: bool
_input_size: list[int]
_output_layer: bool = False
_local_learning: bool = False
device: torch.device
default_dtype: torch.dtype
_gpu_tuning_factor: int
_max_grad_weights: torch.Tensor | None = None
_number_of_grad_weight_contributions: float = 0.0
def __init__(
self,
number_of_input_neurons: int,
number_of_neurons: int,
input_size: list[int],
forward_kernel_size: list[int],
number_of_spikes: int,
epsilon_t: torch.Tensor,
epsilon_xy_intitial: float = 0.1,
epsilon_0: float = 1.0,
weight_noise_range: list[float] = [0.0, 1.0],
is_pooling_layer: bool = False,
strides: list[int] = [1, 1],
dilation: list[int] = [0, 0],
padding: list[int] = [0, 0],
number_of_cpu_processes: int = 1,
w_trainable: bool = False,
# keep_last_grad_scale: bool = False,
# disable_scale_grade: bool = True,
forgetting_offset: float = -1.0,
skip_gradient_calculation: bool = False,
device: torch.device | None = None,
default_dtype: torch.dtype | None = None,
gpu_tuning_factor: int = 5,
) -> None:
super().__init__()
assert device is not None
assert default_dtype is not None
self.device = device
self.default_dtype = default_dtype
self._w_trainable = bool(w_trainable)
# self._keep_last_grad_scale = bool(keep_last_grad_scale)
self._skip_gradient_calculation = bool(skip_gradient_calculation)
# self._disable_scale_grade = bool(disable_scale_grade)
self._epsilon_xy_intitial = float(epsilon_xy_intitial)
self._stride = strides
self._dilation = dilation
self._padding = padding
self._kernel_size = forward_kernel_size
self._number_of_input_neurons = int(number_of_input_neurons)
self._number_of_neurons = int(number_of_neurons)
self._epsilon_0 = float(epsilon_0)
self._number_of_cpu_processes = int(number_of_cpu_processes)
self._number_of_spikes = int(number_of_spikes)
self._weight_noise_range = weight_noise_range
self._is_pooling_layer = bool(is_pooling_layer)
assert len(input_size) == 2
self._input_size = input_size
# The GPU hates me...
# Too many SbS threads == bad
# Thus I need to limit them...
# (Reminder: We cannot access the mini-batch size here,
# which is part of the GPU thread size calculation...)
if (self._input_size[0] * self._input_size[1]) > gpu_tuning_factor:
self._gpu_tuning_factor = gpu_tuning_factor
else:
self._gpu_tuning_factor = 0
# self._last_grad_scale = torch.nn.parameter.Parameter(
# torch.tensor(-1.0, dtype=self.default_dtype),
# requires_grad=True,
# )
self._forgetting_offset = torch.tensor(
forgetting_offset, dtype=self.default_dtype, device=self.device
)
self.epsilon_t = epsilon_t.type(dtype=self.default_dtype).to(device=self.device)
self._output_size = calculate_output_size(
value=input_size,
kernel_size=self._kernel_size,
stride=self._stride,
dilation=self._dilation,
padding=self._padding,
)
self.set_h_init_to_uniform()
self.functional_sbs = FunctionalSbS.apply
# ###############################################################
# Initialize the weights
# ###############################################################
if self._is_pooling_layer is True:
self.weights = self._make_pooling_weights()
else:
assert len(self._weight_noise_range) == 2
weights = torch.empty(
(
int(self._kernel_size[0])
* int(self._kernel_size[1])
* int(self._number_of_input_neurons),
int(self._number_of_neurons),
),
dtype=self.default_dtype,
device=self.device,
)
torch.nn.init.uniform_(
weights,
a=float(self._weight_noise_range[0]),
b=float(self._weight_noise_range[1]),
)
self.weights = weights
####################################################################
# Variables in and out #
####################################################################
@property
def epsilon_t(self) -> torch.Tensor | None:
return self._epsilon_t
@epsilon_t.setter
def epsilon_t(self, value: torch.Tensor):
assert value is not None
assert torch.is_tensor(value) is True
assert value.dim() == 1
assert value.dtype == self.default_dtype
self._epsilon_t = (
value.detach()
.clone(memory_format=torch.contiguous_format)
.type(dtype=self.default_dtype)
.to(device=self.device)
.requires_grad_(False)
)
@property
def weights(self) -> torch.Tensor | None:
if self._weights_exists is False:
return None
else:
return self._weights
@weights.setter
def weights(self, value: torch.Tensor):
assert value is not None
assert torch.is_tensor(value) is True
assert value.dim() == 2
temp: torch.Tensor = (
value.detach()
.clone(memory_format=torch.contiguous_format)
.type(dtype=self.default_dtype)
.to(device=self.device)
)
temp /= temp.sum(dim=0, keepdim=True, dtype=self.default_dtype)
if self._weights_exists is False:
self._weights = torch.nn.parameter.Parameter(temp, requires_grad=True)
self._weights_exists = True
else:
self._weights.data = temp
@property
def h_initial(self) -> torch.Tensor | None:
return self._h_initial
@h_initial.setter
def h_initial(self, value: torch.Tensor):
assert value is not None
assert torch.is_tensor(value) is True
assert value.dim() == 1
assert value.dtype == self.default_dtype
self._h_initial = (
value.detach()
.clone(memory_format=torch.contiguous_format)
.type(dtype=self.default_dtype)
.to(device=self.device)
.requires_grad_(False)
)
def update_pre_care(self):
if self._weights.grad is not None:
assert self._number_of_grad_weight_contributions > 0
self._weights.grad /= self._number_of_grad_weight_contributions
self._number_of_grad_weight_contributions = 0.0
def update_after_care(self, threshold_weight: float):
if self._w_trainable is True:
self.norm_weights()
self.threshold_weights(threshold_weight)
self.norm_weights()
# def after_batch(self, new_state: bool = False):
# if self._keep_last_grad_scale is True:
# self._last_grad_scale.data = self._last_grad_scale.grad
# self._keep_last_grad_scale = new_state
# self._last_grad_scale.grad = torch.zeros_like(self._last_grad_scale.grad)
####################################################################
# Helper functions #
####################################################################
def _make_pooling_weights(self) -> torch.Tensor:
"""For generating the pooling weights."""
assert self._number_of_neurons is not None
assert self._kernel_size is not None
weights: torch.Tensor = torch.zeros(
(
int(self._kernel_size[0]),
int(self._kernel_size[1]),
int(self._number_of_neurons),
int(self._number_of_neurons),
),
dtype=self.default_dtype,
device=self.device,
)
for i in range(0, int(self._number_of_neurons)):
weights[:, :, i, i] = 1.0
weights = weights.moveaxis(-1, 0).moveaxis(-1, 1)
weights = torch.nn.functional.unfold(
input=weights,
kernel_size=(int(self._kernel_size[0]), int(self._kernel_size[1])),
dilation=(1, 1),
padding=(0, 0),
stride=(1, 1),
).squeeze()
weights = torch.moveaxis(weights, 0, 1)
return weights
def set_h_init_to_uniform(self) -> None:
assert self._number_of_neurons > 2
self.h_initial: torch.Tensor = torch.full(
(self._number_of_neurons,),
(1.0 / float(self._number_of_neurons)),
dtype=self.default_dtype,
device=self.device,
)
def norm_weights(self) -> None:
assert self._weights_exists is True
temp: torch.Tensor = (
self._weights.data.detach()
.clone(memory_format=torch.contiguous_format)
.type(dtype=self.default_dtype)
.to(device=self.device)
)
temp /= temp.sum(dim=0, keepdim=True, dtype=self.default_dtype)
self._weights.data = temp
def threshold_weights(self, threshold: float) -> None:
assert self._weights_exists is True
assert threshold >= 0
torch.clamp(
self._weights.data,
min=float(threshold),
max=None,
out=self._weights.data,
)
####################################################################
# Forward #
####################################################################
def forward(
self, input: torch.Tensor, labels: torch.Tensor | None = None
) -> torch.Tensor:
# Are we happy with the input?
assert input is not None
assert torch.is_tensor(input) is True
assert input.dim() == 4
assert input.dtype == self.default_dtype
assert input.shape[1] == self._number_of_input_neurons
assert input.shape[2] == self._input_size[0]
assert input.shape[3] == self._input_size[1]
# Are we happy with the rest of the network?
assert self._epsilon_0 is not None
assert self._epsilon_t is not None
assert self._h_initial is not None
assert self._forgetting_offset is not None
assert self._weights_exists is True
assert self._weights is not None
input_convolved = torch.nn.functional.fold(
torch.nn.functional.unfold(
input.requires_grad_(True),
kernel_size=(int(self._kernel_size[0]), int(self._kernel_size[1])),
dilation=(int(self._dilation[0]), int(self._dilation[1])),
padding=(int(self._padding[0]), int(self._padding[1])),
stride=(int(self._stride[0]), int(self._stride[1])),
),
output_size=tuple(self._output_size.tolist()),
kernel_size=(1, 1),
dilation=(1, 1),
padding=(0, 0),
stride=(1, 1),
)
epsilon_t_0: torch.Tensor = (
(self._epsilon_t * self._epsilon_0).type(input.dtype).to(input.device)
)
parameter_list = torch.tensor(
[
int(self._w_trainable), # 0
int(0), # int(self._disable_scale_grade), # 1
int(0), # int(self._keep_last_grad_scale), # 2
int(self._skip_gradient_calculation), # 3
int(self._number_of_spikes), # 4
int(self._number_of_cpu_processes), # 5
int(self._output_size[0]), # 6
int(self._output_size[1]), # 7
int(self._gpu_tuning_factor), # 8
int(self._output_layer), # 9
int(self._local_learning), # 10
],
dtype=torch.int64,
)
if self._epsilon_xy is None:
self._epsilon_xy = torch.full(
(
input_convolved.shape[1],
input_convolved.shape[2],
input_convolved.shape[3],
),
float(self._epsilon_xy_intitial),
dtype=self.default_dtype,
device=self.device,
)
assert self._epsilon_xy is not None
# In the case somebody tried to replace the matrix with wrong dimensions
assert self._epsilon_xy.shape[0] == input_convolved.shape[1]
assert self._epsilon_xy.shape[1] == input_convolved.shape[2]
assert self._epsilon_xy.shape[2] == input_convolved.shape[3]
# SbS forward functional
output = self.functional_sbs(
input_convolved,
self._epsilon_xy,
epsilon_t_0,
self._weights,
self._h_initial,
parameter_list,
# self._last_grad_scale,
self._forgetting_offset,
)
self._number_of_grad_weight_contributions += (
output.shape[0] * output.shape[-2] * output.shape[-1]
)
return output
class FunctionalSbS(torch.autograd.Function):
    """Autograd function behind the SbS layer.

    ``forward`` draws spikes from the (normalized) input and runs the h
    dynamic through the external ``SpikeGeneration2DManyIP`` and
    ``HDynamicCNNManyIP`` modules; ``backward`` computes the gradients with
    respect to the input and, if requested, the weights.
    """

    @staticmethod
    def forward(  # type: ignore
        ctx,
        input: torch.Tensor,
        epsilon_xy: torch.Tensor,
        epsilon_t_0: torch.Tensor,
        weights: torch.Tensor,
        h_initial: torch.Tensor,
        parameter_list: torch.Tensor,
        forgetting_offset: torch.Tensor,
    ) -> torch.Tensor:
        """Compute the layer output for a 4-D ``input``.

        ``parameter_list`` is an integer tensor; this method reads the number
        of spikes (4), the number of CPU processes (5), the output size (6, 7)
        and the GPU tuning factor (8) from it.
        """
        assert input.dim() == 4

        number_of_spikes: int = int(parameter_list[4])

        # Off the CPU, -1 is handed to the external modules instead of a
        # process count. The same value is used for both modules.
        if input.device == torch.device("cpu"):
            number_of_cpu_processes: int = int(parameter_list[5])
        else:
            number_of_cpu_processes = -1
        spike_number_of_cpu_processes: int = number_of_cpu_processes
        hdyn_number_of_cpu_processes: int = number_of_cpu_processes

        output_size_0: int = int(parameter_list[6])
        output_size_1: int = int(parameter_list[7])
        gpu_tuning_factor: int = int(parameter_list[8])

        # ###########################################################
        # Spike generation
        # ###########################################################

        # ############################################
        # Normalized cumsum
        # (beware of the pytorch bug! Thus .clone()!)
        # ############################################
        input_cumsum: torch.Tensor = torch.cumsum(input, dim=1, dtype=input.dtype)
        input_cumsum_last: torch.Tensor = input_cumsum[:, -1, :, :].unsqueeze(1).clone()
        input_cumsum /= input_cumsum_last

        # ############################################
        # Get the required random numbers
        # ############################################
        random_values = torch.rand(
            size=[
                input_cumsum.shape[0],
                number_of_spikes,
                input_cumsum.shape[2],
                input_cumsum.shape[3],
            ],
            dtype=input.dtype,
            device=input.device,
        )

        # ############################################
        # Make space for the results
        # ############################################
        spikes = torch.empty_like(random_values, dtype=torch.int64, device=input.device)

        # The external module works on raw data pointers, thus the memory
        # has to be contiguous.
        assert input_cumsum.is_contiguous() is True
        assert random_values.is_contiguous() is True
        assert spikes.is_contiguous() is True

        spike_generation: SpikeGeneration2DManyIP = SpikeGeneration2DManyIP()

        spike_generation.spike_generation(
            input_cumsum.data_ptr(),
            int(input_cumsum.shape[0]),
            int(input_cumsum.shape[1]),
            int(input_cumsum.shape[2]),
            int(input_cumsum.shape[3]),
            random_values.data_ptr(),
            int(random_values.shape[0]),
            int(random_values.shape[1]),
            int(random_values.shape[2]),
            int(random_values.shape[3]),
            spikes.data_ptr(),
            int(spikes.shape[0]),
            int(spikes.shape[1]),
            int(spikes.shape[2]),
            int(spikes.shape[3]),
            int(spike_number_of_cpu_processes),
        )
        del random_values
        del input_cumsum

        # ###########################################################
        # H dynamic
        # ###########################################################

        assert epsilon_t_0.ndim == 1
        assert epsilon_t_0.shape[0] >= number_of_spikes

        # ############################################
        # Make space for the results
        # ############################################
        output = torch.empty(
            (
                int(input.shape[0]),
                int(weights.shape[1]),
                output_size_0,
                output_size_1,
            ),
            dtype=input.dtype,
            device=input.device,
        )

        assert output.is_contiguous() is True
        assert epsilon_xy.is_contiguous() is True
        assert epsilon_t_0.is_contiguous() is True
        assert weights.is_contiguous() is True
        assert spikes.is_contiguous() is True
        assert h_initial.is_contiguous() is True

        assert epsilon_xy.ndim == 3
        assert weights.ndim == 2
        assert h_initial.ndim == 1

        h_dynamic: HDynamicCNNManyIP = HDynamicCNNManyIP()

        # Presumably fills `output` through its data pointer -- the
        # external implementation is not visible from here.
        h_dynamic.update(
            output.data_ptr(),
            int(output.shape[0]),
            int(output.shape[1]),
            int(output.shape[2]),
            int(output.shape[3]),
            epsilon_xy.data_ptr(),
            int(epsilon_xy.shape[0]),
            int(epsilon_xy.shape[1]),
            int(epsilon_xy.shape[2]),
            epsilon_t_0.data_ptr(),
            int(epsilon_t_0.shape[0]),
            weights.data_ptr(),
            int(weights.shape[0]),
            int(weights.shape[1]),
            spikes.data_ptr(),
            int(spikes.shape[0]),
            int(spikes.shape[1]),
            int(spikes.shape[2]),
            int(spikes.shape[3]),
            h_initial.data_ptr(),
            int(h_initial.shape[0]),
            hdyn_number_of_cpu_processes,
            float(forgetting_offset.item()),
            int(gpu_tuning_factor),
        )
        del spikes

        # ###########################################################
        # Save the necessary data for the backward pass
        # ###########################################################
        ctx.save_for_backward(
            input,
            weights,
            output,
            parameter_list,
        )

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradients for the seven arguments of ``forward``.

        Only the gradients for ``input`` (position 0) and ``weights``
        (position 3) can be different from ``None``. All entries are ``None``
        if gradient calculation is skipped (``parameter_list[3]``); the weight
        gradient is ``None`` if the weights are not trainable
        (``parameter_list[0]``).
        """
        # ##############################################
        # Get the variables back
        # ##############################################
        (
            input,
            weights,
            output,
            parameter_list,
        ) = ctx.saved_tensors

        # ##############################################
        # Parameters
        # ##############################################
        parameter_w_trainable: bool = bool(parameter_list[0])
        parameter_skip_gradient_calculation: bool = bool(parameter_list[3])
        parameter_output_layer: bool = bool(parameter_list[9])
        parameter_local_learning: bool = bool(parameter_list[10])

        # #################################################
        # User doesn't want us to calculate the gradients
        # #################################################
        if parameter_skip_gradient_calculation is True:
            return (None, None, None, None, None, None, None)

        # #################################################
        # Calculate backprop error (grad_input)
        # #################################################
        backprop_r: torch.Tensor = weights.unsqueeze(0).unsqueeze(-1).unsqueeze(
            -1
        ) * output.unsqueeze(1)

        backprop_bigr: torch.Tensor = backprop_r.sum(dim=2)

        backprop_z: torch.Tensor = backprop_r * (
            1.0 / (backprop_bigr + 1e-20)
        ).unsqueeze(2)
        grad_input: torch.Tensor = (backprop_z * grad_output.unsqueeze(1)).sum(2)
        del backprop_z

        # #################################################
        # Calculate weight gradient (grad_weights)
        # #################################################
        grad_weights = None

        if parameter_w_trainable is True:
            # Normalize the input over dim 1. This is done out of place:
            # `input` is a tensor saved by the autograd graph and must not be
            # modified here.
            input_normalized: torch.Tensor = input / input.sum(
                dim=1, keepdim=True, dtype=weights.dtype
            )

            if (parameter_output_layer is False) and (
                parameter_local_learning is True
            ):
                # #################################################
                # Local learning
                # #################################################
                grad_weights = (
                    (
                        -2
                        * (input_normalized - backprop_bigr).unsqueeze(2)
                        * output.unsqueeze(1)
                    )
                    .sum(0)
                    .sum(-1)
                    .sum(-1)
                )
            else:
                # #################################################
                # Backprop
                # #################################################
                backprop_f: torch.Tensor = output.unsqueeze(1) * (
                    input_normalized / (backprop_bigr**2 + 1e-20)
                ).unsqueeze(2)

                result_omega: torch.Tensor = backprop_bigr.unsqueeze(
                    2
                ) * grad_output.unsqueeze(1)
                result_omega -= (
                    (backprop_r * grad_output.unsqueeze(1)).sum(2).unsqueeze(2)
                )
                result_omega *= backprop_f
                del backprop_f
                grad_weights = result_omega.sum(0).sum(-1).sum(-1)
                del result_omega

            del input_normalized

        del backprop_bigr
        del backprop_r

        # One entry per argument of forward(): input, epsilon_xy, epsilon_t_0,
        # weights, h_initial, parameter_list, forgetting_offset.
        return (
            grad_input,
            None,
            None,
            grad_weights,
            None,
            None,
            None,
        )

View file

@ -0,0 +1,54 @@
import torch
class SplitOnOffLayer(torch.nn.Module):
device: torch.device
default_dtype: torch.dtype
mean: torch.Tensor | None = None
epsilon: float = 0.01
def __init__(
self,
device: torch.device | None = None,
default_dtype: torch.dtype | None = None,
) -> None:
super().__init__()
assert device is not None
assert default_dtype is not None
self.device = device
self.default_dtype = default_dtype
####################################################################
# Forward #
####################################################################
def forward(self, input: torch.Tensor) -> torch.Tensor:
assert input.ndim == 4
# self.training is switched by network.eval() and network.train()
if self.training is True:
mean_temp = (
input.mean(dim=0, keepdim=True)
.mean(dim=1, keepdim=True)
.detach()
.clone()
)
if self.mean is None:
self.mean = mean_temp
else:
self.mean = (1.0 - self.epsilon) * self.mean + self.epsilon * mean_temp
assert self.mean is not None
temp = input - self.mean.detach().clone()
temp_a = torch.nn.functional.relu(temp)
temp_b = torch.nn.functional.relu(-temp)
output = torch.cat((temp_a, temp_b), dim=1)
output /= output.sum(dim=1, keepdim=True) + 1e-20
return output

68
network/build_datasets.py Normal file
View file

@ -0,0 +1,68 @@
# %%
import torch
from network.Dataset import (
DatasetMaster,
DatasetCIFAR,
DatasetMNIST,
DatasetFashionMNIST,
)
from network.Parameter import Config
def build_datasets(
    cfg: Config,
) -> tuple[
    DatasetMaster,
    DatasetMaster,
    torch.utils.data.DataLoader,
    torch.utils.data.DataLoader,
]:
    """Create the train/test data sets and their data loaders.

    The data set class is selected by ``cfg.data_mode`` ("CIFAR10", "MNIST"
    or "MNIST_FASHION"); any other value raises an ``Exception``.

    Side effects on ``cfg.image_statistics``: ``mean`` is taken from the
    training data set if it is empty, and ``the_size`` is set to the pattern
    size reduced by twice the augmentation crop width.

    Returns, in this order: training data set, test data set, test loader
    (not shuffled), training loader (shuffled).
    """
    # Pick the data set class that belongs to the requested mode
    dataset_class = None
    for mode_name, candidate in (
        ("CIFAR10", DatasetCIFAR),
        ("MNIST", DatasetMNIST),
        ("MNIST_FASHION", DatasetFashionMNIST),
    ):
        if cfg.data_mode == mode_name:
            dataset_class = candidate
            break
    else:
        raise Exception("data_mode unknown")

    # Load the input data
    the_dataset_train: DatasetMaster = dataset_class(
        train=True, path_pattern=cfg.data_path, path_label=cfg.data_path
    )
    the_dataset_test: DatasetMaster = dataset_class(
        train=False, path_pattern=cfg.data_path, path_label=cfg.data_path
    )

    if len(cfg.image_statistics.mean) == 0:
        cfg.image_statistics.mean = the_dataset_train.mean

    # The basic size minus the stuff we cut away in the pattern filter
    crop_total = 2 * cfg.augmentation.crop_width_in_pixel
    pattern_shape = the_dataset_train.pattern_storage.shape
    cfg.image_statistics.the_size = [
        pattern_shape[2] - crop_total,
        pattern_shape[3] - crop_total,
    ]

    my_loader_test: torch.utils.data.DataLoader = torch.utils.data.DataLoader(
        the_dataset_test, batch_size=cfg.batch_size, shuffle=False
    )
    my_loader_train: torch.utils.data.DataLoader = torch.utils.data.DataLoader(
        the_dataset_train, batch_size=cfg.batch_size, shuffle=True
    )

    return the_dataset_train, the_dataset_test, my_loader_test, my_loader_train

View file

@ -0,0 +1,86 @@
# %%
import torch
from network.Parameter import Config
try:
from network.SbSLRScheduler import SbSLRScheduler
sbs_lr_scheduler: bool = True
except Exception:
sbs_lr_scheduler = False
def build_lr_scheduler(
    optimizer, cfg: "Config", logging
) -> "list[torch.optim.lr_scheduler.ReduceLROnPlateau | SbSLRScheduler | None]":
    """Create one learning-rate scheduler (or ``None``) per optimizer.

    The annotations are written as strings on purpose: they are then not
    evaluated when the function is defined, so this module can still be
    imported when the optional ``SbSLRScheduler`` import failed (otherwise
    the definition itself would raise ``NameError``).

    Args:
        optimizer: non-empty sequence of optimizers; entries may be ``None``.
        cfg: configuration; ``cfg.learning_parameters`` provides
            ``lr_schedule_name`` ("None", "ReduceLROnPlateau" or
            "SbSLRScheduler") and the ``lr_scheduler_*_w`` settings.
        logging: logger-like object providing ``info``.

    Returns:
        A list with one entry per optimizer. The entry is ``None`` when the
        schedule is "None" or the optimizer itself is ``None``.

    Raises:
        Exception: for an unknown schedule name, or when "SbSLRScheduler" is
            requested but the class could not be imported.
    """
    assert len(optimizer) > 0

    parameters = cfg.learning_parameters
    lr_scheduler_list: list = []

    for id_optimizer, optimizer_item in enumerate(optimizer):
        schedule_name = parameters.lr_schedule_name

        if schedule_name == "None":
            logging.info(f"Using lr scheduler for optimizer {id_optimizer} : None")
            lr_scheduler_list.append(None)

        elif schedule_name == "ReduceLROnPlateau":
            logging.info(
                f"Using lr scheduler for optimizer {id_optimizer}: ReduceLROnPlateau"
            )

            if optimizer_item is None:
                lr_scheduler_list.append(None)
            elif (parameters.lr_scheduler_factor_w <= 0) or (
                parameters.lr_scheduler_patience_w <= 0
            ):
                # No usable factor/patience configured: use PyTorch's defaults
                lr_scheduler_list.append(
                    torch.optim.lr_scheduler.ReduceLROnPlateau(
                        optimizer_item,
                        eps=1e-14,
                    )
                )
            else:
                lr_scheduler_list.append(
                    torch.optim.lr_scheduler.ReduceLROnPlateau(
                        optimizer_item,
                        factor=parameters.lr_scheduler_factor_w,
                        patience=parameters.lr_scheduler_patience_w,
                        eps=1e-14,
                    )
                )

        elif schedule_name == "SbSLRScheduler":
            logging.info(
                f"Using lr scheduler for optimizer {id_optimizer}: SbSLRScheduler"
            )

            # Set by the guarded import at module level
            if sbs_lr_scheduler is False:
                raise Exception(
                    f"lr_scheduler for optimizer {id_optimizer}: SbSLRScheduler.py missing"
                )

            if optimizer_item is None:
                lr_scheduler_list.append(None)
            elif (
                (parameters.lr_scheduler_factor_w <= 0)
                or (parameters.lr_scheduler_patience_w <= 0)
                or (parameters.lr_scheduler_tau_w <= 0)
            ):
                # Unlike ReduceLROnPlateau there is no default fallback here:
                # incomplete settings disable the scheduler.
                lr_scheduler_list.append(None)
            else:
                lr_scheduler_list.append(
                    SbSLRScheduler(
                        optimizer_item,
                        factor=parameters.lr_scheduler_factor_w,
                        patience=parameters.lr_scheduler_patience_w,
                        tau=parameters.lr_scheduler_tau_w,
                    )
                )

        else:
            raise Exception("lr_scheduler not implemented")

    return lr_scheduler_list

354
network/build_network.py Normal file
View file

@ -0,0 +1,354 @@
# %%
import torch
from network.calculate_output_size import calculate_output_size
from network.Parameter import Config
from network.SbS import SbS
from network.SplitOnOffLayer import SplitOnOffLayer
from network.Conv2dApproximation import Conv2dApproximation
def build_network(
    cfg: Config, device: torch.device, default_dtype: torch.dtype, logging
) -> torch.nn.Sequential:
    """Build the layer stack listed in ``cfg.network_structure.layer_type``.

    The layer type string (case-insensitive prefix) selects what is created:
    "SBS..." (SbS layer; "POOLING" / "LOCAL" anywhere in the name switch on
    pooling mode / local learning), "ONOFF...", "CONV2D...", "RELU...",
    "MAX POOLING...", "AVERAGE POOLING..." and "APPROX CONV2D...".

    Per-layer settings are read from the lists in ``cfg``. A list that is too
    short for a layer leaves the default in place; the approximation
    settings, ``w_trainable``, ``sbs_skip_gradient_calculation`` and
    ``number_of_spikes`` additionally accept a single-element list that is
    applied to every layer.

    Args:
        cfg: network, learning and approximation configuration.
        device: device handed to the created layers.
        default_dtype: dtype handed to the created layers.
        logging: logger-like object providing ``info``.

    Returns:
        The assembled ``torch.nn.Sequential``.

    Raises:
        Exception: if a layer type is not recognised.
        AssertionError: if a layer that needs channel counts (or a spike
            count) has none configured.
    """
    network = torch.nn.Sequential()

    # input_size[-1] is always the x/y size that the next layer will receive
    input_size: list[list[int]] = []
    input_size.append(cfg.image_statistics.the_size)

    number_of_layers: int = len(cfg.network_structure.layer_type)

    for layer_id in range(0, number_of_layers):
        layer_type = cfg.network_structure.layer_type[layer_id]

        # #############################################################
        # Show infos about the layer:
        # #############################################################
        logging.info("")
        logging.info(f"Layer ID: {layer_id}")
        logging.info(f"Layer type: {layer_type}")

        # #############################################################
        # Fill in the default values
        # #############################################################
        kernel_size: list[int] = [1, 1]
        if len(cfg.network_structure.forward_kernel_size) > layer_id:
            kernel_size = cfg.network_structure.forward_kernel_size[layer_id]

        padding: list[int] = [0, 0]
        if len(cfg.network_structure.padding) > layer_id:
            padding = cfg.network_structure.padding[layer_id]

        dilation: list[int] = [1, 1]
        if len(cfg.network_structure.dilation) > layer_id:
            dilation = cfg.network_structure.dilation[layer_id]

        # Strides are only taken over when both components are given
        strides: list[int] = [1, 1]
        if len(cfg.network_structure.strides) > layer_id:
            if len(cfg.network_structure.strides[layer_id]) == 2:
                strides = cfg.network_structure.strides[layer_id]

        # -1 marks "not configured"; layers that need channels assert > 0
        in_channels: int = -1
        out_channels: int = -1
        if len(cfg.network_structure.forward_neuron_numbers) > layer_id:
            if len(cfg.network_structure.forward_neuron_numbers[layer_id]) == 2:
                in_channels = cfg.network_structure.forward_neuron_numbers[layer_id][0]
                out_channels = cfg.network_structure.forward_neuron_numbers[layer_id][1]

        weight_noise_range: list[float] = [1.0, 1.1]
        if len(cfg.learning_parameters.weight_noise_range) == 2:
            weight_noise_range = [
                float(cfg.learning_parameters.weight_noise_range[0]),
                float(cfg.learning_parameters.weight_noise_range[1]),
            ]

        logging.info(f"Input channels: {in_channels}")
        logging.info(f"Output channels: {out_channels}")
        logging.info(f"Kernel size: {kernel_size}")
        logging.info(f"Stride: {strides}")
        logging.info(f"Dilation: {dilation}")
        logging.info(f"Padding: {padding}")

        # Conv2D
        bias: bool = True

        # Approx settings
        approximation_enable: bool = False
        if len(cfg.approximation_setting.approximation_enable) > layer_id:
            approximation_enable = cfg.approximation_setting.approximation_enable[
                layer_id
            ]
            logging.info(f"Approximation Enable: {approximation_enable}")
        elif len(cfg.approximation_setting.approximation_enable) == 1:
            approximation_enable = cfg.approximation_setting.approximation_enable[0]
            logging.info(f"Approximation Enable: {approximation_enable}")

        number_of_trunc_bits: int = -1
        if len(cfg.approximation_setting.number_of_trunc_bits) > layer_id:
            number_of_trunc_bits = cfg.approximation_setting.number_of_trunc_bits[
                layer_id
            ]
            logging.info(f"Number of trunc bits: {number_of_trunc_bits}")
        elif len(cfg.approximation_setting.number_of_trunc_bits) == 1:
            number_of_trunc_bits = cfg.approximation_setting.number_of_trunc_bits[0]
            logging.info(f"Number of trunc bits: {number_of_trunc_bits}")

        number_of_frac_bits: int = -1
        if len(cfg.approximation_setting.number_of_frac_bits) > layer_id:
            number_of_frac_bits = cfg.approximation_setting.number_of_frac_bits[
                layer_id
            ]
            # Log the frac bits (the trunc bits were logged here by mistake)
            logging.info(f"Number of frac bits: {number_of_frac_bits}")
        elif len(cfg.approximation_setting.number_of_frac_bits) == 1:
            number_of_frac_bits = cfg.approximation_setting.number_of_frac_bits[0]
            logging.info(f"Number of frac bits: {number_of_frac_bits}")

        # Weights: Trainable?
        w_trainable: bool = False
        if len(cfg.learning_parameters.w_trainable) > layer_id:
            w_trainable = cfg.learning_parameters.w_trainable[layer_id]
        elif len(cfg.learning_parameters.w_trainable) == 1:
            w_trainable = cfg.learning_parameters.w_trainable[0]
        logging.info(f"W trainable?: {w_trainable}")

        # SbS Setting
        sbs_skip_gradient_calculation: bool = False
        if len(cfg.learning_parameters.sbs_skip_gradient_calculation) > layer_id:
            sbs_skip_gradient_calculation = (
                cfg.learning_parameters.sbs_skip_gradient_calculation[layer_id]
            )
        elif len(cfg.learning_parameters.sbs_skip_gradient_calculation) == 1:
            sbs_skip_gradient_calculation = (
                cfg.learning_parameters.sbs_skip_gradient_calculation[0]
            )

        # The layer type is matched case-insensitively
        layer_type_upper = layer_type.upper()

        # #############################################################
        # SbS layer:
        # #############################################################
        if layer_type_upper.startswith("SBS"):

            assert in_channels > 0
            assert out_channels > 0

            number_of_spikes: int = -1
            if len(cfg.number_of_spikes) > layer_id:
                number_of_spikes = cfg.number_of_spikes[layer_id]
            elif len(cfg.number_of_spikes) == 1:
                number_of_spikes = cfg.number_of_spikes[0]

            assert number_of_spikes > 0

            logging.info(
                f"Layer: {layer_id} -> SbS Layer with {number_of_spikes} spikes"
            )

            is_pooling_layer: bool = "POOLING" in layer_type_upper

            network.append(
                SbS(
                    number_of_input_neurons=in_channels,
                    number_of_neurons=out_channels,
                    input_size=input_size[-1],
                    forward_kernel_size=kernel_size,
                    number_of_spikes=number_of_spikes,
                    epsilon_t=cfg.get_epsilon_t(number_of_spikes),
                    epsilon_xy_intitial=cfg.learning_parameters.eps_xy_intitial,
                    epsilon_0=cfg.epsilon_0,
                    weight_noise_range=weight_noise_range,
                    is_pooling_layer=is_pooling_layer,
                    strides=strides,
                    dilation=dilation,
                    padding=padding,
                    number_of_cpu_processes=cfg.number_of_cpu_processes,
                    w_trainable=w_trainable,
                    # keep_last_grad_scale=cfg.learning_parameters.kepp_last_grad_scale,
                    # disable_scale_grade=cfg.learning_parameters.disable_scale_grade,
                    forgetting_offset=cfg.forgetting_offset,
                    skip_gradient_calculation=sbs_skip_gradient_calculation,
                    device=device,
                    default_dtype=default_dtype,
                )
            )

            # Adding the x,y output dimensions
            input_size.append(network[-1]._output_size.tolist())

            # Only the very last layer of the network is the output layer
            network[-1]._output_layer = layer_id == number_of_layers - 1

            network[-1]._local_learning = "LOCAL" in layer_type_upper

        # #############################################################
        # Split On Off Layer:
        # #############################################################
        elif layer_type_upper.startswith("ONOFF"):
            logging.info(f"Layer: {layer_id} -> Split On Off Layer")
            network.append(
                SplitOnOffLayer(
                    device=device,
                    default_dtype=default_dtype,
                )
            )
            # The x,y size is passed on unchanged
            input_size.append(input_size[-1])

        # #############################################################
        # PyTorch CONV2D layer:
        # #############################################################
        elif layer_type_upper.startswith("CONV2D"):
            assert in_channels > 0
            assert out_channels > 0

            logging.info(f"Layer: {layer_id} -> CONV2D Layer")
            network.append(
                torch.nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=(int(kernel_size[0]), int(kernel_size[1])),
                    stride=(int(strides[0]), int(strides[1])),
                    dilation=(int(dilation[0]), int(dilation[1])),
                    bias=bias,
                    padding=(int(padding[0]), int(padding[1])),
                    device=device,
                    dtype=default_dtype,
                )
            )

            # I need this later...
            network[-1]._w_trainable = w_trainable

            # Calculate the x,y output dimensions
            input_size_temp = calculate_output_size(
                value=input_size[-1],
                kernel_size=kernel_size,
                stride=strides,
                dilation=dilation,
                padding=padding,
            ).tolist()
            input_size.append(input_size_temp)

        # #############################################################
        # PyTorch RELU layer:
        # #############################################################
        elif layer_type_upper.startswith("RELU"):
            logging.info(f"Layer: {layer_id} -> RELU Layer")
            network.append(torch.nn.ReLU())

            # The x,y size is passed on unchanged
            input_size.append(input_size[-1])

        # #############################################################
        # PyTorch MAX Pooling layer:
        # #############################################################
        elif layer_type_upper.startswith("MAX POOLING"):
            logging.info(f"Layer: {layer_id} -> MAX POOLING Layer")
            network.append(
                torch.nn.MaxPool2d(
                    kernel_size=(int(kernel_size[0]), int(kernel_size[1])),
                    stride=(int(strides[0]), int(strides[1])),
                    padding=(int(padding[0]), int(padding[1])),
                    dilation=(int(dilation[0]), int(dilation[1])),
                )
            )

            # Calculate the x,y output dimensions
            input_size_temp = calculate_output_size(
                value=input_size[-1],
                kernel_size=kernel_size,
                stride=strides,
                dilation=dilation,
                padding=padding,
            ).tolist()
            input_size.append(input_size_temp)

        # #############################################################
        # PyTorch Average Pooling layer:
        # #############################################################
        elif layer_type_upper.startswith("AVERAGE POOLING"):
            logging.info(f"Layer: {layer_id} -> AVERAGE POOLING Layer")
            network.append(
                torch.nn.AvgPool2d(
                    kernel_size=(int(kernel_size[0]), int(kernel_size[1])),
                    stride=(int(strides[0]), int(strides[1])),
                    padding=(int(padding[0]), int(padding[1])),
                )
            )

            # Calculate the x,y output dimensions
            input_size_temp = calculate_output_size(
                value=input_size[-1],
                kernel_size=kernel_size,
                stride=strides,
                dilation=dilation,
                padding=padding,
            ).tolist()
            input_size.append(input_size_temp)

        # #############################################################
        # Approx CONV2D layer:
        # #############################################################
        elif layer_type_upper.startswith("APPROX CONV2D"):
            assert in_channels > 0
            assert out_channels > 0

            logging.info(f"Layer: {layer_id} -> Approximation CONV2D Layer")
            network.append(
                Conv2dApproximation(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=(int(kernel_size[0]), int(kernel_size[1])),
                    stride=(int(strides[0]), int(strides[1])),
                    dilation=(int(dilation[0]), int(dilation[1])),
                    bias=bias,
                    padding=(int(padding[0]), int(padding[1])),
                    device=device,
                    dtype=default_dtype,
                    approximation_enable=approximation_enable,
                    number_of_trunc_bits=number_of_trunc_bits,
                    number_of_frac=number_of_frac_bits,
                    number_of_processes=cfg.number_of_cpu_processes,
                )
            )

            # I need this later...
            network[-1]._w_trainable = w_trainable

            # Calculate the x,y output dimensions
            input_size_temp = calculate_output_size(
                value=input_size[-1],
                kernel_size=kernel_size,
                stride=strides,
                dilation=dilation,
                padding=padding,
            ).tolist()
            input_size.append(input_size_temp)

        # #############################################################
        # Failure because no known layer type matched
        # #############################################################
        else:
            raise Exception(
                f"Unknown layer type: {cfg.network_structure.layer_type[layer_id]}"
            )

    return network

View file

@ -0,0 +1,83 @@
# %%
import torch
from network.Parameter import Config
from network.SbS import SbS
from network.Conv2dApproximation import Conv2dApproximation
from network.Adam import Adam
def build_optimizer(
    network: torch.nn.Sequential, cfg: Config, logging
) -> list[torch.optim.Optimizer | None]:
    """Collect all trainable parameters and attach them to one optimizer.

    Parameters are gathered from SbS layers (their ``_weights``) and from
    ``Conv2d`` / ``Conv2dApproximation`` layers (everything returned by
    ``parameters()``), in each case only if the layer's ``_w_trainable`` is
    ``True``. A parallel list of flags records which parameters belong to an
    SbS layer; it is passed to the project's ``Adam`` only.

    Args:
        network: the network whose layers are inspected.
        cfg: configuration; ``cfg.learning_parameters`` provides
            ``optimizer_name`` ("Adam" or "SGD") and ``learning_rate_gamma_w``.
        logging: logger-like object providing ``info``.

    Returns:
        A one-element list holding the optimizer, or ``None`` when there is
        nothing to optimize.

    Raises:
        Exception: if the optimizer name is unknown.
        ValueError: if SGD is requested without a positive learning rate.
    """
    parameter_list_weights: list = []
    parameter_list_sbs: list = []

    # ###############################################
    # Put all parameter that needs to be learned
    # in a parameter list.
    # ###############################################
    for layer in network:

        if isinstance(layer, SbS) and (layer._w_trainable is True):
            parameter_list_weights.append(layer._weights)
            parameter_list_sbs.append(True)

        if isinstance(layer, torch.nn.modules.conv.Conv2d) and (
            layer._w_trainable is True
        ):
            for parameter in layer.parameters():
                parameter_list_weights.append(parameter)
                parameter_list_sbs.append(False)

        if isinstance(layer, Conv2dApproximation) and (layer._w_trainable is True):
            for parameter in layer.parameters():
                parameter_list_weights.append(parameter)
                parameter_list_sbs.append(False)

    logging.info(
        f"Number of parameters found to optimize: {len(parameter_list_weights)}"
    )

    # ###############################################
    # Connect the parameters to an optimizer
    # ###############################################
    optimizer_wf: torch.optim.Optimizer | None = None
    learning_parameters = cfg.learning_parameters

    if learning_parameters.optimizer_name == "Adam":
        logging.info("Using optimizer: Adam")

        if len(parameter_list_weights) == 0:
            optimizer_wf = None
        elif learning_parameters.learning_rate_gamma_w > 0:
            optimizer_wf = Adam(
                parameter_list_weights,
                parameter_list_sbs,
                lr=learning_parameters.learning_rate_gamma_w,
            )
        else:
            # No learning rate configured: fall back to Adam's own default
            optimizer_wf = Adam(parameter_list_weights, parameter_list_sbs)

    elif learning_parameters.optimizer_name == "SGD":
        logging.info("Using optimizer: SGD")

        if len(parameter_list_weights) == 0:
            optimizer_wf = None
        elif learning_parameters.learning_rate_gamma_w > 0:
            optimizer_wf = torch.optim.SGD(
                parameter_list_weights,
                lr=learning_parameters.learning_rate_gamma_w,
            )
        else:
            # An explicit raise instead of an assert: asserts vanish under
            # "python -O", which would leave the optimizer undefined here.
            raise ValueError(
                "SGD requires learning_rate_gamma_w > 0, got "
                f"{learning_parameters.learning_rate_gamma_w}"
            )

    else:
        raise Exception("Optimizer not implemented")

    optimizer: list[torch.optim.Optimizer | None] = [optimizer_wf]

    return optimizer

View file

@ -0,0 +1,95 @@
# %%
import torch
def calculate_output_size(
    value: list[int],
    kernel_size: list[int],
    stride: list[int],
    dilation: list[int],
    padding: list[int],
) -> torch.Tensor:
    """Return the two-axis output size of a sliding-window operation.

    The size is obtained by building the window coordinates with
    ``get_coordinates`` and counting the windows (second axis of each
    coordinate table).

    Args:
        value: input size, one entry per axis.
        kernel_size, stride, dilation, padding: window parameters, one entry
            per axis each.

    Returns:
        An int64 tensor with two entries (number of windows per axis).

    Raises:
        AssertionError: if any argument does not have exactly two entries.
    """
    # Every argument has to describe exactly two axes
    for two_axis_argument in (value, kernel_size, stride, dilation, padding):
        assert len(two_axis_argument) == 2

    coordinates = get_coordinates(
        value=value,
        kernel_size=kernel_size,
        stride=stride,
        dilation=dilation,
        padding=padding,
    )

    # The number of columns of a coordinate table is the number of windows
    windows_axis_0: int = coordinates[0].shape[1]
    windows_axis_1: int = coordinates[1].shape[1]

    return torch.tensor([windows_axis_0, windows_axis_1], dtype=torch.int64)
def get_coordinates(
    value: list[int],
    kernel_size: list[int],
    stride: list[int],
    dilation: list[int],
    padding: list[int],
) -> tuple[torch.Tensor, torch.Tensor]:
    """Convert the window parameters into coordinates for the convolution window.

    For each of the two axes the positions ``0 .. value[axis] - 1`` are run
    through ``torch.nn.Unfold``; the result lists, per window, which input
    positions the window covers.

    Args:
        value: input size, one entry per axis.
        kernel_size, stride, dilation, padding: window parameters, one entry
            per axis each.

    Returns:
        ``(coordinates_0, coordinates_1)``: int64 tensors of shape
        ``(kernel_size[axis], number_of_windows[axis])``. Positions that fall
        into the zero padding are reported as 0.
    """
    # Stride, dilation and padding are given per dimension. Passing a single
    # int would apply them to the singleton dimension as well; for padding
    # that adds extra windows along that dimension and inflates the window
    # count.
    unfold_0: torch.nn.Unfold = torch.nn.Unfold(
        kernel_size=(int(kernel_size[0]), 1),
        dilation=(int(dilation[0]), 1),
        padding=(int(padding[0]), 0),
        stride=(int(stride[0]), 1),
    )

    unfold_1: torch.nn.Unfold = torch.nn.Unfold(
        kernel_size=(1, int(kernel_size[1])),
        dilation=(1, int(dilation[1])),
        padding=(0, int(padding[1])),
        stride=(1, int(stride[1])),
    )

    # Unfold needs a floating point input of shape (batch, channel, x, y)
    positions_0: torch.Tensor = torch.arange(0, int(value[0]), dtype=torch.float32)
    positions_1: torch.Tensor = torch.arange(0, int(value[1]), dtype=torch.float32)

    coordinates_0: torch.Tensor = (
        unfold_0(positions_0[None, None, :, None]).squeeze(0).type(torch.int64)
    )
    coordinates_1: torch.Tensor = (
        unfold_1(positions_1[None, None, None, :]).squeeze(0).type(torch.int64)
    )

    return coordinates_0, coordinates_1

View file

@ -0,0 +1,4 @@
{
"data_path": "./DATA_CIFAR10/",
"data_mode": "CIFAR10"
}

View file

@ -0,0 +1,8 @@
https://github.com/zalandoresearch/fashion-mnist
We need:
t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz
Then
gzip -d *.gz
python convert.py

View file

@ -0,0 +1,4 @@
{
"data_path": "./DATA_FASHION_MNIST/",
"data_mode": "MNIST_FASHION"
}

View file

@ -0,0 +1,161 @@
# MIT License
# Copyright 2022 University of Bremen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# David Rotermund ( davrot@uni-bremen.de )
#
#
# Release history:
# ================
# 1.0.0 -- 01.05.2022: first release
#
#
import numpy as np
# [offset] [type] [value] [description]
# 0000 32 bit integer 0x00000801(2049) magic number (MSB first)
# 0004 32 bit integer 60000 number of items
# 0008 unsigned byte ?? label
# 0009 unsigned byte ?? label
# ........
# xxxx unsigned byte ?? label
# The labels values are 0 to 9.
class ReadLabel:
    """Class for reading the labels from an MNIST label file"""

    def __init__(self, filename):
        # Path of the label file and the labels read from it
        self.filename: str = filename
        self.data = self.read_from_file(filename)

    def read_from_file(self, filename):
        """Read all labels from ``filename``.

        Returns:
            A uint8 array with one label per item, or an empty array
            (``np.zeros(0)``) if the magic number is not 2049 or the file
            announces fewer than one item.
        """
        # Header fields are 32 bit unsigned integers, MSB first
        uint32_big_endian = np.dtype(">u4")

        # The context manager closes the file even if parsing fails
        with open(filename, "rb") as file:
            magic_flag = int(np.frombuffer(file.read(4), uint32_big_endian)[0])
            if magic_flag != 2049:
                return np.zeros(0)

            number_of_elements = int(
                np.frombuffer(file.read(4), uint32_big_endian)[0]
            )
            if number_of_elements < 1:
                return np.zeros(0)

            return np.frombuffer(file.read(number_of_elements), dtype=np.uint8)
# [offset] [type] [value] [description]
# 0000 32 bit integer 0x00000803(2051) magic number
# 0004 32 bit integer 60000 number of images
# 0008 32 bit integer 28 number of rows
# 0012 32 bit integer 28 number of columns
# 0016 unsigned byte ?? pixel
# 0017 unsigned byte ?? pixel
# ........
# xxxx unsigned byte ?? pixel
# Pixels are organized row-wise.
# Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).
class ReadPicture:
    """Class for reading the images from an MNIST image file"""

    def __init__(self, filename):
        # Path of the image file and the images read from it
        self.filename: str = filename
        self.data = self.read_from_file(filename)

    def read_from_file(self, filename):
        """Read all images from ``filename``.

        Returns:
            A uint8 array of shape ``(number of images, 28, 28)``, or an
            empty array (``np.zeros(0)``) if the magic number is not 2051,
            the file announces fewer than one image, or the images are not
            28 x 28 pixels.
        """
        # Header fields are 32 bit unsigned integers, MSB first
        uint32_big_endian = np.dtype(">u4")

        # The context manager closes the file even if parsing fails
        with open(filename, "rb") as file:

            def read_uint32() -> int:
                # Plain Python ints keep the size product below from
                # overflowing 32 bits
                return int(np.frombuffer(file.read(4), uint32_big_endian)[0])

            if read_uint32() != 2051:
                return np.zeros(0)

            number_of_elements = read_uint32()
            if number_of_elements < 1:
                return np.zeros(0)

            number_of_rows = read_uint32()
            if number_of_rows != 28:
                return np.zeros(0)

            number_of_columns = read_uint32()
            if number_of_columns != 28:
                return np.zeros(0)

            data = np.frombuffer(
                file.read(number_of_elements * number_of_rows * number_of_columns),
                dtype=np.uint8,
            )

        # Pixels are organized row-wise: (image, row, column)
        return data.reshape(number_of_elements, number_of_rows, number_of_columns)
def proprocess_data_set(test_mode):
    """Convert one MNIST-format image/label file pair into ``.npy`` files.

    With ``test_mode is True`` the ``t10k-*`` files are read and the
    ``Test*Storage.npy`` files are written; for every other value the
    ``train-*`` files are read and the ``Train*Storage.npy`` files are
    written. All files are located in the current working directory.

    The patterns are stored as float32 scaled by the largest pixel value, the
    labels are stored as uint64.
    """
    # (pattern output, label output, image input, label input)
    filenames_by_mode = {
        True: (
            "TestPatternStorage.npy",
            "TestLabelStorage.npy",
            "t10k-images-idx3-ubyte",
            "t10k-labels-idx1-ubyte",
        ),
        False: (
            "TrainPatternStorage.npy",
            "TrainLabelStorage.npy",
            "train-images-idx3-ubyte",
            "train-labels-idx1-ubyte",
        ),
    }
    (
        filename_out_pattern,
        filename_out_label,
        filename_in_image,
        filename_in_label,
    ) = filenames_by_mode[test_mode is True]

    pictures = ReadPicture(filename_in_image)
    labels = ReadLabel(filename_in_label)

    # Down to 0 ... 1.0
    pixels = pictures.data.astype(np.float32)
    largest_pixel = np.max(pixels)
    pattern_storage = (pixels / largest_pixel).astype(np.float32)

    label_storage = labels.data.astype(np.uint64)

    np.save(filename_out_pattern, pattern_storage)
    np.save(filename_out_label, label_storage)
# Script body: convert the test files first (test_mode=True), then the
# training files (test_mode=False).
# NOTE(review): these calls are not guarded by `if __name__ == "__main__":`,
# so they also run whenever this module is imported.
proprocess_data_set(True)
proprocess_data_set(False)

View file

@ -0,0 +1,4 @@
{
"data_path": "./DATA_MNIST/",
"data_mode": "MNIST"
}

View file

@ -0,0 +1,144 @@
# %%
import torch
import glob
import numpy as np
from network.SbS import SbS
from network.SplitOnOffLayer import SplitOnOffLayer
from network.Conv2dApproximation import Conv2dApproximation
def _find_overload_file(
    overload_path: str, prefix: str, kind: str, layer_id: int
) -> str | None:
    """Return the single ``<prefix>_L<layer_id>_*.npy`` file, or None if absent."""
    candidates = glob.glob(
        overload_path + "/" + prefix + "_L" + str(layer_id) + "_*.npy"
    )
    if len(candidates) > 1:
        raise Exception(
            f"Too many previous {kind} files {overload_path}/{prefix}_L{layer_id}*.npy"
        )
    if len(candidates) == 1:
        return candidates[0]
    return None


def _load_npy_as_tensor(
    filename: str, device: torch.device, default_dtype: torch.dtype
) -> torch.Tensor:
    """Load a ``.npy`` file as a tensor with the given dtype on the given device."""
    return torch.tensor(np.load(filename), dtype=default_dtype, device=device)


def load_previous_weights(
    network: torch.nn.Sequential,
    overload_path: str,
    logging,
    device: torch.device,
    default_dtype: torch.dtype,
) -> None:
    """Overwrite the initial layer parameters with files from ``overload_path``.

    For the layer at position ``i`` the following files are looked up:
        * SbS: ``Weight_L<i>_*.npy``
        * Conv2d and Conv2dApproximation: ``Weight_L<i>_*.npy`` and
          ``Bias_L<i>_*.npy``
        * SplitOnOffLayer: ``Mean_L<i>_*.npy``

    A missing file leaves the corresponding parameter untouched.

    Args:
        network: the network whose layers are updated in place.
        overload_path: directory that is searched for the files.
        logging: logger-like object providing ``info``.
        device: device for the loaded tensors.
        default_dtype: dtype for the loaded tensors.

    Raises:
        Exception: if more than one file matches for the same parameter.
    """
    for layer_id, layer in enumerate(network):

        # #################################################
        # SbS
        # #################################################
        if isinstance(layer, SbS):
            # Are there weights that overwrite the initial weights?
            filename = _find_overload_file(overload_path, "Weight", "weights", layer_id)
            if filename is not None:
                layer.weights = _load_npy_as_tensor(filename, device, default_dtype)
                logging.info(f"Weights file used for layer {layer_id} : {filename}")

        # #################################################
        # Conv2d weights and bias
        # #################################################
        if isinstance(layer, torch.nn.modules.conv.Conv2d):
            # Are there weights that overwrite the initial weights?
            filename = _find_overload_file(overload_path, "Weight", "weights", layer_id)
            if filename is not None:
                layer._parameters["weight"].data = _load_npy_as_tensor(
                    filename, device, default_dtype
                )
                logging.info(f"Weights file used for layer {layer_id} : {filename}")

            # Are there biases that overwrite the initial biases?
            filename = _find_overload_file(overload_path, "Bias", "bias", layer_id)
            if filename is not None:
                layer._parameters["bias"].data = _load_npy_as_tensor(
                    filename, device, default_dtype
                )
                logging.info(f"Bias file used for layer {layer_id} : {filename}")

        # #################################################
        # Approximate Conv2d weights and bias
        # #################################################
        if isinstance(layer, Conv2dApproximation):
            # Are there weights that overwrite the initial weights?
            filename = _find_overload_file(overload_path, "Weight", "weights", layer_id)
            if filename is not None:
                layer.weights.data = _load_npy_as_tensor(
                    filename, device, default_dtype
                )
                logging.info(f"Weights file used for layer {layer_id} : {filename}")

            # Are there biases that overwrite the initial biases?
            filename = _find_overload_file(overload_path, "Bias", "bias", layer_id)
            if filename is not None:
                layer.bias.data = _load_npy_as_tensor(filename, device, default_dtype)
                logging.info(f"Bias file used for layer {layer_id} : {filename}")

        # #################################################
        # SplitOnOffLayer
        # #################################################
        if isinstance(layer, SplitOnOffLayer):
            # Is there a mean that overwrites the initial mean?
            filename = _find_overload_file(overload_path, "Mean", "mean", layer_id)
            if filename is not None:
                layer.mean = _load_npy_as_tensor(filename, device, default_dtype)
                logging.info(f"Meanfile used for layer {layer_id} : {filename}")

512
network/loop_train_test.py Normal file
View file

@ -0,0 +1,512 @@
import torch
import time
from network.Parameter import Config
from torch.utils.tensorboard import SummaryWriter
from network.SbS import SbS
from network.save_weight_and_bias import save_weight_and_bias
def add_weight_and_bias_to_histogram(
    network: torch.nn.modules.container.Sequential,
    tb: SummaryWriter,
    iteration_number: int,
) -> None:
    """Write weight (and bias) histograms of all trainable layers to ``tb``.

    SbS layers contribute their ``weights``; Conv2d layers contribute their
    "weight" and "bias" parameters. Only layers whose ``_w_trainable`` is
    ``True`` are logged. A ``ValueError`` raised while adding a histogram is
    ignored, so one failing histogram does not stop the others.

    Args:
        network: the network whose layers are inspected.
        tb: the summary writer that receives the histograms.
        iteration_number: the step under which the histograms are stored.
    """
    for layer_index, layer in enumerate(network):

        # ################################################
        # Log the SbS Weights
        # ################################################
        if isinstance(layer, SbS) and layer._w_trainable is True:
            try:
                tb.add_histogram(
                    f"Weights Layer {layer_index}",
                    layer.weights,
                    iteration_number,
                )
            except ValueError:
                pass

        # ################################################
        # Log the Conv2 Weights and Biases
        # ################################################
        if (
            isinstance(layer, torch.nn.modules.conv.Conv2d)
            and layer._w_trainable is True
        ):
            try:
                tb.add_histogram(
                    f"Weights Layer {layer_index}",
                    layer._parameters["weight"].data,
                    iteration_number,
                )
            except ValueError:
                pass

            try:
                tb.add_histogram(
                    f"Bias Layer {layer_index}",
                    layer._parameters["bias"].data,
                    iteration_number,
                )
            except ValueError:
                pass

    tb.flush()
# loss_mode == 0: "normal" SbS loss function mixture
# loss_mode == 1: cross_entropy
def loss_function(
h: torch.Tensor,
labels: torch.Tensor,
device: torch.device,
default_dtype: torch.dtype,
loss_mode: int = 0,
number_of_output_neurons: int = 10,
loss_coeffs_mse: float = 0.0,
loss_coeffs_kldiv: float = 0.0,
) -> torch.Tensor | None:
assert loss_mode >= 0
assert loss_mode <= 1
h = h.squeeze(-1).squeeze(-1)
assert h.ndim == 2
if loss_mode == 0:
# Convert label into one hot
target_one_hot: torch.Tensor = torch.zeros(
(
labels.shape[0],
number_of_output_neurons,
),
device=device,
dtype=default_dtype,
)
target_one_hot.scatter_(
1,
labels.to(device).unsqueeze(1),
torch.ones(
(labels.shape[0], 1),
device=device,
dtype=default_dtype,
),
).unsqueeze(-1).unsqueeze(-1)
h_y1 = torch.log(h + 1e-20)
my_loss: torch.Tensor = (
torch.nn.functional.mse_loss(
h,
target_one_hot,
reduction="sum",
)
* loss_coeffs_mse
+ torch.nn.functional.kl_div(h_y1, target_one_hot + 1e-20, reduction="sum")
* loss_coeffs_kldiv
) / (loss_coeffs_kldiv + loss_coeffs_mse)
return my_loss
elif loss_mode == 1:
my_loss = torch.nn.functional.cross_entropy(
h.squeeze(-1).squeeze(-1), labels.to(device)
)
return my_loss
else:
return None
def forward_pass_train(
    input: torch.Tensor,
    labels: torch.Tensor,
    the_dataset_train,
    cfg: Config,
    network: torch.nn.modules.container.Sequential,
    device: torch.device,
    default_dtype: torch.dtype,
) -> list[torch.Tensor]:
    """Run a training forward pass and keep the output of every stage.

    The input is first passed through the training pattern filter of the
    dataset, converted to ``default_dtype`` and moved to ``device``. SbS
    layers are called with the labels as second argument, all other layers
    with the activity only.

    Returns:
        A list whose first entry is the filtered input, followed by the
        output of each layer in network order.
    """
    filtered_input = (
        the_dataset_train.pattern_filter_train(input, cfg)
        .type(dtype=default_dtype)
        .to(device=device)
    )
    h_collection = [filtered_input]

    for layer in network:
        previous_activity = h_collection[-1]
        if isinstance(layer, SbS) is True:
            # SbS layers additionally receive the labels
            layer_output = layer(previous_activity, labels)
        else:
            layer_output = layer(previous_activity)
        h_collection.append(layer_output)

    return h_collection
def forward_pass_test(
    input: torch.Tensor,
    the_dataset_test,
    cfg: Config,
    network: torch.nn.modules.container.Sequential,
    device: torch.device,
    default_dtype: torch.dtype,
) -> list[torch.Tensor]:
    """Run a test forward pass and keep the output of every stage.

    The input is first passed through the test pattern filter of the
    dataset, converted to ``default_dtype`` and moved to ``device``. Every
    layer is called with the activity only (no labels).

    Returns:
        A list whose first entry is the filtered input, followed by the
        output of each layer in network order.
    """
    activity = (
        the_dataset_test.pattern_filter_test(input, cfg)
        .type(dtype=default_dtype)
        .to(device=device)
    )
    h_collection = [activity]

    for layer in network:
        activity = layer(activity)
        h_collection.append(activity)

    return h_collection
def run_optimizer(
    network: torch.nn.modules.container.Sequential,
    optimizer: list,
    cfg: Config,
) -> None:
    """Perform one optimizer step, framed by the SbS pre/after care hooks.

    Order of operations:
        1. ``update_pre_care()`` on every SbS layer,
        2. ``step()`` on every optimizer that is not ``None``,
        3. ``update_after_care(...)`` on every SbS layer, called with
           ``learning_rate_threshold_w`` divided by the layer's number of
           input neurons.
    """
    sbs_layers = [layer for layer in network if isinstance(layer, SbS) is True]

    for layer in sbs_layers:
        layer.update_pre_care()

    for optimizer_item in optimizer:
        if optimizer_item is None:
            continue
        optimizer_item.step()

    for layer in sbs_layers:
        # Only the number of input neurons enters the normalisation; the
        # kernel size factors are deliberately left out.
        threshold = cfg.learning_parameters.learning_rate_threshold_w / float(
            layer._number_of_input_neurons
        )
        layer.update_after_care(threshold)
# ####################################
# Update the learning rate
# ####################################
# Step the learning rate schedulers and log the training statistics.
#
# Arguments, as used by the code below:
#   cfg: read for learning_parameters.lr_scheduler_use_performance and for
#        epoch_id (used as the step of every logged scalar).
#   lr_scheduler: iterable of scheduler objects; entries may be None.
#   optimizer: indexable; optimizer[0].param_groups[-1]["lr"] is logged.
#   performance_for_batch, my_loss_for_batch: a scheduler is only stepped
#        when both values are >= 0.0.
#   tb: object providing add_scalar() and flush().
#   logging: not used inside this function.
#
# NOTE(review): the indentation of this file is not visible here, so it cannot
# be told from this view whether the tb.add_scalar()/tb.flush() calls belong to
# the scheduler loop/condition or run once per call -- confirm before
# restructuring this function.
def run_lr_scheduler(
cfg: Config,
lr_scheduler,
optimizer,
performance_for_batch: float,
my_loss_for_batch: float,
tb,
logging,
) -> None:
# Inter-epoch learning rate adaptation
for lr_scheduler_item in lr_scheduler:
# Skip missing schedulers and negative performance/loss values
if (
(lr_scheduler_item is not None)
and (performance_for_batch >= 0.0)
and (my_loss_for_batch >= 0.0)
):
# The scheduler is driven either by the error (100 - performance;
# presumably performance is a percentage -- TODO confirm) or by the loss
if cfg.learning_parameters.lr_scheduler_use_performance is True:
lr_scheduler_item.step(100.0 - performance_for_batch)
else:
lr_scheduler_item.step(my_loss_for_batch)
# Log error, loss and the current learning rate under the epoch id
tb.add_scalar(
"Train Error",
100.0 - performance_for_batch,
cfg.epoch_id,
)
tb.add_scalar("Train Loss", my_loss_for_batch, cfg.epoch_id)
# Learning rate of the last parameter group of the first optimizer
tb.add_scalar(
"Learning Rate Scale WF",
optimizer[0].param_groups[-1]["lr"],
cfg.epoch_id,
)
tb.flush()
# def deal_with_gradient_scale(epoch_id: int, mini_batch_number: int, network):
# if (epoch_id == 0) and (mini_batch_number == 0):
# for id in range(0, len(network)):
# if isinstance(network[id], SbS) is True:
# network[id].after_batch(True)
# else:
# for id in range(0, len(network)):
# if isinstance(network[id], SbS) is True:
# network[id].after_batch()
def loop_train(
    cfg: Config,
    network: torch.nn.modules.container.Sequential,
    my_loader_train: torch.utils.data.dataloader.DataLoader,
    the_dataset_train,
    optimizer: list,
    device: torch.device,
    default_dtype: torch.dtype,
    logging,
    adapt_learning_rate: bool,
    tb: SummaryWriter,
    lr_scheduler,
    last_test_performance: float,
) -> tuple[float, float, float, float]:
    """Train the network for one pass over ``my_loader_train``.

    Gradients are accumulated over consecutive loader batches until at least
    ``cfg.get_update_after_x_pattern()`` patterns have been processed. Then
    the parameters are updated (``run_optimizer``), the weights are saved and
    logged, and a new accumulation cycle starts.

    Args:
        cfg: Configuration (epoch id, learning parameters, network structure).
        network: The network to train.
        my_loader_train: Loader yielding ``(input, labels)`` batches.
        the_dataset_train: Dataset object handed to ``forward_pass_train``.
        optimizer: List of optimizers; ``None`` entries are skipped.
        device: Device handed to the forward pass and the loss function.
        default_dtype: Dtype handed to the forward pass and the loss function.
        logging: Logger-like object providing ``info``.
        adapt_learning_rate: If ``True``, ``run_lr_scheduler`` is called at
            the start of every accumulation cycle.
        tb: Tensorboard writer.
        lr_scheduler: Schedulers handed to ``run_lr_scheduler``.
        last_test_performance: Last test performance in percent; a negative
            value suppresses the corresponding log line.

    Returns:
        A tuple ``(my_loss_for_batch, performance_for_batch, full_loss,
        full_performance)``. The first two describe the last completed
        accumulation cycle (accumulated loss divided by the number of
        patterns, and percent correct) and stay at ``-1.0`` if no cycle was
        completed. The last two are the same quantities over all patterns
        seen in this call.

    Raises:
        ZeroDivisionError: If the loader yields no batches, because the
            totals are divided by the number of seen patterns.
    """
    correct_in_minibatch: int = 0
    loss_in_minibatch: float = 0.0
    number_of_pattern_in_minibatch: int = 0

    mini_batch_number: int = -1

    full_loss: float = 0.0
    full_correct: float = 0.0
    full_count: float = 0.0

    epoch_id: int = cfg.epoch_id

    # Negative values mark "no completed accumulation cycle yet".
    my_loss_for_batch: float = -1.0
    performance_for_batch: float = -1.0

    time_forward: float = 0.0
    time_backward: float = 0.0

    with torch.enable_grad():

        for h_x, h_x_labels in my_loader_train:

            time_mini_batch_start: float = time.perf_counter()

            # ############################################################
            # Reset the gradient after an update (or the first loop pass)
            # ############################################################
            if number_of_pattern_in_minibatch == 0:

                # Reset the gradient of the torch optimizers
                for optimizer_item in optimizer:
                    if optimizer_item is not None:
                        optimizer_item.zero_grad()

                loss_in_minibatch = 0.0
                mini_batch_number += 1
                correct_in_minibatch = 0
                time_forward = 0.0
                time_backward = 0.0

                # ####################################
                # Update the learning rate
                # ####################################
                if adapt_learning_rate is True:
                    run_lr_scheduler(
                        cfg=cfg,
                        lr_scheduler=lr_scheduler,
                        optimizer=optimizer,
                        performance_for_batch=performance_for_batch,
                        my_loss_for_batch=my_loss_for_batch,
                        tb=tb,
                        logging=logging,
                    )

                logging.info(
                    (
                        f"\t\t\tLearning rate: "
                        f"weights:{optimizer[0].param_groups[-1]['lr']:^15.3e} "
                    )
                )

                if last_test_performance < 0:
                    logging.info("")
                else:
                    logging.info(
                        (
                            f"\t\t\tLast test performance: "
                            f"{last_test_performance/100.0:^6.2%}"
                        )
                    )
                logging.info("----------------")

            number_of_pattern_in_minibatch += h_x_labels.shape[0]
            full_count += h_x_labels.shape[0]

            # #####################################################
            # The network does the forward pass (training)
            # #####################################################
            h_collection = forward_pass_train(
                input=h_x,
                labels=h_x_labels,
                the_dataset_train=the_dataset_train,
                cfg=cfg,
                network=network,
                device=device,
                default_dtype=default_dtype,
            )

            # #####################################################
            # Calculate the loss function
            # #####################################################
            my_loss: torch.Tensor | None = loss_function(
                h=h_collection[-1],
                labels=h_x_labels,
                device=device,
                default_dtype=default_dtype,
                loss_mode=cfg.learning_parameters.loss_mode,
                number_of_output_neurons=int(
                    cfg.network_structure.number_of_output_neurons
                ),
                loss_coeffs_mse=float(cfg.learning_parameters.loss_coeffs_mse),
                loss_coeffs_kldiv=float(cfg.learning_parameters.loss_coeffs_kldiv),
            )
            assert my_loss is not None

            time_after_forward_and_loss: float = time.perf_counter()

            # #####################################################
            # Backward pass
            # #####################################################
            my_loss.backward()

            # Read the scalar loss once and use it for both accumulators.
            batch_loss: float = my_loss.item()
            loss_in_minibatch += batch_loss
            full_loss += batch_loss

            time_after_backward: float = time.perf_counter()

            # #####################################################
            # Performance measures
            # #####################################################
            # Count the correct classifications once and use the result
            # for both the cycle counter and the overall counter.
            correct_in_batch = (
                (h_collection[-1].argmax(dim=1).squeeze().cpu() == h_x_labels)
                .sum()
                .item()
            )
            correct_in_minibatch += correct_in_batch
            full_correct += correct_in_batch

            # We measure the scale of the propagated error
            # during the first minibatch
            # then we remember this size and scale
            # the future error with it
            # Kind of deals with the vanishing /
            # exploding gradients
            # deal_with_gradient_scale(
            #     epoch_id=epoch_id,
            #     mini_batch_number=mini_batch_number,
            #     network=network,
            # )

            # Measure the time for one mini-batch
            time_forward += time_after_forward_and_loss - time_mini_batch_start
            time_backward += time_after_backward - time_after_forward_and_loss

            if number_of_pattern_in_minibatch >= cfg.get_update_after_x_pattern():

                # Fixed: the pattern count was written as `value^6` (bitwise
                # XOR with 6) instead of the centering format spec `:^6`,
                # which logged a wrong number of patterns.
                logging.info(
                    (
                        f"{epoch_id:^6}=>{mini_batch_number:^6} "
                        f"\t\tTraining {number_of_pattern_in_minibatch:^6} pattern "
                        f"with {correct_in_minibatch/number_of_pattern_in_minibatch:^6.2%} "
                        f"\tForward time: \t{time_forward:^6.2f}sec"
                    )
                )

                logging.info(
                    (
                        f"\t\t\tLoss: {loss_in_minibatch/number_of_pattern_in_minibatch:^15.3e} "
                        f"\t\t\tBackward time: \t{time_backward:^6.2f}sec "
                    )
                )

                my_loss_for_batch = loss_in_minibatch / number_of_pattern_in_minibatch

                performance_for_batch = (
                    100.0 * correct_in_minibatch / number_of_pattern_in_minibatch
                )

                # ################################################
                # Update the weights and biases
                # ################################################
                run_optimizer(network=network, optimizer=optimizer, cfg=cfg)

                # ################################################
                # Save the Weights and Biases
                # ################################################
                save_weight_and_bias(
                    cfg=cfg, network=network, iteration_number=epoch_id
                )

                # ################################################
                # Log the Weights and Biases
                # ################################################
                add_weight_and_bias_to_histogram(
                    network=network,
                    tb=tb,
                    iteration_number=epoch_id,
                )

                # ################################################
                # Mark mini batch as done
                # ################################################
                number_of_pattern_in_minibatch = 0

    return (
        my_loss_for_batch,
        performance_for_batch,
        (full_loss / full_count),
        (100.0 * full_correct / full_count),
    )
def loop_test(
    epoch_id: int,
    cfg: Config,
    network: torch.nn.modules.container.Sequential,
    my_loader_test: torch.utils.data.dataloader.DataLoader,
    the_dataset_test,
    device: torch.device,
    default_dtype: torch.dtype,
    logging,
    tb: SummaryWriter,
) -> float:
    """Evaluate the network on the test data and log the running result.

    After every loader batch the cumulative performance is logged; after the
    last batch the test error (``100 - performance``) is written to ``tb``
    under the step index ``epoch_id``.

    Args:
        epoch_id: Step index used for the tensorboard scalar.
        cfg: Configuration handed to ``forward_pass_test``.
        network: The network to evaluate.
        my_loader_test: Loader yielding ``(input, labels)`` batches.
        the_dataset_test: Test dataset; its length is shown in the log and
            it is handed to ``forward_pass_test``.
        device: Device handed to the forward pass.
        default_dtype: Dtype handed to the forward pass.
        logging: Logger-like object providing ``info``.
        tb: Tensorboard writer.

    Returns:
        The cumulative test performance in percent.

    NOTE(review): if the loader yields no batch, ``performance`` is never
    assigned and the function fails when it is first used. The hit count is
    accumulated through ``.numpy()``, so the returned value is presumably a
    NumPy scalar rather than a builtin float -- confirm callers do not care.
    """
    number_correct = 0
    number_seen = 0
    number_total: int = len(the_dataset_test)

    logging.info("")
    logging.info("Testing:")

    for h_x, h_x_labels in my_loader_test:
        batch_start = time.perf_counter()

        layer_outputs = forward_pass_test(
            input=h_x,
            the_dataset_test=the_dataset_test,
            cfg=cfg,
            network=network,
            device=device,
            default_dtype=default_dtype,
        )

        # Only the output of the last layer is needed for the classification.
        prediction: torch.Tensor = layer_outputs[-1].detach().clone().cpu()
        hits = prediction.argmax(dim=1).squeeze() == h_x_labels

        number_correct += hits.sum().numpy()
        number_seen += prediction.shape[0]
        performance = 100.0 * number_correct / number_seen

        batch_end = time.perf_counter()
        elapsed = batch_end - batch_start

        logging.info(
            (
                f"\t\t{number_seen} of {number_total}"
                f" with {performance/100:^6.2%} \t Time used: {elapsed:^6.2f}sec"
            )
        )

    logging.info("")

    tb.add_scalar("Test Error", 100.0 - performance, epoch_id)
    tb.flush()

    return performance

View file

@ -0,0 +1,71 @@
import torch
from network.Parameter import Config
import numpy as np
from network.SbS import SbS
from network.SplitOnOffLayer import SplitOnOffLayer
from network.Conv2dApproximation import Conv2dApproximation
def save_weight_and_bias(
    cfg: Config, network: torch.nn.modules.container.Sequential, iteration_number: int
) -> None:
    """Write the trainable parameters of the network to ``.npy`` files.

    All files are written into ``cfg.weight_path``. The file names encode the
    position of the layer in the network (``L<index>``) and the given
    iteration number (``S<iteration_number>``):

    * ``Weight_L..._S....npy`` for SbS, Conv2d and Conv2dApproximation layers
      whose ``_w_trainable`` flag is ``True``.
    * ``Bias_L..._S....npy`` for Conv2d and Conv2dApproximation layers whose
      ``_w_trainable`` flag is ``True`` and which have a bias.
    * ``Mean_L..._S....npy`` for every SplitOnOffLayer.

    Args:
        cfg: Configuration; only ``weight_path`` is read.
        network: The layers whose parameters are saved.
        iteration_number: Number embedded in the file names.
    """
    for layer_id, layer in enumerate(network):
        weight_file = f"{cfg.weight_path}/Weight_L{layer_id}_S{iteration_number}.npy"
        bias_file = f"{cfg.weight_path}/Bias_L{layer_id}_S{iteration_number}.npy"

        # ################################################
        # Save the SbS Weights
        # ################################################
        if isinstance(layer, SbS):
            if layer._w_trainable is True:
                np.save(weight_file, layer.weights.detach().cpu().numpy())

        # ################################################
        # Save the Conv2 Weights and Biases
        # ################################################
        if isinstance(layer, torch.nn.modules.conv.Conv2d):
            # NOTE(review): `_w_trainable` is not an attribute torch itself
            # defines on Conv2d; presumably it is attached when the network
            # is built -- confirm.
            if layer._w_trainable is True:
                np.save(
                    weight_file,
                    layer._parameters["weight"].data.detach().cpu().numpy(),
                )

                # A Conv2d created with bias=False has its bias registered
                # as None; skip it instead of failing on `None.data`
                # (same guard as for Conv2dApproximation below).
                conv_bias = layer._parameters["bias"]
                if conv_bias is not None:
                    np.save(bias_file, conv_bias.data.detach().cpu().numpy())

        # ################################################
        # Save the Approximate Conv2 Weights and Biases
        # ################################################
        if isinstance(layer, Conv2dApproximation):
            if layer._w_trainable is True:
                np.save(weight_file, layer.weights.data.detach().cpu().numpy())

                if layer.bias is not None:
                    np.save(bias_file, layer.bias.data.detach().cpu().numpy())

        # ################################################
        # Save the mean of the SplitOnOff layers
        # ################################################
        if isinstance(layer, SplitOnOffLayer):
            np.save(
                f"{cfg.weight_path}/Mean_L{layer_id}_S{iteration_number}.npy",
                layer.mean.detach().cpu().numpy(),
            )

Some files were not shown because too many files have changed in this diff Show more