diff --git a/C++/HDynamicCNNManyIP.cpp b/C++/HDynamicCNNManyIP.cpp deleted file mode 100644 index 8fea948..0000000 --- a/C++/HDynamicCNNManyIP.cpp +++ /dev/null @@ -1,294 +0,0 @@ -// MIT License -// Copyright 2022 University of Bremen -// -// Permission is hereby granted, free of charge, to any person obtaining -// a copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -// THE USE OR OTHER DEALINGS IN THE SOFTWARE. -// -// -// David Rotermund ( davrot@uni-bremen.de ) -// -// -// Release history: -// ================ -// 1.0.0 -- 01.05.2022: first release -// -// - -#include "HDynamicCNNManyIP.h" - -#include -#include -#include - -#include -#include -#include -#include - -HDynamicCNNManyIP::HDynamicCNNManyIP(){ - -}; - -HDynamicCNNManyIP::~HDynamicCNNManyIP(){ - -}; - -bool HDynamicCNNManyIP::update( - int64_t np_h_pointer_addr, int64_t np_h_dim_0, int64_t np_h_dim_1, - int64_t np_h_dim_2, int64_t np_h_dim_3, int64_t np_epsilon_xy_pointer_addr, - int64_t np_epsilon_xy_dim_0, int64_t np_epsilon_xy_dim_1, - int64_t np_epsilon_xy_dim_2, int64_t np_epsilon_t_pointer_addr, - int64_t np_epsilon_t_dim_0, int64_t np_weights_pointer_addr, - int64_t np_weights_dim_0, int64_t np_weights_dim_1, - int64_t np_input_pointer_addr, int64_t np_input_dim_0, - int64_t np_input_dim_1, int64_t np_input_dim_2, int64_t np_input_dim_3, - float *np_init_vector_pointer_ptr, int64_t np_init_vector_dim_0, - int64_t id_pattern) { - float *np_h_pointer = (float *)np_h_pointer_addr; - float *np_epsilon_xy_pointer = (float *)np_epsilon_xy_pointer_addr; - float *np_epsilon_t_pointer = (float *)np_epsilon_t_pointer_addr; - float *np_weights_pointer = (float *)np_weights_pointer_addr; - int64_t *np_input_pointer = (int64_t *)np_input_pointer_addr; - - int64_t number_of_pattern = np_input_dim_0; - - assert((id_pattern >= 0)); - assert((id_pattern < number_of_pattern)); - - assert((np_h_pointer != nullptr)); - assert((np_h_dim_0 > 0)); - assert((np_h_dim_1 > 0)); - assert((np_h_dim_2 > 0)); - assert((np_h_dim_3 > 0)); - - int64_t np_h_dim_c0 = np_h_dim_1 * np_h_dim_2 * np_h_dim_3; - int64_t np_h_dim_c1 = np_h_dim_2 * np_h_dim_3; - int64_t np_h_dim_c2 = np_h_dim_3; - - float *np_h_pointer_pattern; - float *np_h_pointer_pattern_0; - float *np_h_pointer_pattern_01; - - assert((np_epsilon_xy_pointer != nullptr)); - assert((np_epsilon_xy_dim_0 > 0)); - assert((np_epsilon_xy_dim_1 > 0)); - - int64_t np_epsilon_xy_dim_c0 = np_epsilon_xy_dim_2 * np_epsilon_xy_dim_1; - int64_t np_epsilon_xy_dim_c1 = np_epsilon_xy_dim_2; - - float *np_epsilon_xy_pointer_0; - float *np_epsilon_xy_pointer_01; - - assert((np_epsilon_t_pointer != nullptr)); - 
assert((np_epsilon_t_dim_0 > 0)); - - assert((np_weights_pointer != nullptr)); - assert((np_weights_dim_0 > 0)); - assert((np_weights_dim_1 > 0)); - - int64_t np_weights_dim_c0 = np_weights_dim_1; - - float *w_ptr; - - assert((np_input_pointer != nullptr)); - assert((np_input_dim_0 > 0)); - assert((np_input_dim_1 > 0)); - assert((np_input_dim_2 > 0)); - assert((np_input_dim_3 > 0)); - - int64_t np_input_dim_c0 = np_input_dim_1 * np_input_dim_2 * np_input_dim_3; - int64_t np_input_dim_c1 = np_input_dim_2 * np_input_dim_3; - int64_t np_input_dim_c2 = np_input_dim_3; - - int64_t *np_input_pointer_pattern; - int64_t *np_input_pointer_pattern_0; - int64_t *np_input_pointer_pattern_01; - int64_t *np_input_pointer_pattern_01_spike; - - assert((np_init_vector_pointer_ptr != nullptr)); - assert((np_init_vector_dim_0 == np_weights_dim_1)); - - int64_t number_of_spikes = np_input_dim_1; - - int64_t h_dim = np_weights_dim_1; - - std::vector h_temp_vector; - h_temp_vector.resize(h_dim); - float *h_temp = h_temp_vector.data(); - - std::vector h_subsegment_vector; - h_subsegment_vector.resize(h_dim); - float *h_subsegment = h_subsegment_vector.data(); - - float h_temp_sum; - - int64_t id_0; - int64_t id_1; - int64_t id_spike; - int64_t counter; - - float temp_value; - - float epsilon_scale; - float epsilon_subsegment; - - // epsilon_subsegment = np_epsilon_xy_pointer[ - // id_0 * np_epsilon_xy_dim_c0 + - // id_1 ] - // * np_epsilon_t_pointer[id_spike]; - - // spike = np_input_pointer[ - // id_pattern * np_input_dim_c0 + - // id_spike * np_input_dim_c1 + - // id_0 * np_input_dim_c2 + - // id_1]; - - // w_ptr = np_weights_pointer + - // spike * np_weights_dim_c0; - - // h_ptr = np_h_pointer + - // id_pattern * np_h_dim_c0 + - // id_0 * np_h_dim_c2 + - // id_1; - // // 0 * np_h_dim_c1 + - - np_input_pointer_pattern = np_input_pointer + id_pattern * np_input_dim_c0; - np_h_pointer_pattern = np_h_pointer + id_pattern * np_h_dim_c0; - - for (id_0 = 0; id_0 < np_input_dim_2; id_0++) { - np_epsilon_xy_pointer_0 = - np_epsilon_xy_pointer + id_0 * np_epsilon_xy_dim_c1; - - np_h_pointer_pattern_0 = np_h_pointer_pattern + id_0 * np_h_dim_c2; - - np_input_pointer_pattern_0 = - np_input_pointer_pattern + id_0 * np_input_dim_c2; - - for (id_1 = 0; id_1 < np_input_dim_3; id_1++) { - np_epsilon_xy_pointer_01 = np_epsilon_xy_pointer_0 + id_1; - - np_h_pointer_pattern_01 = np_h_pointer_pattern_0 + id_1; - - np_input_pointer_pattern_01 = np_input_pointer_pattern_0 + id_1; - - memcpy(h_subsegment, np_init_vector_pointer_ptr, sizeof(float) * h_dim); - - epsilon_scale = 1.0; - - for (id_spike = 0; id_spike < number_of_spikes; id_spike++) { - if (epsilon_scale > 1E10) { - temp_value = 1.0 / epsilon_scale; - -#pragma omp simd - for (counter = 0; counter < h_dim; counter++) { - h_subsegment[counter] *= temp_value; - } - - epsilon_scale = 1.0; - } - - np_input_pointer_pattern_01_spike = - np_input_pointer_pattern_01 + id_spike * np_input_dim_c1; - - epsilon_subsegment = - np_epsilon_xy_pointer_01[np_input_pointer_pattern_01_spike[0] * - np_epsilon_xy_dim_c0] * - np_epsilon_t_pointer[id_spike]; - - w_ptr = np_weights_pointer + - np_input_pointer_pattern_01_spike[0] * np_weights_dim_c0; - - memcpy(h_temp, h_subsegment, sizeof(float) * h_dim); - -#pragma omp simd - for (counter = 0; counter < h_dim; counter++) { - h_temp[counter] *= w_ptr[counter]; - } - - h_temp_sum = 0.0; -#pragma omp simd reduction(+ : h_temp_sum) - for (counter = 0; counter < h_dim; counter++) { - h_temp_sum += h_temp[counter]; - } - - if (h_temp_sum > 1E-10) { - 
temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum; - -#pragma omp simd - for (counter = 0; counter < h_dim; counter++) { - h_temp[counter] *= temp_value; - } - -#pragma omp simd - for (counter = 0; counter < h_dim; counter++) { - h_subsegment[counter] += h_temp[counter]; - } - - epsilon_scale *= 1.0 + epsilon_subsegment; - // IF - } - // spike End - } - - temp_value = 1.0 / epsilon_scale; -#pragma omp simd - for (counter = 0; counter < h_dim; counter++) { - np_h_pointer_pattern_01[counter * np_h_dim_c1] = - h_subsegment[counter] * temp_value; - } - - // id_1 End - } - - // id_0 End - } - - return true; -}; - -bool HDynamicCNNManyIP::update_with_init_vector_multi_pattern( - int64_t np_h_pointer_addr, int64_t np_h_dim_0, int64_t np_h_dim_1, - int64_t np_h_dim_2, int64_t np_h_dim_3, int64_t np_epsilon_xy_pointer_addr, - int64_t np_epsilon_xy_dim_0, int64_t np_epsilon_xy_dim_1, - int64_t np_epsilon_xy_dim_2, int64_t np_epsilon_t_pointer_addr, - int64_t np_epsilon_t_dim_0, int64_t np_weights_pointer_addr, - int64_t np_weights_dim_0, int64_t np_weights_dim_1, - int64_t np_input_pointer_addr, int64_t np_input_dim_0, - int64_t np_input_dim_1, int64_t np_input_dim_2, int64_t np_input_dim_3, - int64_t np_init_vector_pointer_addr, int64_t np_init_vector_dim_0, - int64_t number_of_processes) { - int64_t number_of_pattern = np_input_dim_0; - int64_t pattern_id; - - int64_t h_dim = np_init_vector_dim_0; - float *h_init_ptr = (float *)np_init_vector_pointer_addr; - - omp_set_num_threads(number_of_processes); - -#pragma omp parallel for - for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++) { - update(np_h_pointer_addr, np_h_dim_0, np_h_dim_1, np_h_dim_2, np_h_dim_3, - np_epsilon_xy_pointer_addr, np_epsilon_xy_dim_0, np_epsilon_xy_dim_1, - np_epsilon_xy_dim_2, np_epsilon_t_pointer_addr, np_epsilon_t_dim_0, - np_weights_pointer_addr, np_weights_dim_0, np_weights_dim_1, - np_input_pointer_addr, np_input_dim_0, np_input_dim_1, - np_input_dim_2, np_input_dim_3, h_init_ptr, h_dim, pattern_id); - } - - return true; -}; diff --git a/C++/HDynamicCNNManyIP.h b/C++/HDynamicCNNManyIP.h deleted file mode 100644 index 5d8ba6a..0000000 --- a/C++/HDynamicCNNManyIP.h +++ /dev/null @@ -1,72 +0,0 @@ -// MIT License -// Copyright 2022 University of Bremen -// -// Permission is hereby granted, free of charge, to any person obtaining -// a copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -// THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
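Per pattern and per (x, y) position, the deleted `update()` kernel above performs a multiplicative spike-by-spike update of the latent vector `h`, deferring the normalisation through the running `epsilon_scale` factor. A minimal NumPy sketch of that arithmetic for a single position (function and argument names are illustrative, not part of the original API):

```python
import numpy as np

def h_update_single_position(h_init, weights, spikes, eps_xy_s, eps_t):
    """Reference arithmetic of HDynamicCNNManyIP::update for one (x, y) position.

    h_init   : (h_dim,)          initial h vector (np_init_vector)
    weights  : (n_input, h_dim)  one weight row per presynaptic spike index
    spikes   : (n_spikes,)       spike indices at this position, in time order
    eps_xy_s : (n_input,)        epsilon_xy[:, x, y] for this position
    eps_t    : (n_spikes,)       time-dependent epsilon
    """
    h = np.array(h_init, dtype=np.float32)
    scale = 1.0
    for t, s in enumerate(spikes):
        if scale > 1e10:              # keep the deferred normaliser bounded
            h /= scale
            scale = 1.0
        eps = eps_xy_s[s] * eps_t[t]
        drive = h * weights[s]        # h ⊙ w_s
        z = drive.sum()
        if z > 1e-10:
            h += (scale * eps / z) * drive
            scale *= 1.0 + eps
    return h / scale                  # written back to np_h[pattern, :, x, y]
```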
-// -// -// David Rotermund ( davrot@uni-bremen.de ) -// -// -// Release history: -// ================ -// 1.0.0 -- 01.05.2022: first release -// -// - -#ifndef SRC_HDYNAMICCNNMANYIP_H_ -#define SRC_HDYNAMICCNNMANYIP_H_ - -#include - -#include -#include - -class HDynamicCNNManyIP { - public: - HDynamicCNNManyIP(); - ~HDynamicCNNManyIP(); - - bool update(int64_t np_h_pointer_addr, int64_t np_h_dim_0, int64_t np_h_dim_1, - int64_t np_h_dim_2, int64_t np_h_dim_3, - int64_t np_epsilon_xy_pointer_addr, int64_t np_epsilon_xy_dim_0, - int64_t np_epsilon_xy_dim_1, int64_t np_epsilon_xy_dim_2, - int64_t np_epsilon_t_pointer_addr, int64_t np_epsilon_t_dim_0, - int64_t np_weights_pointer_addr, int64_t np_weights_dim_0, - int64_t np_weights_dim_1, int64_t np_input_pointer_addr, - int64_t np_input_dim_0, int64_t np_input_dim_1, - int64_t np_input_dim_2, int64_t np_input_dim_3, - float *np_init_vector_pointer_ptr, int64_t np_init_vector_dim_0, - int64_t id_pattern); - - bool update_with_init_vector_multi_pattern( - int64_t np_h_pointer_addr, int64_t np_h_dim_0, int64_t np_h_dim_1, - int64_t np_h_dim_2, int64_t np_h_dim_3, - int64_t np_epsilon_xy_pointer_addr, int64_t np_epsilon_xy_dim_0, - int64_t np_epsilon_xy_dim_1, int64_t np_epsilon_xy_dim_2, - int64_t np_epsilon_t_pointer_addr, int64_t np_epsilon_t_dim_0, - int64_t np_weights_pointer_addr, int64_t np_weights_dim_0, - int64_t np_weights_dim_1, int64_t np_input_pointer_addr, - int64_t np_input_dim_0, int64_t np_input_dim_1, int64_t np_input_dim_2, - int64_t np_input_dim_3, int64_t np_init_vector_pointer_addr, - int64_t np_init_vector_dim_0, int64_t number_of_processes); - - private: -}; - -#endif /* SRC_HDYNAMICCNNMANYIP_H_ */ \ No newline at end of file diff --git a/C++/Makefile b/C++/Makefile deleted file mode 100644 index a00d72a..0000000 --- a/C++/Makefile +++ /dev/null @@ -1,75 +0,0 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - -# Change to your python bin directory (tested with Python 3.10.4) -PYBIN=~/P3.10/bin/ -COMPILER=clang++ - -PYBIND11INCLUDE=`$(PYBIN)python3 -m pybind11 --includes` - -PARAMETERS= -O3 -std=c++14 -fPIC $(PYBIND11INCLUDE) -Wall -fopenmp=libomp - - -all: PyHDynamicCNNManyIP \ - PySpikeGeneration2DManyIP \ - -####################### - -HDynamicCNNManyIP.o: HDynamicCNNManyIP.h HDynamicCNNManyIP.cpp - $(COMPILER) $(PARAMETERS) -c HDynamicCNNManyIP.cpp -o HDynamicCNNManyIP.o - -PyHDynamicCNNManyIP.o: HDynamicCNNManyIP.h PyHDynamicCNNManyIP.cpp - $(COMPILER) $(PARAMETERS) -c PyHDynamicCNNManyIP.cpp -o PyHDynamicCNNManyIP.o - -PyHDynamicCNNManyIP: HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o - $(COMPILER) -shared -o PyHDynamicCNNManyIP HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o -lm -lomp -lstdc++ -Wall - cp PyHDynamicCNNManyIP ../PyHDynamicCNNManyIP`$(PYBIN)python3-config --extension-suffix` - -####################### - -SpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h SpikeGeneration2DManyIP.cpp - $(COMPILER) $(PARAMETERS) -c SpikeGeneration2DManyIP.cpp -o SpikeGeneration2DManyIP.o - -PySpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h PySpikeGeneration2DManyIP.cpp - $(COMPILER) $(PARAMETERS) -c PySpikeGeneration2DManyIP.cpp -o PySpikeGeneration2DManyIP.o - -PySpikeGeneration2DManyIP: SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o - $(COMPILER) -shared -o PySpikeGeneration2DManyIP SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o -lm -lomp -lstdc++ -Wall - cp PySpikeGeneration2DManyIP ../PySpikeGeneration2DManyIP`$(PYBIN)python3-config --extension-suffix` - - -####################### - -clean: - rm -f PyHDynamicCNNManyIP - rm -f PySpikeGeneration2DManyIP - rm -f *.o - diff --git a/C++/PyHDynamicCNNManyIP.cpp b/C++/PyHDynamicCNNManyIP.cpp deleted file mode 100644 index 5a03532..0000000 --- a/C++/PyHDynamicCNNManyIP.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// MIT License -// Copyright 2022 University of Bremen -// -// Permission is hereby granted, free of charge, to any person obtaining -// a copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -// THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
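The Makefile above links each module with OpenMP and copies it next to the Python code under the interpreter-specific extension suffix queried via `python3-config --extension-suffix`, so a plain `import PyHDynamicCNNManyIP` finds it. The same suffix can be read from `sysconfig`:

```python
import sysconfig

# What `python3-config --extension-suffix` prints,
# e.g. ".cpython-310-x86_64-linux-gnu.so"
print(sysconfig.get_config_var("EXT_SUFFIX"))
```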
-// -// -// David Rotermund ( davrot@uni-bremen.de ) -// -// -// Release history: -// ================ -// 1.0.0 -- 01.05.2022: first release -// -// - -#include - -#include "HDynamicCNNManyIP.h" - -namespace py = pybind11; - -PYBIND11_MODULE(PyHDynamicCNNManyIP, m) { - m.doc() = "HDynamicCNNManyIP Module"; - py::class_(m, "HDynamicCNNManyIP") - .def(py::init<>()) - .def("update_with_init_vector_multi_pattern", - &HDynamicCNNManyIP::update_with_init_vector_multi_pattern); -} \ No newline at end of file diff --git a/C++/PySpikeGeneration2DManyIP.cpp b/C++/PySpikeGeneration2DManyIP.cpp deleted file mode 100644 index b2438c7..0000000 --- a/C++/PySpikeGeneration2DManyIP.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// MIT License -// Copyright 2022 University of Bremen -// -// Permission is hereby granted, free of charge, to any person obtaining -// a copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -// THE USE OR OTHER DEALINGS IN THE SOFTWARE. -// -// -// David Rotermund ( davrot@uni-bremen.de ) -// -// -// Release history: -// ================ -// 1.0.0 -- 01.05.2022: first release -// -// - -#include - -#include "SpikeGeneration2DManyIP.h" - -namespace py = pybind11; - -PYBIND11_MODULE(PySpikeGeneration2DManyIP, m) { - m.doc() = "SpikeGeneration2DManyIP Module"; - py::class_(m, "SpikeGeneration2DManyIP") - .def(py::init<>()) - .def("spike_generation_multi_pattern", - &SpikeGeneration2DManyIP::spike_generation_multi_pattern); -} diff --git a/C++/SpikeGeneration2DManyIP.cpp b/C++/SpikeGeneration2DManyIP.cpp deleted file mode 100644 index 8e8be68..0000000 --- a/C++/SpikeGeneration2DManyIP.cpp +++ /dev/null @@ -1,197 +0,0 @@ -// MIT License -// Copyright 2022 University of Bremen -// -// Permission is hereby granted, free of charge, to any person obtaining -// a copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
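Both extensions are driven purely through int64 pointer addresses plus dimensions, as the bound method `update_with_init_vector_multi_pattern` above shows. The actual Python call site is not part of this diff; the following is a hypothetical sketch assuming C-contiguous float32/int64 NumPy buffers, with illustrative names and sizes:

```python
import numpy as np
import PyHDynamicCNNManyIP  # the module built by the deleted Makefile

n_pattern, n_spikes, dim_x, dim_y = 2, 100, 4, 4
n_input, h_dim = 8, 32
n_cpu = 2

h = np.zeros((n_pattern, h_dim, dim_x, dim_y), dtype=np.float32)
eps_xy = np.full((n_input, dim_x, dim_y), 0.1, dtype=np.float32)
eps_t = np.ones(n_spikes, dtype=np.float32)
weights = np.random.rand(n_input, h_dim).astype(np.float32)
spikes = np.random.randint(0, n_input, (n_pattern, n_spikes, dim_x, dim_y)).astype(np.int64)
h_init = np.full(h_dim, 1.0 / h_dim, dtype=np.float32)

model = PyHDynamicCNNManyIP.HDynamicCNNManyIP()
model.update_with_init_vector_multi_pattern(
    h.ctypes.data, *h.shape,              # np_h_pointer_addr, dims 0..3
    eps_xy.ctypes.data, *eps_xy.shape,    # np_epsilon_xy_pointer_addr, dims 0..2
    eps_t.ctypes.data, eps_t.shape[0],    # np_epsilon_t_pointer_addr, dim 0
    weights.ctypes.data, *weights.shape,  # np_weights_pointer_addr, dims 0..1
    spikes.ctypes.data, *spikes.shape,    # np_input_pointer_addr, dims 0..3
    h_init.ctypes.data, h_init.shape[0],  # np_init_vector_pointer_addr, dim 0
    n_cpu,                                # number_of_processes
)
```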
-// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -// THE USE OR OTHER DEALINGS IN THE SOFTWARE. -// -// -// David Rotermund ( davrot@uni-bremen.de ) -// -// -// Release history: -// ================ -// 1.0.0 -- 01.05.2022: first release -// -// - -#include "SpikeGeneration2DManyIP.h" - -#include -#include -#include - -#include -#include -#include -#include - -SpikeGeneration2DManyIP::SpikeGeneration2DManyIP(){ - -}; - -SpikeGeneration2DManyIP::~SpikeGeneration2DManyIP(){ - -}; - -bool SpikeGeneration2DManyIP::spike_generation_multi_pattern( - int64_t np_input_pointer_addr, int64_t np_input_dim_0, - int64_t np_input_dim_1, int64_t np_input_dim_2, int64_t np_input_dim_3, - int64_t np_random_values_pointer_addr, int64_t np_random_values_dim_0, - int64_t np_random_values_dim_1, int64_t np_random_values_dim_2, - int64_t np_random_values_dim_3, int64_t np_output_pointer_addr, - int64_t np_output_dim_0, int64_t np_output_dim_1, int64_t np_output_dim_2, - int64_t np_output_dim_3, int64_t number_of_cpu_processes) { - int64_t number_of_pattern = np_input_dim_0; - int64_t pattern_id; - - omp_set_num_threads(number_of_cpu_processes); - -#pragma omp parallel for - for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++) { - spike_generation( - np_input_pointer_addr, np_input_dim_0, np_input_dim_1, np_input_dim_2, - np_input_dim_3, np_random_values_pointer_addr, np_random_values_dim_0, - np_random_values_dim_1, np_random_values_dim_2, np_random_values_dim_3, - np_output_pointer_addr, np_output_dim_0, np_output_dim_1, - np_output_dim_2, np_output_dim_3, pattern_id); - } - - return true; -}; - -bool SpikeGeneration2DManyIP::spike_generation( - int64_t np_input_pointer_addr, int64_t np_input_dim_0, - int64_t np_input_dim_1, int64_t np_input_dim_2, int64_t np_input_dim_3, - int64_t np_random_values_pointer_addr, int64_t np_random_values_dim_0, - int64_t np_random_values_dim_1, int64_t np_random_values_dim_2, - int64_t np_random_values_dim_3, int64_t np_output_pointer_addr, - int64_t np_output_dim_0, int64_t np_output_dim_1, int64_t np_output_dim_2, - int64_t np_output_dim_3, int64_t id_pattern) { - float *np_input_pointer = (float *)np_input_pointer_addr; - float *np_random_values_pointer = (float *)np_random_values_pointer_addr; - int64_t *np_output_pointer = (int64_t *)np_output_pointer_addr; - - assert((id_pattern >= 0)); - assert((id_pattern < np_input_dim_0)); - - // Input - assert((np_input_pointer != nullptr)); - assert((np_input_dim_0 > 0)); - assert((np_input_dim_1 > 0)); - assert((np_input_dim_2 > 0)); - assert((np_input_dim_3 > 0)); - - int64_t np_input_dim_c0 = np_input_dim_1 * np_input_dim_2 * np_input_dim_3; - int64_t np_input_dim_c1 = np_input_dim_2 * np_input_dim_3; - int64_t np_input_dim_c2 = np_input_dim_3; - - // Random - assert((np_random_values_pointer != nullptr)); - assert((np_random_values_dim_0 > 0)); - assert((np_random_values_dim_1 > 0)); - assert((np_random_values_dim_2 > 0)); - assert((np_random_values_dim_3 > 0)); - - int64_t np_random_values_dim_c0 = - np_random_values_dim_1 * np_random_values_dim_2 * np_random_values_dim_3; - int64_t np_random_values_dim_c1 = - np_random_values_dim_2 * np_random_values_dim_3; - int64_t np_random_values_dim_c2 = np_random_values_dim_3; - - // Output - assert((np_output_pointer != nullptr)); - assert((np_output_dim_0 > 0)); - assert((np_output_dim_1 > 0)); - 
assert((np_output_dim_2 > 0)); - assert((np_output_dim_3 > 0)); - - int64_t np_output_dim_c0 = - np_output_dim_1 * np_output_dim_2 * np_output_dim_3; - int64_t np_output_dim_c1 = np_output_dim_2 * np_output_dim_3; - int64_t np_output_dim_c2 = np_output_dim_3; - - // ------------------------------- - - int64_t h_dim = np_input_dim_1; - int64_t spike_dim = np_output_dim_1; - - std::vector temp_p; - temp_p.resize(h_dim); - float *temp_p_ptr = temp_p.data(); - - std::vector temp_out; - temp_out.resize(spike_dim); - int64_t *temp_out_ptr = temp_out.data(); - - std::vector temp_rand; - temp_rand.resize(spike_dim); - float *temp_rand_ptr = temp_rand.data(); - - int64_t counter; - - int64_t counter_x = 0; - int64_t counter_y = 0; - - float *p_ptr = nullptr; - int64_t *out_ptr = nullptr; - float *rand_ptr = nullptr; - - std::vector::iterator position_iterator; - - for (counter_x = 0; counter_x < np_output_dim_2; counter_x++) { - for (counter_y = 0; counter_y < np_output_dim_3; counter_y++) { - p_ptr = np_input_pointer + id_pattern * np_input_dim_c0 + - counter_x * np_input_dim_c2 + counter_y; - // + counter * np_input_dim_c1 - - out_ptr = np_output_pointer + id_pattern * np_output_dim_c0 + - counter_x * np_output_dim_c2 + counter_y; - // + counter * np_output_dim_c1 - - rand_ptr = np_random_values_pointer + - id_pattern * np_random_values_dim_c0 + - counter_x * np_random_values_dim_c2 + counter_y; - // + counter * np_random_values_dim_c1 - -#pragma omp simd - for (counter = 0; counter < h_dim; counter++) { - temp_p_ptr[counter] = p_ptr[counter * np_input_dim_c1]; - } - -#pragma omp simd - for (counter = 0; counter < spike_dim; counter++) { - temp_rand_ptr[counter] = rand_ptr[counter * np_random_values_dim_c1]; - } - - // ---------------------------- - for (counter = 0; counter < spike_dim; counter++) { - position_iterator = std::lower_bound(temp_p.begin(), temp_p.end(), - temp_rand_ptr[counter]); - temp_out_ptr[counter] = position_iterator - temp_p.begin(); - } - // ---------------------------- - -#pragma omp simd - for (counter = 0; counter < spike_dim; counter++) { - out_ptr[counter * np_output_dim_c1] = temp_out_ptr[counter]; - } - } - } - - return true; -}; diff --git a/C++/SpikeGeneration2DManyIP.h b/C++/SpikeGeneration2DManyIP.h deleted file mode 100644 index 99e7810..0000000 --- a/C++/SpikeGeneration2DManyIP.h +++ /dev/null @@ -1,66 +0,0 @@ -// MIT License -// Copyright 2022 University of Bremen -// -// Permission is hereby granted, free of charge, to any person obtaining -// a copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -// THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
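The inner loop of the deleted `spike_generation()` above picks each spike with a `std::lower_bound` search of a pre-drawn uniform value against the column `input[pattern, :, x, y]`, which only works if that column is non-decreasing — i.e. presumably the cumulative firing probability prepared on the Python side (that preparation is not part of this diff). In NumPy terms it is `np.searchsorted`:

```python
import numpy as np

def spikes_single_position(p_cumsum, uniforms):
    """NumPy equivalent of the lower_bound loop for one (pattern, x, y) column:
    p_cumsum = input[pattern, :, x, y] (non-decreasing),
    uniforms = random_values[pattern, :, x, y]."""
    return np.searchsorted(p_cumsum, uniforms, side="left").astype(np.int64)

# e.g. cumulative probabilities [0.1, 0.4, 1.0]: a draw of 0.3 selects neuron 1
print(spikes_single_position(np.array([0.1, 0.4, 1.0]),
                             np.array([0.3, 0.05, 0.9])))
# -> [1 0 2]
```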
-// -// -// David Rotermund ( davrot@uni-bremen.de ) -// -// -// Release history: -// ================ -// 1.0.0 -- 01.05.2022: first release -// -// - -#ifndef SRC_SPIKEGENERATION2DMANYIP_H_ -#define SRC_SPIKEGENERATION2DMANYIP_H_ - -#include - -#include -#include - -class SpikeGeneration2DManyIP { - public: - SpikeGeneration2DManyIP(); - ~SpikeGeneration2DManyIP(); - - bool spike_generation_multi_pattern( - int64_t np_input_pointer_addr, int64_t np_input_dim_0, - int64_t np_input_dim_1, int64_t np_input_dim_2, int64_t np_input_dim_3, - int64_t np_random_values_pointer_addr, int64_t np_random_values_dim_0, - int64_t np_random_values_dim_1, int64_t np_random_values_dim_2, - int64_t np_random_values_dim_3, int64_t np_output_pointer_addr, - int64_t np_output_dim_0, int64_t np_output_dim_1, int64_t np_output_dim_2, - int64_t np_output_dim_3, int64_t number_of_cpu_processes); - - bool spike_generation( - int64_t np_input_pointer_addr, int64_t np_input_dim_0, - int64_t np_input_dim_1, int64_t np_input_dim_2, int64_t np_input_dim_3, - int64_t np_random_values_pointer_addr, int64_t np_random_values_dim_0, - int64_t np_random_values_dim_1, int64_t np_random_values_dim_2, - int64_t np_random_values_dim_3, int64_t np_output_pointer_addr, - int64_t np_output_dim_0, int64_t np_output_dim_1, int64_t np_output_dim_2, - int64_t np_output_dim_3, int64_t id_pattern); - - private: -}; - -#endif /* SRC_SPIKEGENERATION2DMANYIP_H_ */ \ No newline at end of file diff --git a/DATA_CIFAR10/PyTorch_Non_Spike_Network/Dataset.py b/DATA_CIFAR10/PyTorch_Non_Spike_Network/Dataset.py deleted file mode 100644 index 11f9854..0000000 --- a/DATA_CIFAR10/PyTorch_Non_Spike_Network/Dataset.py +++ /dev/null @@ -1,422 +0,0 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - -from abc import ABC, abstractmethod -import torch -import numpy as np -import torchvision as tv # type: ignore -from Parameter import Config - - -class DatasetMaster(torch.utils.data.Dataset, ABC): - - path_label: str - label_storage: np.ndarray - pattern_storage: np.ndarray - number_of_pattern: int - mean: list[float] - - # Initialize - def __init__( - self, - train: bool = False, - path_pattern: str = "./", - path_label: str = "./", - ) -> None: - super().__init__() - - if train is True: - self.label_storage = np.load(path_label + "/TrainLabelStorage.npy") - else: - self.label_storage = np.load(path_label + "/TestLabelStorage.npy") - - if train is True: - self.pattern_storage = np.load(path_pattern + "/TrainPatternStorage.npy") - else: - self.pattern_storage = np.load(path_pattern + "/TestPatternStorage.npy") - - self.number_of_pattern = self.label_storage.shape[0] - - self.mean = [] - - def __len__(self) -> int: - return self.number_of_pattern - - # Get one pattern at position index - @abstractmethod - def __getitem__(self, index: int) -> tuple[torch.Tensor, int]: - pass - - @abstractmethod - def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - pass - - @abstractmethod - def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - pass - - -class DatasetMNIST(DatasetMaster): - """Contstructor""" - - # Initialize - def __init__( - self, - train: bool = False, - path_pattern: str = "./", - path_label: str = "./", - ) -> None: - super().__init__(train, path_pattern, path_label) - - self.pattern_storage = np.ascontiguousarray( - self.pattern_storage[:, np.newaxis, :, :].astype(dtype=np.float32) - ) - - self.pattern_storage /= np.max(self.pattern_storage) - - mean = self.pattern_storage.mean(3).mean(2).mean(0) - self.mean = [*mean] - - def __getitem__(self, index: int) -> tuple[torch.Tensor, int]: - - image = self.pattern_storage[index, 0:1, :, :] - target = int(self.label_storage[index]) - return torch.tensor(image), target - - def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The test image comes in - 1. is center cropped - 2. on/off filteres - 3. returned. - - This is a 1 channel version (e.g. one gray channel). - """ - - assert len(cfg.image_statistics.mean) == 1 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.CenterCrop(size=cfg.image_statistics.the_size), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) - - return gray - - def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The training image comes in - 1. is cropped from a random position - 2. on/off filteres - 3. returned. - - This is a 1 channel version (e.g. one gray channel). 
- """ - - assert len(cfg.image_statistics.mean) == 1 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.RandomCrop(size=cfg.image_statistics.the_size), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) - - return gray - - -class DatasetFashionMNIST(DatasetMaster): - """Contstructor""" - - # Initialize - def __init__( - self, - train: bool = False, - path_pattern: str = "./", - path_label: str = "./", - ) -> None: - super().__init__(train, path_pattern, path_label) - - self.pattern_storage = np.ascontiguousarray( - self.pattern_storage[:, np.newaxis, :, :].astype(dtype=np.float32) - ) - - self.pattern_storage /= np.max(self.pattern_storage) - - mean = self.pattern_storage.mean(3).mean(2).mean(0) - self.mean = [*mean] - - def __getitem__(self, index: int) -> tuple[torch.Tensor, int]: - - image = self.pattern_storage[index, 0:1, :, :] - target = int(self.label_storage[index]) - return torch.tensor(image), target - - def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The test image comes in - 1. is center cropped - 2. on/off filteres - 3. returned. - - This is a 1 channel version (e.g. one gray channel). - """ - - assert len(cfg.image_statistics.mean) == 1 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.CenterCrop(size=cfg.image_statistics.the_size), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) - - return gray - - def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The training image comes in - 1. is cropped from a random position - 2. on/off filteres - 3. returned. - - This is a 1 channel version (e.g. one gray channel). 
- """ - - assert len(cfg.image_statistics.mean) == 1 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.RandomCrop(size=cfg.image_statistics.the_size), - tv.transforms.RandomHorizontalFlip(p=cfg.augmentation.flip_p), - tv.transforms.ColorJitter( - brightness=cfg.augmentation.jitter_brightness, - contrast=cfg.augmentation.jitter_contrast, - saturation=cfg.augmentation.jitter_saturation, - hue=cfg.augmentation.jitter_hue, - ), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) - - return gray - - -class DatasetCIFAR(DatasetMaster): - """Contstructor""" - - # Initialize - def __init__( - self, - train: bool = False, - path_pattern: str = "./", - path_label: str = "./", - ) -> None: - super().__init__(train, path_pattern, path_label) - - self.pattern_storage = np.ascontiguousarray( - np.moveaxis(self.pattern_storage.astype(dtype=np.float32), 3, 1) - ) - self.pattern_storage /= np.max(self.pattern_storage) - - mean = self.pattern_storage.mean(3).mean(2).mean(0) - self.mean = [*mean] - - def __getitem__(self, index: int) -> tuple[torch.Tensor, int]: - - image = self.pattern_storage[index, :, :, :] - target = int(self.label_storage[index]) - return torch.tensor(image), target - - def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The test image comes in - 1. is center cropped - 2. on/off filteres - 3. returned. - - This is a 3 channel version (e.g. r,g,b channels). - """ - - assert len(cfg.image_statistics.mean) == 3 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.CenterCrop(size=cfg.image_statistics.the_size), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - - my_on_off_filter_r: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - my_on_off_filter_g: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[1]) - my_on_off_filter_b: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[2]) - r: torch.Tensor = my_on_off_filter_r( - pattern[:, 0:1, :, :], - ) - g: torch.Tensor = my_on_off_filter_g( - pattern[:, 1:2, :, :], - ) - b: torch.Tensor = my_on_off_filter_b( - pattern[:, 2:3, :, :], - ) - - new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1) - return new_tensor - - def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The training image comes in - 1. is cropped from a random position - 2. is randomly horizontally flipped - 3. is randomly color jitteres - 4. on/off filteres - 5. returned. - - This is a 3 channel version (e.g. r,g,b channels). 
- """ - assert len(cfg.image_statistics.mean) == 3 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.RandomCrop(size=cfg.image_statistics.the_size), - tv.transforms.RandomHorizontalFlip(p=cfg.augmentation.flip_p), - tv.transforms.ColorJitter( - brightness=cfg.augmentation.jitter_brightness, - contrast=cfg.augmentation.jitter_contrast, - saturation=cfg.augmentation.jitter_saturation, - hue=cfg.augmentation.jitter_hue, - ), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - my_on_off_filter_r: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - my_on_off_filter_g: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[1]) - my_on_off_filter_b: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[2]) - r: torch.Tensor = my_on_off_filter_r( - pattern[:, 0:1, :, :], - ) - g: torch.Tensor = my_on_off_filter_g( - pattern[:, 1:2, :, :], - ) - b: torch.Tensor = my_on_off_filter_b( - pattern[:, 2:3, :, :], - ) - - new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1) - return new_tensor - - -class OnOffFilter(torch.nn.Module): - def __init__(self, p: float = 0.5) -> None: - super(OnOffFilter, self).__init__() - self.p: float = p - - def forward(self, tensor: torch.Tensor) -> torch.Tensor: - - assert tensor.shape[1] == 1 - - tensor_clone = 2.0 * (tensor - self.p) - - temp_0: torch.Tensor = torch.where( - tensor_clone < 0.0, - -tensor_clone, - tensor_clone.new_zeros(tensor_clone.shape, dtype=tensor_clone.dtype), - ) - - temp_1: torch.Tensor = torch.where( - tensor_clone >= 0.0, - tensor_clone, - tensor_clone.new_zeros(tensor_clone.shape, dtype=tensor_clone.dtype), - ) - - new_tensor: torch.Tensor = torch.cat((temp_0, temp_1), dim=1) - - return new_tensor - - def __repr__(self) -> str: - return self.__class__.__name__ + "(p={0})".format(self.p) - - -if __name__ == "__main__": - pass diff --git a/DATA_CIFAR10/PyTorch_Non_Spike_Network/Error.png b/DATA_CIFAR10/PyTorch_Non_Spike_Network/Error.png deleted file mode 100644 index ec03281..0000000 Binary files a/DATA_CIFAR10/PyTorch_Non_Spike_Network/Error.png and /dev/null differ diff --git a/DATA_CIFAR10/PyTorch_Non_Spike_Network/Model_MNIST_A_499.pt.gz b/DATA_CIFAR10/PyTorch_Non_Spike_Network/Model_MNIST_A_499.pt.gz deleted file mode 100644 index 80bd539..0000000 Binary files a/DATA_CIFAR10/PyTorch_Non_Spike_Network/Model_MNIST_A_499.pt.gz and /dev/null differ diff --git a/DATA_CIFAR10/PyTorch_Non_Spike_Network/Parameter.py b/DATA_CIFAR10/PyTorch_Non_Spike_Network/Parameter.py deleted file mode 100644 index 92fe247..0000000 --- a/DATA_CIFAR10/PyTorch_Non_Spike_Network/Parameter.py +++ /dev/null @@ -1,164 +0,0 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - -# %% -from dataclasses import dataclass, field -import numpy as np -import torch -import os - - -@dataclass -class Network: - """Parameters of the network. The details about - its layers and the number of output neurons.""" - - number_of_output_neurons: int = field(default=0) - forward_kernel_size: list[list[int]] = field(default_factory=list) - forward_neuron_numbers: list[list[int]] = field(default_factory=list) - strides: list[list[int]] = field(default_factory=list) - dilation: list[list[int]] = field(default_factory=list) - padding: list[list[int]] = field(default_factory=list) - is_pooling_layer: list[bool] = field(default_factory=list) - w_trainable: list[bool] = field(default_factory=list) - eps_xy_trainable: list[bool] = field(default_factory=list) - eps_xy_mean: list[bool] = field(default_factory=list) - - -@dataclass -class LearningParameters: - """Parameter required for training""" - - loss_coeffs_mse: float = field(default=0.5) - loss_coeffs_kldiv: float = field(default=1.0) - learning_rate_gamma_w: float = field(default=-1.0) - learning_rate_gamma_eps_xy: float = field(default=-1.0) - learning_rate_threshold_w: float = field(default=0.00001) - learning_rate_threshold_eps_xy: float = field(default=0.00001) - learning_active: bool = field(default=True) - weight_noise_amplitude: float = field(default=0.01) - eps_xy_intitial: float = field(default=0.1) - test_every_x_learning_steps: int = field(default=50) - test_during_learning: bool = field(default=True) - lr_scheduler_factor: float = field(default=0.75) - lr_scheduler_patience: int = field(default=10) - optimizer_name: str = field(default="Adam") - lr_schedule_name: str = field(default="ReduceLROnPlateau") - number_of_batches_for_one_update: int = field(default=1) - alpha_number_of_iterations: int = field(default=0) - overload_path: str = field(default="./Previous") - - -@dataclass -class Augmentation: - """Parameters used for data augmentation.""" - - crop_width_in_pixel: int = field(default=2) - flip_p: float = field(default=0.5) - jitter_brightness: float = field(default=0.5) - jitter_contrast: float = field(default=0.1) - jitter_saturation: float = field(default=0.1) - jitter_hue: float = field(default=0.15) - - -@dataclass -class ImageStatistics: - """(Statistical) information about the input. i.e. 
- mean values and the x and y size of the input""" - - mean: list[float] = field(default_factory=list) - the_size: list[int] = field(default_factory=list) - - -@dataclass -class Config: - """Master config class.""" - - # Sub classes - network_structure: Network = field(default_factory=Network) - learning_parameters: LearningParameters = field(default_factory=LearningParameters) - augmentation: Augmentation = field(default_factory=Augmentation) - image_statistics: ImageStatistics = field(default_factory=ImageStatistics) - - batch_size: int = field(default=500) - data_mode: str = field(default="") - - learning_step: int = field(default=0) - learning_step_max: int = field(default=10000) - - number_of_cpu_processes: int = field(default=-1) - - number_of_spikes: int = field(default=0) - cooldown_after_number_of_spikes: int = field(default=0) - - weight_path: str = field(default="./Weights/") - eps_xy_path: str = field(default="./EpsXY/") - data_path: str = field(default="./") - - reduction_cooldown: float = field(default=25.0) - epsilon_0: float = field(default=1.0) - - update_after_x_batch: float = field(default=1.0) - - def __post_init__(self) -> None: - """Post init determines the number of cores. - Creates the required directory and gives us an optimized - (for the amount of cores) batch size.""" - number_of_cpu_processes_temp = os.cpu_count() - - if self.number_of_cpu_processes < 1: - if number_of_cpu_processes_temp is None: - self.number_of_cpu_processes = 1 - else: - self.number_of_cpu_processes = number_of_cpu_processes_temp - - os.makedirs(self.weight_path, exist_ok=True) - os.makedirs(self.eps_xy_path, exist_ok=True) - os.makedirs(self.data_path, exist_ok=True) - - self.batch_size = ( - self.batch_size // self.number_of_cpu_processes - ) * self.number_of_cpu_processes - - self.batch_size = np.max((self.batch_size, self.number_of_cpu_processes)) - self.batch_size = int(self.batch_size) - - def get_epsilon_t(self): - """Generates the time series of the basic epsilon.""" - np_epsilon_t: np.ndarray = np.ones((self.number_of_spikes), dtype=np.float32) - np_epsilon_t[ - self.cooldown_after_number_of_spikes : self.number_of_spikes - ] /= self.reduction_cooldown - return torch.tensor(np_epsilon_t) - - def get_update_after_x_pattern(self): - """Tells us after how many pattern we need to update the weights.""" - return self.batch_size * self.update_after_x_batch diff --git a/DATA_CIFAR10/PyTorch_Non_Spike_Network/events.out.tfevents.1651334099.fedora.121264.0 b/DATA_CIFAR10/PyTorch_Non_Spike_Network/events.out.tfevents.1651334099.fedora.121264.0 deleted file mode 100644 index 3e5aae1..0000000 Binary files a/DATA_CIFAR10/PyTorch_Non_Spike_Network/events.out.tfevents.1651334099.fedora.121264.0 and /dev/null differ diff --git a/DATA_CIFAR10/PyTorch_Non_Spike_Network/info.md b/DATA_CIFAR10/PyTorch_Non_Spike_Network/info.md deleted file mode 100644 index 1ebeb3f..0000000 --- a/DATA_CIFAR10/PyTorch_Non_Spike_Network/info.md +++ /dev/null @@ -1 +0,0 @@ -Performance reached (test data correct classifications): 76.60% \ No newline at end of file diff --git a/DATA_CIFAR10/PyTorch_Non_Spike_Network/plot.py b/DATA_CIFAR10/PyTorch_Non_Spike_Network/plot.py deleted file mode 100644 index 82af53b..0000000 --- a/DATA_CIFAR10/PyTorch_Non_Spike_Network/plot.py +++ /dev/null @@ -1,31 +0,0 @@ -import os - -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" - -import numpy as np -import matplotlib.pyplot as plt -from tensorboard.backend.event_processing import event_accumulator - -filename: str = 
"events.out.tfevents.1651334099.fedora.121264.0" - -acc = event_accumulator.EventAccumulator(filename) -acc.Reload() - -# What is available? -# available_scalar = acc.Tags()["scalars"] -# print("Available Scalars") -# print(available_scalar) - -which_scalar: str = "Test Number Correct" -te = acc.Scalars(which_scalar) - -temp: list = [] -for te_item in te: - temp.append((te_item[1], te_item[2])) -temp_np = np.array(temp) - -plt.semilogy(temp_np[:, 0], (1.0 - (temp_np[:, 1] / 10000)) * 100) -plt.xlabel("Epochs") -plt.ylabel("Error [%]") -plt.savefig("Error.png") -plt.show() diff --git a/DATA_CIFAR10/PyTorch_Non_Spike_Network/run.py b/DATA_CIFAR10/PyTorch_Non_Spike_Network/run.py deleted file mode 100644 index a7f59ab..0000000 --- a/DATA_CIFAR10/PyTorch_Non_Spike_Network/run.py +++ /dev/null @@ -1,203 +0,0 @@ -# %% -import torch -from Dataset import DatasetCIFAR -from Parameter import Config -import torchvision as tv # type: ignore - -# Some parameters - -cfg = Config() - -input_number_of_channel: int = 3 -input_dim_x: int = 28 -input_dim_y: int = 28 - -number_of_output_channels_conv1: int = 96 -number_of_output_channels_conv2: int = 192 -number_of_output_channels_flatten1: int = 3072 -number_of_output_channels_full1: int = 10 - -kernel_size_conv1: tuple[int, int] = (5, 5) -kernel_size_pool1: tuple[int, int] = (2, 2) -kernel_size_conv2: tuple[int, int] = (5, 5) -kernel_size_pool2: tuple[int, int] = (2, 2) - -stride_conv1: tuple[int, int] = (1, 1) -stride_pool1: tuple[int, int] = (2, 2) -stride_conv2: tuple[int, int] = (1, 1) -stride_pool2: tuple[int, int] = (2, 2) - -padding_conv1: int = 0 -padding_pool1: int = 0 -padding_conv2: int = 0 -padding_pool2: int = 0 - -network = torch.nn.Sequential( - torch.nn.Conv2d( - in_channels=input_number_of_channel, - out_channels=number_of_output_channels_conv1, - kernel_size=kernel_size_conv1, - stride=stride_conv1, - padding=padding_conv1, - ), - torch.nn.ReLU(), - torch.nn.MaxPool2d( - kernel_size=kernel_size_pool1, stride=stride_pool1, padding=padding_pool1 - ), - torch.nn.Conv2d( - in_channels=number_of_output_channels_conv1, - out_channels=number_of_output_channels_conv2, - kernel_size=kernel_size_conv2, - stride=stride_conv2, - padding=padding_conv2, - ), - torch.nn.ReLU(), - torch.nn.MaxPool2d( - kernel_size=kernel_size_pool2, stride=stride_pool2, padding=padding_pool2 - ), - torch.nn.Flatten( - start_dim=1, - ), - torch.nn.Linear( - in_features=number_of_output_channels_flatten1, - out_features=number_of_output_channels_full1, - bias=True, - ), - torch.nn.Softmax(dim=1), -) -# %% -path_pattern: str = "./DATA_CIFAR10/" -path_label: str = "./DATA_CIFAR10/" - -dataset_train = DatasetCIFAR( - train=True, path_pattern=path_pattern, path_label=path_label -) -dataset_test = DatasetCIFAR( - train=False, path_pattern=path_pattern, path_label=path_label -) -cfg.image_statistics.mean = dataset_train.mean -# The basic size -cfg.image_statistics.the_size = [ - dataset_train.pattern_storage.shape[2], - dataset_train.pattern_storage.shape[3], -] -# Minus the stuff we cut away in the pattern filter -cfg.image_statistics.the_size[0] -= 2 * cfg.augmentation.crop_width_in_pixel -cfg.image_statistics.the_size[1] -= 2 * cfg.augmentation.crop_width_in_pixel - - -batch_size_train: int = 100 -batch_size_test: int = 100 - - -train_data_load = torch.utils.data.DataLoader( - dataset_train, batch_size=batch_size_train, shuffle=True -) - -test_data_load = torch.utils.data.DataLoader( - dataset_test, batch_size=batch_size_test, shuffle=False -) - -transforms_test: 
torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.CenterCrop(size=cfg.image_statistics.the_size), -) -scripted_transforms_test = torch.jit.script(transforms_test) - -transforms_train: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.RandomCrop(size=cfg.image_statistics.the_size), - tv.transforms.RandomHorizontalFlip(p=cfg.augmentation.flip_p), - tv.transforms.ColorJitter( - brightness=cfg.augmentation.jitter_brightness, - contrast=cfg.augmentation.jitter_contrast, - saturation=cfg.augmentation.jitter_saturation, - hue=cfg.augmentation.jitter_hue, - ), -) -scripted_transforms_train = torch.jit.script(transforms_train) -# %% -# The optimizer -optimizer = torch.optim.Adam(network.parameters(), lr=0.001) -# The LR Scheduler -lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.75) - -# %% -number_of_test_pattern: int = dataset_test.__len__() -number_of_train_pattern: int = dataset_train.__len__() - -number_of_epoch: int = 500 - -# %% -import time -from torch.utils.tensorboard import SummaryWriter - -tb = SummaryWriter() - -# %% -loss_function = torch.nn.CrossEntropyLoss() - -for epoch_id in range(0, number_of_epoch): - print(f"Epoch: {epoch_id}") - t_start: float = time.perf_counter() - - train_loss: float = 0.0 - train_correct: int = 0 - train_number: int = 0 - test_correct: int = 0 - test_number: int = 0 - - # Switch the network into training mode - network.train() - - # This runs in total for one epoch split up into mini-batches - for image, target in train_data_load: - - # Clean the gradient - optimizer.zero_grad() - - output = network(scripted_transforms_train(image)) - - loss = loss_function(output, target) - - train_loss += loss.item() - train_correct += (output.argmax(dim=1) == target).sum().numpy() - train_number += target.shape[0] - # Calculate backprop - loss.backward() - - # Update the parameter - optimizer.step() - - # Update the learning rate - lr_scheduler.step(train_loss) - - t_training: float = time.perf_counter() - - # Switch the network into evalution mode - network.eval() - with torch.no_grad(): - for image, target in test_data_load: - - output = network(scripted_transforms_test(image)) - - test_correct += (output.argmax(dim=1) == target).sum().numpy() - test_number += target.shape[0] - - t_testing = time.perf_counter() - - perfomance_test_correct: float = 100.0 * test_correct / test_number - perfomance_train_correct: float = 100.0 * train_correct / train_number - - tb.add_scalar("Train Loss", train_loss, epoch_id) - tb.add_scalar("Train Number Correct", train_correct, epoch_id) - tb.add_scalar("Test Number Correct", test_correct, epoch_id) - - print(f"Training: Loss={train_loss:.5f} Correct={perfomance_train_correct:.2f}%") - print(f"Testing: Correct={perfomance_test_correct:.2f}%") - print( - f"Time: Training={(t_training-t_start):.1f}sec, Testing={(t_testing-t_training):.1f}sec" - ) - torch.save(network, "Model_MNIST_A_" + str(epoch_id) + ".pt") - print() - -# %% -tb.close() diff --git a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/Dataset.py b/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/Dataset.py deleted file mode 100644 index 11f9854..0000000 --- a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/Dataset.py +++ /dev/null @@ -1,422 +0,0 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the 
rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - -from abc import ABC, abstractmethod -import torch -import numpy as np -import torchvision as tv # type: ignore -from Parameter import Config - - -class DatasetMaster(torch.utils.data.Dataset, ABC): - - path_label: str - label_storage: np.ndarray - pattern_storage: np.ndarray - number_of_pattern: int - mean: list[float] - - # Initialize - def __init__( - self, - train: bool = False, - path_pattern: str = "./", - path_label: str = "./", - ) -> None: - super().__init__() - - if train is True: - self.label_storage = np.load(path_label + "/TrainLabelStorage.npy") - else: - self.label_storage = np.load(path_label + "/TestLabelStorage.npy") - - if train is True: - self.pattern_storage = np.load(path_pattern + "/TrainPatternStorage.npy") - else: - self.pattern_storage = np.load(path_pattern + "/TestPatternStorage.npy") - - self.number_of_pattern = self.label_storage.shape[0] - - self.mean = [] - - def __len__(self) -> int: - return self.number_of_pattern - - # Get one pattern at position index - @abstractmethod - def __getitem__(self, index: int) -> tuple[torch.Tensor, int]: - pass - - @abstractmethod - def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - pass - - @abstractmethod - def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - pass - - -class DatasetMNIST(DatasetMaster): - """Contstructor""" - - # Initialize - def __init__( - self, - train: bool = False, - path_pattern: str = "./", - path_label: str = "./", - ) -> None: - super().__init__(train, path_pattern, path_label) - - self.pattern_storage = np.ascontiguousarray( - self.pattern_storage[:, np.newaxis, :, :].astype(dtype=np.float32) - ) - - self.pattern_storage /= np.max(self.pattern_storage) - - mean = self.pattern_storage.mean(3).mean(2).mean(0) - self.mean = [*mean] - - def __getitem__(self, index: int) -> tuple[torch.Tensor, int]: - - image = self.pattern_storage[index, 0:1, :, :] - target = int(self.label_storage[index]) - return torch.tensor(image), target - - def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The test image comes in - 1. is center cropped - 2. on/off filteres - 3. returned. - - This is a 1 channel version (e.g. one gray channel). 
- """ - - assert len(cfg.image_statistics.mean) == 1 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.CenterCrop(size=cfg.image_statistics.the_size), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) - - return gray - - def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The training image comes in - 1. is cropped from a random position - 2. on/off filteres - 3. returned. - - This is a 1 channel version (e.g. one gray channel). - """ - - assert len(cfg.image_statistics.mean) == 1 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.RandomCrop(size=cfg.image_statistics.the_size), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) - - return gray - - -class DatasetFashionMNIST(DatasetMaster): - """Contstructor""" - - # Initialize - def __init__( - self, - train: bool = False, - path_pattern: str = "./", - path_label: str = "./", - ) -> None: - super().__init__(train, path_pattern, path_label) - - self.pattern_storage = np.ascontiguousarray( - self.pattern_storage[:, np.newaxis, :, :].astype(dtype=np.float32) - ) - - self.pattern_storage /= np.max(self.pattern_storage) - - mean = self.pattern_storage.mean(3).mean(2).mean(0) - self.mean = [*mean] - - def __getitem__(self, index: int) -> tuple[torch.Tensor, int]: - - image = self.pattern_storage[index, 0:1, :, :] - target = int(self.label_storage[index]) - return torch.tensor(image), target - - def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The test image comes in - 1. is center cropped - 2. on/off filteres - 3. returned. - - This is a 1 channel version (e.g. one gray channel). - """ - - assert len(cfg.image_statistics.mean) == 1 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.CenterCrop(size=cfg.image_statistics.the_size), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) - - return gray - - def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The training image comes in - 1. is cropped from a random position - 2. on/off filteres - 3. returned. - - This is a 1 channel version (e.g. one gray channel). 
- """ - - assert len(cfg.image_statistics.mean) == 1 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.RandomCrop(size=cfg.image_statistics.the_size), - tv.transforms.RandomHorizontalFlip(p=cfg.augmentation.flip_p), - tv.transforms.ColorJitter( - brightness=cfg.augmentation.jitter_brightness, - contrast=cfg.augmentation.jitter_contrast, - saturation=cfg.augmentation.jitter_saturation, - hue=cfg.augmentation.jitter_hue, - ), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) - - return gray - - -class DatasetCIFAR(DatasetMaster): - """Contstructor""" - - # Initialize - def __init__( - self, - train: bool = False, - path_pattern: str = "./", - path_label: str = "./", - ) -> None: - super().__init__(train, path_pattern, path_label) - - self.pattern_storage = np.ascontiguousarray( - np.moveaxis(self.pattern_storage.astype(dtype=np.float32), 3, 1) - ) - self.pattern_storage /= np.max(self.pattern_storage) - - mean = self.pattern_storage.mean(3).mean(2).mean(0) - self.mean = [*mean] - - def __getitem__(self, index: int) -> tuple[torch.Tensor, int]: - - image = self.pattern_storage[index, :, :, :] - target = int(self.label_storage[index]) - return torch.tensor(image), target - - def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The test image comes in - 1. is center cropped - 2. on/off filteres - 3. returned. - - This is a 3 channel version (e.g. r,g,b channels). - """ - - assert len(cfg.image_statistics.mean) == 3 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.CenterCrop(size=cfg.image_statistics.the_size), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - - my_on_off_filter_r: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - my_on_off_filter_g: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[1]) - my_on_off_filter_b: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[2]) - r: torch.Tensor = my_on_off_filter_r( - pattern[:, 0:1, :, :], - ) - g: torch.Tensor = my_on_off_filter_g( - pattern[:, 1:2, :, :], - ) - b: torch.Tensor = my_on_off_filter_b( - pattern[:, 2:3, :, :], - ) - - new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1) - return new_tensor - - def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The training image comes in - 1. is cropped from a random position - 2. is randomly horizontally flipped - 3. is randomly color jitteres - 4. on/off filteres - 5. returned. - - This is a 3 channel version (e.g. r,g,b channels). 
- """ - assert len(cfg.image_statistics.mean) == 3 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.RandomCrop(size=cfg.image_statistics.the_size), - tv.transforms.RandomHorizontalFlip(p=cfg.augmentation.flip_p), - tv.transforms.ColorJitter( - brightness=cfg.augmentation.jitter_brightness, - contrast=cfg.augmentation.jitter_contrast, - saturation=cfg.augmentation.jitter_saturation, - hue=cfg.augmentation.jitter_hue, - ), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - my_on_off_filter_r: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - my_on_off_filter_g: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[1]) - my_on_off_filter_b: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[2]) - r: torch.Tensor = my_on_off_filter_r( - pattern[:, 0:1, :, :], - ) - g: torch.Tensor = my_on_off_filter_g( - pattern[:, 1:2, :, :], - ) - b: torch.Tensor = my_on_off_filter_b( - pattern[:, 2:3, :, :], - ) - - new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1) - return new_tensor - - -class OnOffFilter(torch.nn.Module): - def __init__(self, p: float = 0.5) -> None: - super(OnOffFilter, self).__init__() - self.p: float = p - - def forward(self, tensor: torch.Tensor) -> torch.Tensor: - - assert tensor.shape[1] == 1 - - tensor_clone = 2.0 * (tensor - self.p) - - temp_0: torch.Tensor = torch.where( - tensor_clone < 0.0, - -tensor_clone, - tensor_clone.new_zeros(tensor_clone.shape, dtype=tensor_clone.dtype), - ) - - temp_1: torch.Tensor = torch.where( - tensor_clone >= 0.0, - tensor_clone, - tensor_clone.new_zeros(tensor_clone.shape, dtype=tensor_clone.dtype), - ) - - new_tensor: torch.Tensor = torch.cat((temp_0, temp_1), dim=1) - - return new_tensor - - def __repr__(self) -> str: - return self.__class__.__name__ + "(p={0})".format(self.p) - - -if __name__ == "__main__": - pass diff --git a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/Error.png b/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/Error.png deleted file mode 100644 index 5f6494d..0000000 Binary files a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/Error.png and /dev/null differ diff --git a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/Model_MNIST_A_199.pt.gz b/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/Model_MNIST_A_199.pt.gz deleted file mode 100644 index a5a59bc..0000000 Binary files a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/Model_MNIST_A_199.pt.gz and /dev/null differ diff --git a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/Parameter.py b/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/Parameter.py deleted file mode 100644 index 92fe247..0000000 --- a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/Parameter.py +++ /dev/null @@ -1,164 +0,0 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or 
substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - -# %% -from dataclasses import dataclass, field -import numpy as np -import torch -import os - - -@dataclass -class Network: - """Parameters of the network. The details about - its layers and the number of output neurons.""" - - number_of_output_neurons: int = field(default=0) - forward_kernel_size: list[list[int]] = field(default_factory=list) - forward_neuron_numbers: list[list[int]] = field(default_factory=list) - strides: list[list[int]] = field(default_factory=list) - dilation: list[list[int]] = field(default_factory=list) - padding: list[list[int]] = field(default_factory=list) - is_pooling_layer: list[bool] = field(default_factory=list) - w_trainable: list[bool] = field(default_factory=list) - eps_xy_trainable: list[bool] = field(default_factory=list) - eps_xy_mean: list[bool] = field(default_factory=list) - - -@dataclass -class LearningParameters: - """Parameter required for training""" - - loss_coeffs_mse: float = field(default=0.5) - loss_coeffs_kldiv: float = field(default=1.0) - learning_rate_gamma_w: float = field(default=-1.0) - learning_rate_gamma_eps_xy: float = field(default=-1.0) - learning_rate_threshold_w: float = field(default=0.00001) - learning_rate_threshold_eps_xy: float = field(default=0.00001) - learning_active: bool = field(default=True) - weight_noise_amplitude: float = field(default=0.01) - eps_xy_intitial: float = field(default=0.1) - test_every_x_learning_steps: int = field(default=50) - test_during_learning: bool = field(default=True) - lr_scheduler_factor: float = field(default=0.75) - lr_scheduler_patience: int = field(default=10) - optimizer_name: str = field(default="Adam") - lr_schedule_name: str = field(default="ReduceLROnPlateau") - number_of_batches_for_one_update: int = field(default=1) - alpha_number_of_iterations: int = field(default=0) - overload_path: str = field(default="./Previous") - - -@dataclass -class Augmentation: - """Parameters used for data augmentation.""" - - crop_width_in_pixel: int = field(default=2) - flip_p: float = field(default=0.5) - jitter_brightness: float = field(default=0.5) - jitter_contrast: float = field(default=0.1) - jitter_saturation: float = field(default=0.1) - jitter_hue: float = field(default=0.15) - - -@dataclass -class ImageStatistics: - """(Statistical) information about the input. i.e. 
- mean values and the x and y size of the input""" - - mean: list[float] = field(default_factory=list) - the_size: list[int] = field(default_factory=list) - - -@dataclass -class Config: - """Master config class.""" - - # Sub classes - network_structure: Network = field(default_factory=Network) - learning_parameters: LearningParameters = field(default_factory=LearningParameters) - augmentation: Augmentation = field(default_factory=Augmentation) - image_statistics: ImageStatistics = field(default_factory=ImageStatistics) - - batch_size: int = field(default=500) - data_mode: str = field(default="") - - learning_step: int = field(default=0) - learning_step_max: int = field(default=10000) - - number_of_cpu_processes: int = field(default=-1) - - number_of_spikes: int = field(default=0) - cooldown_after_number_of_spikes: int = field(default=0) - - weight_path: str = field(default="./Weights/") - eps_xy_path: str = field(default="./EpsXY/") - data_path: str = field(default="./") - - reduction_cooldown: float = field(default=25.0) - epsilon_0: float = field(default=1.0) - - update_after_x_batch: float = field(default=1.0) - - def __post_init__(self) -> None: - """Post init determines the number of cores. - Creates the required directory and gives us an optimized - (for the amount of cores) batch size.""" - number_of_cpu_processes_temp = os.cpu_count() - - if self.number_of_cpu_processes < 1: - if number_of_cpu_processes_temp is None: - self.number_of_cpu_processes = 1 - else: - self.number_of_cpu_processes = number_of_cpu_processes_temp - - os.makedirs(self.weight_path, exist_ok=True) - os.makedirs(self.eps_xy_path, exist_ok=True) - os.makedirs(self.data_path, exist_ok=True) - - self.batch_size = ( - self.batch_size // self.number_of_cpu_processes - ) * self.number_of_cpu_processes - - self.batch_size = np.max((self.batch_size, self.number_of_cpu_processes)) - self.batch_size = int(self.batch_size) - - def get_epsilon_t(self): - """Generates the time series of the basic epsilon.""" - np_epsilon_t: np.ndarray = np.ones((self.number_of_spikes), dtype=np.float32) - np_epsilon_t[ - self.cooldown_after_number_of_spikes : self.number_of_spikes - ] /= self.reduction_cooldown - return torch.tensor(np_epsilon_t) - - def get_update_after_x_pattern(self): - """Tells us after how many pattern we need to update the weights.""" - return self.batch_size * self.update_after_x_batch diff --git a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/events.out.tfevents.1651328399.fedora.118340.0 b/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/events.out.tfevents.1651328399.fedora.118340.0 deleted file mode 100644 index 2316e51..0000000 Binary files a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/events.out.tfevents.1651328399.fedora.118340.0 and /dev/null differ diff --git a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/info.md b/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/info.md deleted file mode 100644 index c8a4090..0000000 --- a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/info.md +++ /dev/null @@ -1 +0,0 @@ -Performance reached (test data correct classifications): 89.82% \ No newline at end of file diff --git a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/plot.py b/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/plot.py deleted file mode 100644 index 7b9ea58..0000000 --- a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/plot.py +++ /dev/null @@ -1,31 +0,0 @@ -import os - -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" - -import numpy as np -import matplotlib.pyplot as plt -from tensorboard.backend.event_processing import 
event_accumulator - -filename: str = "events.out.tfevents.1651328399.fedora.118340.0" - -acc = event_accumulator.EventAccumulator(filename) -acc.Reload() - -# What is available? -# available_scalar = acc.Tags()["scalars"] -# print("Available Scalars") -# print(available_scalar) - -which_scalar: str = "Test Number Correct" -te = acc.Scalars(which_scalar) - -temp: list = [] -for te_item in te: - temp.append((te_item[1], te_item[2])) -temp_np = np.array(temp) - -plt.semilogy(temp_np[:, 0], (1.0 - (temp_np[:, 1] / 10000)) * 100) -plt.xlabel("Epochs") -plt.ylabel("Error [%]") -plt.savefig("Error.png") -plt.show() diff --git a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/run.py b/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/run.py deleted file mode 100644 index 8b6360d..0000000 --- a/DATA_FASHION_MNIST/PyTorch_Non_Spike_Network/run.py +++ /dev/null @@ -1,203 +0,0 @@ -# %% -import torch -from Dataset import DatasetFashionMNIST -from Parameter import Config -import torchvision as tv # type: ignore - -# Some parameters - -cfg = Config() - -input_number_of_channel: int = 1 -input_dim_x: int = 24 -input_dim_y: int = 24 - -number_of_output_channels_conv1: int = 32 -number_of_output_channels_conv2: int = 64 -number_of_output_channels_flatten1: int = 576 -number_of_output_channels_full1: int = 10 - -kernel_size_conv1: tuple[int, int] = (5, 5) -kernel_size_pool1: tuple[int, int] = (2, 2) -kernel_size_conv2: tuple[int, int] = (5, 5) -kernel_size_pool2: tuple[int, int] = (2, 2) - -stride_conv1: tuple[int, int] = (1, 1) -stride_pool1: tuple[int, int] = (2, 2) -stride_conv2: tuple[int, int] = (1, 1) -stride_pool2: tuple[int, int] = (2, 2) - -padding_conv1: int = 0 -padding_pool1: int = 0 -padding_conv2: int = 0 -padding_pool2: int = 0 - -network = torch.nn.Sequential( - torch.nn.Conv2d( - in_channels=input_number_of_channel, - out_channels=number_of_output_channels_conv1, - kernel_size=kernel_size_conv1, - stride=stride_conv1, - padding=padding_conv1, - ), - torch.nn.ReLU(), - torch.nn.MaxPool2d( - kernel_size=kernel_size_pool1, stride=stride_pool1, padding=padding_pool1 - ), - torch.nn.Conv2d( - in_channels=number_of_output_channels_conv1, - out_channels=number_of_output_channels_conv2, - kernel_size=kernel_size_conv2, - stride=stride_conv2, - padding=padding_conv2, - ), - torch.nn.ReLU(), - torch.nn.MaxPool2d( - kernel_size=kernel_size_pool2, stride=stride_pool2, padding=padding_pool2 - ), - torch.nn.Flatten( - start_dim=1, - ), - torch.nn.Linear( - in_features=number_of_output_channels_flatten1, - out_features=number_of_output_channels_full1, - bias=True, - ), - torch.nn.Softmax(dim=1), -) -# %% -path_pattern: str = "./DATA_FASHION_MNIST/" -path_label: str = "./DATA_FASHION_MNIST/" - -dataset_train = DatasetFashionMNIST( - train=True, path_pattern=path_pattern, path_label=path_label -) -dataset_test = DatasetFashionMNIST( - train=False, path_pattern=path_pattern, path_label=path_label -) -cfg.image_statistics.mean = dataset_train.mean -# The basic size -cfg.image_statistics.the_size = [ - dataset_train.pattern_storage.shape[2], - dataset_train.pattern_storage.shape[3], -] -# Minus the stuff we cut away in the pattern filter -cfg.image_statistics.the_size[0] -= 2 * cfg.augmentation.crop_width_in_pixel -cfg.image_statistics.the_size[1] -= 2 * cfg.augmentation.crop_width_in_pixel - - -batch_size_train: int = 100 -batch_size_test: int = 100 - - -train_data_load = torch.utils.data.DataLoader( - dataset_train, batch_size=batch_size_train, shuffle=True -) - -test_data_load = torch.utils.data.DataLoader( - 
dataset_test, batch_size=batch_size_test, shuffle=False -) - -transforms_test: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.CenterCrop(size=cfg.image_statistics.the_size), -) -scripted_transforms_test = torch.jit.script(transforms_test) - -transforms_train: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.RandomCrop(size=cfg.image_statistics.the_size), - tv.transforms.RandomHorizontalFlip(p=cfg.augmentation.flip_p), - tv.transforms.ColorJitter( - brightness=cfg.augmentation.jitter_brightness, - contrast=cfg.augmentation.jitter_contrast, - saturation=cfg.augmentation.jitter_saturation, - hue=cfg.augmentation.jitter_hue, - ), -) -scripted_transforms_train = torch.jit.script(transforms_train) -# %% -# The optimizer -optimizer = torch.optim.Adam(network.parameters(), lr=0.001) -# The LR Scheduler -lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.75) - -# %% -number_of_test_pattern: int = dataset_test.__len__() -number_of_train_pattern: int = dataset_train.__len__() - -number_of_epoch: int = 200 - -# %% -import time -from torch.utils.tensorboard import SummaryWriter - -tb = SummaryWriter() - -# %% -loss_function = torch.nn.CrossEntropyLoss() - -for epoch_id in range(0, number_of_epoch): - print(f"Epoch: {epoch_id}") - t_start: float = time.perf_counter() - - train_loss: float = 0.0 - train_correct: int = 0 - train_number: int = 0 - test_correct: int = 0 - test_number: int = 0 - - # Switch the network into training mode - network.train() - - # This runs in total for one epoch split up into mini-batches - for image, target in train_data_load: - - # Clean the gradient - optimizer.zero_grad() - - output = network(scripted_transforms_train(image)) - - loss = loss_function(output, target) - - train_loss += loss.item() - train_correct += (output.argmax(dim=1) == target).sum().numpy() - train_number += target.shape[0] - # Calculate backprop - loss.backward() - - # Update the parameter - optimizer.step() - - # Update the learning rate - lr_scheduler.step(train_loss) - - t_training: float = time.perf_counter() - - # Switch the network into evalution mode - network.eval() - with torch.no_grad(): - for image, target in test_data_load: - - output = network(scripted_transforms_test(image)) - - test_correct += (output.argmax(dim=1) == target).sum().numpy() - test_number += target.shape[0] - - t_testing = time.perf_counter() - - perfomance_test_correct: float = 100.0 * test_correct / test_number - perfomance_train_correct: float = 100.0 * train_correct / train_number - - tb.add_scalar("Train Loss", train_loss, epoch_id) - tb.add_scalar("Train Number Correct", train_correct, epoch_id) - tb.add_scalar("Test Number Correct", test_correct, epoch_id) - - print(f"Training: Loss={train_loss:.5f} Correct={perfomance_train_correct:.2f}%") - print(f"Testing: Correct={perfomance_test_correct:.2f}%") - print( - f"Time: Training={(t_training-t_start):.1f}sec, Testing={(t_testing-t_training):.1f}sec" - ) - torch.save(network, "Model_MNIST_A_" + str(epoch_id) + ".pt") - print() - -# %% -tb.close() diff --git a/DATA_MNIST/PyTorch_Non_Spike_Network/Error.png b/DATA_MNIST/PyTorch_Non_Spike_Network/Error.png deleted file mode 100644 index ee326fe..0000000 Binary files a/DATA_MNIST/PyTorch_Non_Spike_Network/Error.png and /dev/null differ diff --git a/DATA_MNIST/PyTorch_Non_Spike_Network/Model_MNIST_A_49.pt.gz b/DATA_MNIST/PyTorch_Non_Spike_Network/Model_MNIST_A_49.pt.gz deleted file mode 100644 index 818da94..0000000 Binary files 
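# ---------------------------------------------------------------------------
# Editor's sketch relating to the training/evaluation loop above (illustration
# only, reusing the names network, test_data_load and scripted_transforms_test
# from that script). Two small remarks: the 576 input features of the final
# Linear layer follow from the 24x24 crop (24 -> 20 after the 5x5 conv, -> 10
# after 2x2 pooling, -> 6 after the second 5x5 conv, -> 3 after pooling;
# 64 * 3 * 3 = 576), and `.sum().numpy()` only works for CPU tensors, while
# `.sum().item()` is the more common way to pull the count out as a plain int.
import torch

test_correct: int = 0
test_number: int = 0
network.eval()
with torch.no_grad():
    for image, target in test_data_load:
        output = network(scripted_transforms_test(image))
        test_correct += int((output.argmax(dim=1) == target).sum().item())
        test_number += int(target.shape[0])
accuracy_percent = 100.0 * test_correct / test_number
# ---------------------------------------------------------------------------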
a/DATA_MNIST/PyTorch_Non_Spike_Network/Model_MNIST_A_49.pt.gz and /dev/null differ diff --git a/DATA_MNIST/PyTorch_Non_Spike_Network/Parameter.py b/DATA_MNIST/PyTorch_Non_Spike_Network/Parameter.py deleted file mode 100644 index 92fe247..0000000 --- a/DATA_MNIST/PyTorch_Non_Spike_Network/Parameter.py +++ /dev/null @@ -1,164 +0,0 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - -# %% -from dataclasses import dataclass, field -import numpy as np -import torch -import os - - -@dataclass -class Network: - """Parameters of the network. The details about - its layers and the number of output neurons.""" - - number_of_output_neurons: int = field(default=0) - forward_kernel_size: list[list[int]] = field(default_factory=list) - forward_neuron_numbers: list[list[int]] = field(default_factory=list) - strides: list[list[int]] = field(default_factory=list) - dilation: list[list[int]] = field(default_factory=list) - padding: list[list[int]] = field(default_factory=list) - is_pooling_layer: list[bool] = field(default_factory=list) - w_trainable: list[bool] = field(default_factory=list) - eps_xy_trainable: list[bool] = field(default_factory=list) - eps_xy_mean: list[bool] = field(default_factory=list) - - -@dataclass -class LearningParameters: - """Parameter required for training""" - - loss_coeffs_mse: float = field(default=0.5) - loss_coeffs_kldiv: float = field(default=1.0) - learning_rate_gamma_w: float = field(default=-1.0) - learning_rate_gamma_eps_xy: float = field(default=-1.0) - learning_rate_threshold_w: float = field(default=0.00001) - learning_rate_threshold_eps_xy: float = field(default=0.00001) - learning_active: bool = field(default=True) - weight_noise_amplitude: float = field(default=0.01) - eps_xy_intitial: float = field(default=0.1) - test_every_x_learning_steps: int = field(default=50) - test_during_learning: bool = field(default=True) - lr_scheduler_factor: float = field(default=0.75) - lr_scheduler_patience: int = field(default=10) - optimizer_name: str = field(default="Adam") - lr_schedule_name: str = field(default="ReduceLROnPlateau") - number_of_batches_for_one_update: int = field(default=1) - alpha_number_of_iterations: int = field(default=0) - overload_path: str = field(default="./Previous") - - -@dataclass -class Augmentation: - """Parameters used for data augmentation.""" - - 
crop_width_in_pixel: int = field(default=2) - flip_p: float = field(default=0.5) - jitter_brightness: float = field(default=0.5) - jitter_contrast: float = field(default=0.1) - jitter_saturation: float = field(default=0.1) - jitter_hue: float = field(default=0.15) - - -@dataclass -class ImageStatistics: - """(Statistical) information about the input. i.e. - mean values and the x and y size of the input""" - - mean: list[float] = field(default_factory=list) - the_size: list[int] = field(default_factory=list) - - -@dataclass -class Config: - """Master config class.""" - - # Sub classes - network_structure: Network = field(default_factory=Network) - learning_parameters: LearningParameters = field(default_factory=LearningParameters) - augmentation: Augmentation = field(default_factory=Augmentation) - image_statistics: ImageStatistics = field(default_factory=ImageStatistics) - - batch_size: int = field(default=500) - data_mode: str = field(default="") - - learning_step: int = field(default=0) - learning_step_max: int = field(default=10000) - - number_of_cpu_processes: int = field(default=-1) - - number_of_spikes: int = field(default=0) - cooldown_after_number_of_spikes: int = field(default=0) - - weight_path: str = field(default="./Weights/") - eps_xy_path: str = field(default="./EpsXY/") - data_path: str = field(default="./") - - reduction_cooldown: float = field(default=25.0) - epsilon_0: float = field(default=1.0) - - update_after_x_batch: float = field(default=1.0) - - def __post_init__(self) -> None: - """Post init determines the number of cores. - Creates the required directory and gives us an optimized - (for the amount of cores) batch size.""" - number_of_cpu_processes_temp = os.cpu_count() - - if self.number_of_cpu_processes < 1: - if number_of_cpu_processes_temp is None: - self.number_of_cpu_processes = 1 - else: - self.number_of_cpu_processes = number_of_cpu_processes_temp - - os.makedirs(self.weight_path, exist_ok=True) - os.makedirs(self.eps_xy_path, exist_ok=True) - os.makedirs(self.data_path, exist_ok=True) - - self.batch_size = ( - self.batch_size // self.number_of_cpu_processes - ) * self.number_of_cpu_processes - - self.batch_size = np.max((self.batch_size, self.number_of_cpu_processes)) - self.batch_size = int(self.batch_size) - - def get_epsilon_t(self): - """Generates the time series of the basic epsilon.""" - np_epsilon_t: np.ndarray = np.ones((self.number_of_spikes), dtype=np.float32) - np_epsilon_t[ - self.cooldown_after_number_of_spikes : self.number_of_spikes - ] /= self.reduction_cooldown - return torch.tensor(np_epsilon_t) - - def get_update_after_x_pattern(self): - """Tells us after how many pattern we need to update the weights.""" - return self.batch_size * self.update_after_x_batch diff --git a/DATA_MNIST/PyTorch_Non_Spike_Network/events.out.tfevents.1651325827.fedora.115860.0 b/DATA_MNIST/PyTorch_Non_Spike_Network/events.out.tfevents.1651325827.fedora.115860.0 deleted file mode 100644 index af6401f..0000000 Binary files a/DATA_MNIST/PyTorch_Non_Spike_Network/events.out.tfevents.1651325827.fedora.115860.0 and /dev/null differ diff --git a/DATA_MNIST/PyTorch_Non_Spike_Network/info.md b/DATA_MNIST/PyTorch_Non_Spike_Network/info.md deleted file mode 100644 index f08af5c..0000000 --- a/DATA_MNIST/PyTorch_Non_Spike_Network/info.md +++ /dev/null @@ -1 +0,0 @@ -Performance reached (test data correct classifications): 99.26% diff --git a/DATA_MNIST/PyTorch_Non_Spike_Network/plot.py b/DATA_MNIST/PyTorch_Non_Spike_Network/plot.py deleted file mode 100644 index 
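# ---------------------------------------------------------------------------
# Editor's sketch of what Config.get_epsilon_t() above produces; the numbers
# here are hypothetical and only meant to make the cooldown behaviour concrete.
# (Note: constructing Config() also creates the weight_path / eps_xy_path
# directories as a side effect of __post_init__.)
from Parameter import Config

cfg = Config()
cfg.number_of_spikes = 20
cfg.cooldown_after_number_of_spikes = 15
cfg.reduction_cooldown = 25.0

epsilon_t = cfg.get_epsilon_t()
# epsilon_t[0:15] == 1.0 (full epsilon until the cooldown point),
# epsilon_t[15:20] == 1.0 / 25.0 = 0.04 (scaled down afterwards).
assert epsilon_t.shape[0] == 20
# ---------------------------------------------------------------------------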
1ef1a33..0000000 --- a/DATA_MNIST/PyTorch_Non_Spike_Network/plot.py +++ /dev/null @@ -1,31 +0,0 @@ -import os - -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" - -import numpy as np -import matplotlib.pyplot as plt -from tensorboard.backend.event_processing import event_accumulator - -filename: str = "events.out.tfevents.1651325827.fedora.115860.0" - -acc = event_accumulator.EventAccumulator(filename) -acc.Reload() - -# What is available? -# available_scalar = acc.Tags()["scalars"] -# print("Available Scalars") -# print(available_scalar) - -which_scalar: str = "Test Number Correct" -te = acc.Scalars(which_scalar) - -temp: list = [] -for te_item in te: - temp.append((te_item[1], te_item[2])) -temp_np = np.array(temp) - -plt.semilogy(temp_np[:, 0], (1.0 - (temp_np[:, 1] / 10000)) * 100) -plt.xlabel("Epochs") -plt.ylabel("Error [%]") -plt.savefig("Error.png") -plt.show() diff --git a/DATA_MNIST/PyTorch_Non_Spike_Network/run.py b/DATA_MNIST/PyTorch_Non_Spike_Network/run.py deleted file mode 100644 index 1c625d2..0000000 --- a/DATA_MNIST/PyTorch_Non_Spike_Network/run.py +++ /dev/null @@ -1,180 +0,0 @@ -# %% -import torch -from Dataset import DatasetMNIST - -# Some parameters -input_number_of_channel: int = 1 -input_dim_x: int = 24 -input_dim_y: int = 24 - -number_of_output_channels_conv1: int = 32 -number_of_output_channels_conv2: int = 64 -number_of_output_channels_flatten1: int = 576 -number_of_output_channels_full1: int = 10 - -kernel_size_conv1: tuple[int, int] = (5, 5) -kernel_size_pool1: tuple[int, int] = (2, 2) -kernel_size_conv2: tuple[int, int] = (5, 5) -kernel_size_pool2: tuple[int, int] = (2, 2) - -stride_conv1: tuple[int, int] = (1, 1) -stride_pool1: tuple[int, int] = (2, 2) -stride_conv2: tuple[int, int] = (1, 1) -stride_pool2: tuple[int, int] = (2, 2) - -padding_conv1: int = 0 -padding_pool1: int = 0 -padding_conv2: int = 0 -padding_pool2: int = 0 - -network = torch.nn.Sequential( - torch.nn.Conv2d( - in_channels=input_number_of_channel, - out_channels=number_of_output_channels_conv1, - kernel_size=kernel_size_conv1, - stride=stride_conv1, - padding=padding_conv1, - ), - torch.nn.ReLU(), - torch.nn.MaxPool2d( - kernel_size=kernel_size_pool1, stride=stride_pool1, padding=padding_pool1 - ), - torch.nn.Conv2d( - in_channels=number_of_output_channels_conv1, - out_channels=number_of_output_channels_conv2, - kernel_size=kernel_size_conv2, - stride=stride_conv2, - padding=padding_conv2, - ), - torch.nn.ReLU(), - torch.nn.MaxPool2d( - kernel_size=kernel_size_pool2, stride=stride_pool2, padding=padding_pool2 - ), - torch.nn.Flatten( - start_dim=1, - ), - torch.nn.Linear( - in_features=number_of_output_channels_flatten1, - out_features=number_of_output_channels_full1, - bias=True, - ), - torch.nn.Softmax(dim=1), -) -# %% -import torchvision - -test_processing_chain = torchvision.transforms.Compose( - transforms=[torchvision.transforms.CenterCrop((24, 24))], -) -train_processing_chain = torchvision.transforms.Compose( - transforms=[torchvision.transforms.RandomCrop((24, 24))], -) - -path_pattern: str = "./DATA_MNIST/" -path_label: str = "./DATA_MNIST/" - -dataset_train = DatasetMNIST( - train=True, path_pattern=path_pattern, path_label=path_label -) -dataset_test = DatasetMNIST( - train=False, path_pattern=path_pattern, path_label=path_label -) -batch_size_train: int = 100 -batch_size_test: int = 100 - - -train_data_load = torch.utils.data.DataLoader( - dataset_train, batch_size=batch_size_train, shuffle=True -) - -test_data_load = torch.utils.data.DataLoader( - dataset_test, 
batch_size=batch_size_test, shuffle=False -) - -# %% -# The optimizer -optimizer = torch.optim.Adam(network.parameters(), lr=0.001) -# The LR Scheduler -lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.75) - -# %% -number_of_test_pattern: int = dataset_test.__len__() -number_of_train_pattern: int = dataset_train.__len__() - -number_of_epoch: int = 50 - -# %% -import time -from torch.utils.tensorboard import SummaryWriter - -tb = SummaryWriter() - -# %% -loss_function = torch.nn.CrossEntropyLoss() - -for epoch_id in range(0, number_of_epoch): - print(f"Epoch: {epoch_id}") - t_start: float = time.perf_counter() - - train_loss: float = 0.0 - train_correct: int = 0 - train_number: int = 0 - test_correct: int = 0 - test_number: int = 0 - - # Switch the network into training mode - network.train() - - # This runs in total for one epoch split up into mini-batches - for image, target in train_data_load: - - # Clean the gradient - optimizer.zero_grad() - - output = network(train_processing_chain(image)) - - loss = loss_function(output, target) - - train_loss += loss.item() - train_correct += (output.argmax(dim=1) == target).sum().numpy() - train_number += target.shape[0] - # Calculate backprop - loss.backward() - - # Update the parameter - optimizer.step() - - # Update the learning rate - lr_scheduler.step(train_loss) - - t_training: float = time.perf_counter() - - # Switch the network into evalution mode - network.eval() - with torch.no_grad(): - for image, target in test_data_load: - - output = network(test_processing_chain(image)) - - test_correct += (output.argmax(dim=1) == target).sum().numpy() - test_number += target.shape[0] - - t_testing = time.perf_counter() - - perfomance_test_correct: float = 100.0 * test_correct / test_number - perfomance_train_correct: float = 100.0 * train_correct / train_number - - tb.add_scalar("Train Loss", train_loss, epoch_id) - tb.add_scalar("Train Number Correct", train_correct, epoch_id) - tb.add_scalar("Test Number Correct", test_correct, epoch_id) - - print(f"Training: Loss={train_loss:.5f} Correct={perfomance_train_correct:.2f}%") - print(f"Testing: Correct={perfomance_test_correct:.2f}%") - print( - f"Time: Training={(t_training-t_start):.1f}sec, Testing={(t_testing-t_training):.1f}sec" - ) - torch.save(network, "Model_MNIST_A_" + str(epoch_id) + ".pt") - print() - -# %% -tb.close() diff --git a/Dataset.py b/Dataset.py deleted file mode 100644 index 8808ddb..0000000 --- a/Dataset.py +++ /dev/null @@ -1,455 +0,0 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - -from abc import ABC, abstractmethod -import torch -import numpy as np -import torchvision as tv # type: ignore -from Parameter import Config - - -class DatasetMaster(torch.utils.data.Dataset, ABC): - - path_label: str - label_storage: np.ndarray - pattern_storage: np.ndarray - number_of_pattern: int - mean: list[float] - - # Initialize - def __init__( - self, - train: bool = False, - path_pattern: str = "./", - path_label: str = "./", - ) -> None: - super().__init__() - - if train is True: - self.label_storage = np.load(path_label + "/TrainLabelStorage.npy") - else: - self.label_storage = np.load(path_label + "/TestLabelStorage.npy") - - if train is True: - self.pattern_storage = np.load(path_pattern + "/TrainPatternStorage.npy") - else: - self.pattern_storage = np.load(path_pattern + "/TestPatternStorage.npy") - - self.number_of_pattern = self.label_storage.shape[0] - - self.mean = [] - - def __len__(self) -> int: - return self.number_of_pattern - - # Get one pattern at position index - @abstractmethod - def __getitem__(self, index: int) -> tuple[torch.Tensor, int]: - pass - - @abstractmethod - def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - pass - - @abstractmethod - def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - pass - - -class DatasetMNIST(DatasetMaster): - """Contstructor""" - - # Initialize - def __init__( - self, - train: bool = False, - path_pattern: str = "./", - path_label: str = "./", - ) -> None: - super().__init__(train, path_pattern, path_label) - - self.pattern_storage = np.ascontiguousarray( - self.pattern_storage[:, np.newaxis, :, :].astype(dtype=np.float32) - ) - - self.pattern_storage /= np.max(self.pattern_storage) - - mean = self.pattern_storage.mean(3).mean(2).mean(0) - self.mean = [*mean] - - def __getitem__(self, index: int) -> tuple[torch.Tensor, int]: - - image = self.pattern_storage[index, 0:1, :, :] - target = int(self.label_storage[index]) - return torch.tensor(image), target - - def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The test image comes in - 1. is center cropped - 2. on/off filteres - 3. returned. - - This is a 1 channel version (e.g. one gray channel). - """ - - assert len(cfg.image_statistics.mean) == 1 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.CenterCrop(size=cfg.image_statistics.the_size), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - if cfg.augmentation.use_on_off_filter is True: - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) - else: - gray = pattern[:, 0:1, :, :] + torch.finfo(torch.float32).eps - - return gray - - def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. 
The training image comes in - 1. is cropped from a random position - 2. on/off filteres - 3. returned. - - This is a 1 channel version (e.g. one gray channel). - """ - - assert len(cfg.image_statistics.mean) == 1 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.RandomCrop(size=cfg.image_statistics.the_size), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - if cfg.augmentation.use_on_off_filter is True: - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) - else: - gray = pattern[:, 0:1, :, :] + torch.finfo(torch.float32).eps - - return gray - - -class DatasetFashionMNIST(DatasetMaster): - """Contstructor""" - - # Initialize - def __init__( - self, - train: bool = False, - path_pattern: str = "./", - path_label: str = "./", - ) -> None: - super().__init__(train, path_pattern, path_label) - - self.pattern_storage = np.ascontiguousarray( - self.pattern_storage[:, np.newaxis, :, :].astype(dtype=np.float32) - ) - - self.pattern_storage /= np.max(self.pattern_storage) - - mean = self.pattern_storage.mean(3).mean(2).mean(0) - self.mean = [*mean] - - def __getitem__(self, index: int) -> tuple[torch.Tensor, int]: - - image = self.pattern_storage[index, 0:1, :, :] - target = int(self.label_storage[index]) - return torch.tensor(image), target - - def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The test image comes in - 1. is center cropped - 2. on/off filteres - 3. returned. - - This is a 1 channel version (e.g. one gray channel). - """ - - assert len(cfg.image_statistics.mean) == 1 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.CenterCrop(size=cfg.image_statistics.the_size), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - if cfg.augmentation.use_on_off_filter is True: - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) - else: - gray = pattern[:, 0:1, :, :] + torch.finfo(torch.float32).eps - - return gray - - def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The training image comes in - 1. is cropped from a random position - 2. on/off filteres - 3. returned. - - This is a 1 channel version (e.g. one gray channel). 
- """ - - assert len(cfg.image_statistics.mean) == 1 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.RandomCrop(size=cfg.image_statistics.the_size), - tv.transforms.RandomHorizontalFlip(p=cfg.augmentation.flip_p), - tv.transforms.ColorJitter( - brightness=cfg.augmentation.jitter_brightness, - contrast=cfg.augmentation.jitter_contrast, - saturation=cfg.augmentation.jitter_saturation, - hue=cfg.augmentation.jitter_hue, - ), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - if cfg.augmentation.use_on_off_filter is True: - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) - else: - gray = pattern[:, 0:1, :, :] + torch.finfo(torch.float32).eps - - return gray - - -class DatasetCIFAR(DatasetMaster): - """Contstructor""" - - # Initialize - def __init__( - self, - train: bool = False, - path_pattern: str = "./", - path_label: str = "./", - ) -> None: - super().__init__(train, path_pattern, path_label) - - self.pattern_storage = np.ascontiguousarray( - np.moveaxis(self.pattern_storage.astype(dtype=np.float32), 3, 1) - ) - self.pattern_storage /= np.max(self.pattern_storage) - - mean = self.pattern_storage.mean(3).mean(2).mean(0) - self.mean = [*mean] - - def __getitem__(self, index: int) -> tuple[torch.Tensor, int]: - - image = self.pattern_storage[index, :, :, :] - target = int(self.label_storage[index]) - return torch.tensor(image), target - - def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The test image comes in - 1. is center cropped - 2. on/off filteres - 3. returned. - - This is a 3 channel version (e.g. r,g,b channels). - """ - - assert len(cfg.image_statistics.mean) == 3 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.CenterCrop(size=cfg.image_statistics.the_size), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - if cfg.augmentation.use_on_off_filter is True: - my_on_off_filter_r: OnOffFilter = OnOffFilter( - p=cfg.image_statistics.mean[0] - ) - my_on_off_filter_g: OnOffFilter = OnOffFilter( - p=cfg.image_statistics.mean[1] - ) - my_on_off_filter_b: OnOffFilter = OnOffFilter( - p=cfg.image_statistics.mean[2] - ) - r: torch.Tensor = my_on_off_filter_r( - pattern[:, 0:1, :, :], - ) - g: torch.Tensor = my_on_off_filter_g( - pattern[:, 1:2, :, :], - ) - b: torch.Tensor = my_on_off_filter_b( - pattern[:, 2:3, :, :], - ) - else: - r = pattern[:, 0:1, :, :] + torch.finfo(torch.float32).eps - g = pattern[:, 1:2, :, :] + torch.finfo(torch.float32).eps - b = pattern[:, 2:3, :, :] + torch.finfo(torch.float32).eps - - new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1) - return new_tensor - - def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: - """0. The training image comes in - 1. is cropped from a random position - 2. is randomly horizontally flipped - 3. is randomly color jitteres - 4. on/off filteres - 5. returned. 
- - This is a 3 channel version (e.g. r,g,b channels). - """ - assert len(cfg.image_statistics.mean) == 3 - assert len(cfg.image_statistics.the_size) == 2 - assert cfg.image_statistics.the_size[0] > 0 - assert cfg.image_statistics.the_size[1] > 0 - - # Transformation chain - my_transforms: torch.nn.Sequential = torch.nn.Sequential( - tv.transforms.RandomCrop(size=cfg.image_statistics.the_size), - tv.transforms.RandomHorizontalFlip(p=cfg.augmentation.flip_p), - tv.transforms.ColorJitter( - brightness=cfg.augmentation.jitter_brightness, - contrast=cfg.augmentation.jitter_contrast, - saturation=cfg.augmentation.jitter_saturation, - hue=cfg.augmentation.jitter_hue, - ), - ) - scripted_transforms = torch.jit.script(my_transforms) - - # Preprocess the input data - pattern = scripted_transforms(pattern) - - # => On/Off - if cfg.augmentation.use_on_off_filter is True: - my_on_off_filter_r: OnOffFilter = OnOffFilter( - p=cfg.image_statistics.mean[0] - ) - my_on_off_filter_g: OnOffFilter = OnOffFilter( - p=cfg.image_statistics.mean[1] - ) - my_on_off_filter_b: OnOffFilter = OnOffFilter( - p=cfg.image_statistics.mean[2] - ) - r: torch.Tensor = my_on_off_filter_r( - pattern[:, 0:1, :, :], - ) - g: torch.Tensor = my_on_off_filter_g( - pattern[:, 1:2, :, :], - ) - b: torch.Tensor = my_on_off_filter_b( - pattern[:, 2:3, :, :], - ) - else: - r = pattern[:, 0:1, :, :] + torch.finfo(torch.float32).eps - g = pattern[:, 1:2, :, :] + torch.finfo(torch.float32).eps - b = pattern[:, 2:3, :, :] + torch.finfo(torch.float32).eps - - new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1) - return new_tensor - - -class OnOffFilter(torch.nn.Module): - def __init__(self, p: float = 0.5) -> None: - super(OnOffFilter, self).__init__() - self.p: float = p - - def forward(self, tensor: torch.Tensor) -> torch.Tensor: - - assert tensor.shape[1] == 1 - - tensor_clone = 2.0 * (tensor - self.p) - - temp_0: torch.Tensor = torch.where( - tensor_clone < 0.0, - -tensor_clone, - tensor_clone.new_zeros(tensor_clone.shape, dtype=tensor_clone.dtype), - ) - - temp_1: torch.Tensor = torch.where( - tensor_clone >= 0.0, - tensor_clone, - tensor_clone.new_zeros(tensor_clone.shape, dtype=tensor_clone.dtype), - ) - - new_tensor: torch.Tensor = torch.cat((temp_0, temp_1), dim=1) - - return new_tensor - - def __repr__(self) -> str: - return self.__class__.__name__ + "(p={0})".format(self.p) - - -if __name__ == "__main__": - pass diff --git a/PyHDynamicCNNManyIP.pyi b/PyHDynamicCNNManyIP.pyi deleted file mode 100644 index d5d1f6f..0000000 --- a/PyHDynamicCNNManyIP.pyi +++ /dev/null @@ -1,18 +0,0 @@ -# -# AUTOMATICALLY GENERATED FILE, DO NOT EDIT! -# - -"""HDynamicCNNManyIP Module""" -from __future__ import annotations -import PyHDynamicCNNManyIP -import typing - -__all__ = [ - "HDynamicCNNManyIP" -] - - -class HDynamicCNNManyIP(): - def __init__(self) -> None: ... - def update_with_init_vector_multi_pattern(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int, arg16: int, arg17: int, arg18: int, arg19: int, arg20: int, arg21: int) -> bool: ... - pass diff --git a/README.md b/README.md index b1d1837..4c3eebb 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,8 @@ It was programmed with 3.10.4. And I used some 3.10 Python expression. Thus you # C++ You need to modify the Makefile in the C++ directory to your Python installation. -In addition yor Python installation needs the PyBind11 package installed. 
You might want to perform a + +In addition your Python installation needs the PyBind11 package installed. You might want to perform a pip install pybind11 The Makefile uses clang as a compiler. If you want something else then you need to change the Makefile. - - +For CUDA I used version 12.0. diff --git a/SbS.py b/SbS.py deleted file mode 100644 index 59cd9fe..0000000 --- a/SbS.py +++ /dev/null @@ -1,1291 +0,0 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - -# %% -import torch -import numpy as np - -try: - import PySpikeGeneration2DManyIP - - cpp_spike: bool = True -except Exception: - cpp_spike = False - -try: - import PyHDynamicCNNManyIP - - cpp_sbs: bool = True -except Exception: - cpp_sbs = False - - -class SbS(torch.nn.Module): - - _epsilon_xy: torch.nn.parameter.Parameter - _epsilon_xy_exists: bool = False - _epsilon_0: torch.Tensor | None = None - _epsilon_t: torch.Tensor | None = None - _weights: torch.nn.parameter.Parameter - _weights_exists: bool = False - _kernel_size: torch.Tensor | None = None - _stride: torch.Tensor | None = None - _dilation: torch.Tensor | None = None - _padding: torch.Tensor | None = None - _output_size: torch.Tensor | None = None - _number_of_spikes: torch.Tensor | None = None - _number_of_cpu_processes: torch.Tensor | None = None - _number_of_neurons: torch.Tensor | None = None - _number_of_input_neurons: torch.Tensor | None = None - _h_initial: torch.Tensor | None = None - _epsilon_xy_backup: torch.Tensor | None = None - _weights_backup: torch.Tensor | None = None - _alpha_number_of_iterations: torch.Tensor | None = None - - def __init__( - self, - number_of_input_neurons: int, - number_of_neurons: int, - input_size: list[int], - forward_kernel_size: list[int], - number_of_spikes: int, - epsilon_t: torch.Tensor, - epsilon_xy_intitial: float = 0.1, - epsilon_0: float = 1.0, - weight_noise_amplitude: float = 0.01, - is_pooling_layer: bool = False, - strides: list[int] = [1, 1], - dilation: list[int] = [0, 0], - padding: list[int] = [0, 0], - alpha_number_of_iterations: int = 0, - number_of_cpu_processes: int = 1, - ) -> None: - """Constructor""" - super().__init__() - - self.stride = torch.tensor(strides, dtype=torch.int64) - - self.dilation = torch.tensor(dilation, dtype=torch.int64) - - self.padding = torch.tensor(padding, dtype=torch.int64) - - 
self.kernel_size = torch.tensor( - forward_kernel_size, - dtype=torch.int64, - ) - - self.number_of_input_neurons = torch.tensor( - number_of_input_neurons, - dtype=torch.int64, - ) - - self.number_of_neurons = torch.tensor( - number_of_neurons, - dtype=torch.int64, - ) - - self.alpha_number_of_iterations = torch.tensor( - alpha_number_of_iterations, dtype=torch.int64 - ) - - self.calculate_output_size(torch.tensor(input_size, dtype=torch.int64)) - - self.set_h_init_to_uniform() - - self.initialize_epsilon_xy(epsilon_xy_intitial) - - self.epsilon_0 = torch.tensor(epsilon_0, dtype=torch.float32) - - self.number_of_cpu_processes = torch.tensor( - number_of_cpu_processes, dtype=torch.int64 - ) - - self.number_of_spikes = torch.tensor(number_of_spikes, dtype=torch.int64) - - self.epsilon_t = epsilon_t.type(dtype=torch.float32) - - self.initialize_weights( - is_pooling_layer=is_pooling_layer, - noise_amplitude=weight_noise_amplitude, - ) - - self.functional_sbs = FunctionalSbS.apply - - #################################################################### - # Variables in and out # - #################################################################### - - @property - def epsilon_xy(self) -> torch.Tensor | None: - if self._epsilon_xy_exists is False: - return None - else: - return self._epsilon_xy - - @epsilon_xy.setter - def epsilon_xy(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert value.dim() == 4 - assert value.dtype == torch.float32 - if self._epsilon_xy_exists is False: - self._epsilon_xy = torch.nn.parameter.Parameter( - value.detach().clone(memory_format=torch.contiguous_format), - requires_grad=True, - ) - self._epsilon_xy_exists = True - else: - self._epsilon_xy.data = value.detach().clone( - memory_format=torch.contiguous_format - ) - - @property - def epsilon_0(self) -> torch.Tensor | None: - return self._epsilon_0 - - @epsilon_0.setter - def epsilon_0(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert torch.numel(value) == 1 - assert value.dtype == torch.float32 - assert value.item() > 0 - self._epsilon_0 = value.detach().clone(memory_format=torch.contiguous_format) - self._epsilon_0.requires_grad_(False) - - @property - def epsilon_t(self) -> torch.Tensor | None: - return self._epsilon_t - - @epsilon_t.setter - def epsilon_t(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert value.dim() == 1 - assert value.dtype == torch.float32 - self._epsilon_t = value.detach().clone(memory_format=torch.contiguous_format) - self._epsilon_t.requires_grad_(False) - - @property - def weights(self) -> torch.Tensor | None: - if self._weights_exists is False: - return None - else: - return self._weights - - @weights.setter - def weights(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert value.dim() == 2 - assert value.dtype == torch.float32 - temp: torch.Tensor = value.detach().clone(memory_format=torch.contiguous_format) - temp /= temp.sum(dim=0, keepdim=True, dtype=torch.float32) - if self._weights_exists is False: - self._weights = torch.nn.parameter.Parameter( - temp, - requires_grad=True, - ) - self._weights_exists = True - else: - self._weights.data = temp - - @property - def kernel_size(self) -> torch.Tensor | None: - return self._kernel_size - - @kernel_size.setter - def kernel_size(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert 
value.dim() == 1 - assert torch.numel(value) == 2 - assert value.dtype == torch.int64 - assert value[0] > 0 - assert value[1] > 0 - self._kernel_size = value.detach().clone(memory_format=torch.contiguous_format) - self._kernel_size.requires_grad_(False) - - @property - def stride(self) -> torch.Tensor | None: - return self._stride - - @stride.setter - def stride(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert value.dim() == 1 - assert torch.numel(value) == 2 - assert value.dtype == torch.int64 - assert value[0] > 0 - assert value[1] > 0 - self._stride = value.detach().clone(memory_format=torch.contiguous_format) - self._stride.requires_grad_(False) - - @property - def dilation(self) -> torch.Tensor | None: - return self._dilation - - @dilation.setter - def dilation(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert value.dim() == 1 - assert torch.numel(value) == 2 - assert value.dtype == torch.int64 - assert value[0] > 0 - assert value[1] > 0 - self._dilation = value.detach().clone(memory_format=torch.contiguous_format) - self._dilation.requires_grad_(False) - - @property - def padding(self) -> torch.Tensor | None: - return self._padding - - @padding.setter - def padding(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert value.dim() == 1 - assert torch.numel(value) == 2 - assert value.dtype == torch.int64 - assert value[0] >= 0 - assert value[1] >= 0 - self._padding = value.detach().clone(memory_format=torch.contiguous_format) - self._padding.requires_grad_(False) - - @property - def output_size(self) -> torch.Tensor | None: - return self._output_size - - @output_size.setter - def output_size(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert value.dim() == 1 - assert torch.numel(value) == 2 - assert value.dtype == torch.int64 - assert value[0] > 0 - assert value[1] > 0 - self._output_size = value.detach().clone(memory_format=torch.contiguous_format) - self._output_size.requires_grad_(False) - - @property - def number_of_spikes(self) -> torch.Tensor | None: - return self._number_of_spikes - - @number_of_spikes.setter - def number_of_spikes(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert torch.numel(value) == 1 - assert value.dtype == torch.int64 - assert value.item() > 0 - self._number_of_spikes = value.detach().clone( - memory_format=torch.contiguous_format - ) - self._number_of_spikes.requires_grad_(False) - - @property - def number_of_cpu_processes(self) -> torch.Tensor | None: - return self._number_of_cpu_processes - - @number_of_cpu_processes.setter - def number_of_cpu_processes(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert torch.numel(value) == 1 - assert value.dtype == torch.int64 - assert value.item() > 0 - self._number_of_cpu_processes = value.detach().clone( - memory_format=torch.contiguous_format - ) - self._number_of_cpu_processes.requires_grad_(False) - - @property - def number_of_neurons(self) -> torch.Tensor | None: - return self._number_of_neurons - - @number_of_neurons.setter - def number_of_neurons(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert torch.numel(value) == 1 - assert value.dtype == torch.int64 - assert value.item() > 0 - self._number_of_neurons = value.detach().clone( - 
memory_format=torch.contiguous_format - ) - self._number_of_neurons.requires_grad_(False) - - @property - def number_of_input_neurons(self) -> torch.Tensor | None: - return self._number_of_input_neurons - - @number_of_input_neurons.setter - def number_of_input_neurons(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert torch.numel(value) == 1 - assert value.dtype == torch.int64 - assert value.item() > 0 - self._number_of_input_neurons = value.detach().clone( - memory_format=torch.contiguous_format - ) - self._number_of_input_neurons.requires_grad_(False) - - @property - def h_initial(self) -> torch.Tensor | None: - return self._h_initial - - @h_initial.setter - def h_initial(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert value.dim() == 1 - assert value.dtype == torch.float32 - self._h_initial = value.detach().clone(memory_format=torch.contiguous_format) - self._h_initial.requires_grad_(False) - - @property - def alpha_number_of_iterations(self) -> torch.Tensor | None: - return self._alpha_number_of_iterations - - @alpha_number_of_iterations.setter - def alpha_number_of_iterations(self, value: torch.Tensor): - assert value is not None - assert torch.is_tensor(value) is True - assert torch.numel(value) == 1 - assert value.dtype == torch.int64 - assert value.item() >= 0 - self._alpha_number_of_iterations = value.detach().clone( - memory_format=torch.contiguous_format - ) - self._alpha_number_of_iterations.requires_grad_(False) - - #################################################################### - # Forward # - #################################################################### - - def forward(self, input: torch.Tensor) -> torch.Tensor: - """PyTorch Forward method. Does the work.""" - - # Are we happy with the input? - assert input is not None - assert torch.is_tensor(input) is True - assert input.dim() == 4 - assert input.dtype == torch.float32 - - # Are we happy with the rest of the network? 
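The tensor properties above all funnel through the same pattern: the incoming value is validated, detached, cloned contiguously, and then either wrapped in a trainable torch.nn.parameter.Parameter (weights, epsilon_xy) or stored as a fixed buffer with requires_grad_(False). A minimal stand-alone sketch of that idiom, with a hypothetical class and attribute names that are not part of the module:

import torch


class _TensorHolder(torch.nn.Module):
    """Sketch of the setter idiom used above (trainable case)."""

    def __init__(self) -> None:
        super().__init__()
        self._value_exists: bool = False

    @property
    def value(self) -> torch.Tensor | None:
        return self._value if self._value_exists else None

    @value.setter
    def value(self, value: torch.Tensor) -> None:
        assert torch.is_tensor(value)
        assert value.dtype == torch.float32
        temp = value.detach().clone(memory_format=torch.contiguous_format)
        if self._value_exists is False:
            # first assignment: register as a trainable parameter
            self._value = torch.nn.parameter.Parameter(temp, requires_grad=True)
            self._value_exists = True
        else:
            # later assignments: overwrite the data in place
            self._value.data = temp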
- assert self._epsilon_xy_exists is True - assert self._epsilon_xy is not None - assert self._epsilon_0 is not None - assert self._epsilon_t is not None - assert self._weights_exists is True - assert self._weights is not None - assert self._kernel_size is not None - assert self._stride is not None - assert self._dilation is not None - assert self._padding is not None - assert self._output_size is not None - assert self._number_of_spikes is not None - assert self._number_of_cpu_processes is not None - assert self._h_initial is not None - assert self._alpha_number_of_iterations is not None - - # SbS forward functional - return self.functional_sbs( - input, - self._epsilon_xy, - self._epsilon_0, - self._epsilon_t, - self._weights, - self._kernel_size, - self._stride, - self._dilation, - self._padding, - self._output_size, - self._number_of_spikes, - self._number_of_cpu_processes, - self._h_initial, - self._alpha_number_of_iterations, - ) - - #################################################################### - # Helper functions # - #################################################################### - - def calculate_output_size(self, value: torch.Tensor) -> None: - - coordinates_0, coordinates_1 = self._get_coordinates(value) - - self._output_size: torch.Tensor = torch.tensor( - [ - coordinates_0.shape[1], - coordinates_1.shape[1], - ], - dtype=torch.int64, - ) - self._output_size.requires_grad_(False) - - def _get_coordinates( - self, value: torch.Tensor - ) -> tuple[torch.Tensor, torch.Tensor]: - """Function converts parameter in coordinates - for the convolution window""" - - assert value is not None - assert torch.is_tensor(value) is True - assert value.dim() == 1 - assert torch.numel(value) == 2 - assert value.dtype == torch.int64 - assert value[0] > 0 - assert value[1] > 0 - - assert self._kernel_size is not None - assert self._stride is not None - assert self._dilation is not None - assert self._padding is not None - - assert torch.numel(self._kernel_size) == 2 - assert torch.numel(self._stride) == 2 - assert torch.numel(self._dilation) == 2 - assert torch.numel(self._padding) == 2 - - unfold_0: torch.nn.Unfold = torch.nn.Unfold( - kernel_size=(int(self._kernel_size[0]), 1), - dilation=int(self._dilation[0]), - padding=int(self._padding[0]), - stride=int(self._stride[0]), - ) - - unfold_1: torch.nn.Unfold = torch.nn.Unfold( - kernel_size=(1, int(self._kernel_size[1])), - dilation=int(self._dilation[1]), - padding=int(self._padding[1]), - stride=int(self._stride[1]), - ) - - coordinates_0: torch.Tensor = ( - unfold_0( - torch.unsqueeze( - torch.unsqueeze( - torch.unsqueeze( - torch.arange(0, int(value[0]), dtype=torch.float32), - 1, - ), - 0, - ), - 0, - ) - ) - .squeeze(0) - .type(torch.int64) - ) - - coordinates_1: torch.Tensor = ( - unfold_1( - torch.unsqueeze( - torch.unsqueeze( - torch.unsqueeze( - torch.arange(0, int(value[1]), dtype=torch.float32), - 0, - ), - 0, - ), - 0, - ) - ) - .squeeze(0) - .type(torch.int64) - ) - - return coordinates_0, coordinates_1 - - def _initial_random_weights(self, noise_amplitude: torch.Tensor) -> torch.Tensor: - """Creates initial weights - Uniform plus random noise plus normalization - """ - - assert torch.numel(noise_amplitude) == 1 - assert noise_amplitude.item() >= 0 - assert noise_amplitude.dtype == torch.float32 - - assert self._number_of_neurons is not None - assert self._number_of_input_neurons is not None - assert self._kernel_size is not None - - weights = torch.empty( - ( - int(self._kernel_size[0]), - int(self._kernel_size[1]), - 
int(self._number_of_input_neurons), - int(self._number_of_neurons), - ), - dtype=torch.float32, - ) - torch.nn.init.uniform_(weights, a=1.0, b=(1.0 + noise_amplitude.item())) - - return weights - - def _make_pooling_weights(self) -> torch.Tensor: - """For generating the pooling weights.""" - - assert self._number_of_neurons is not None - assert self._kernel_size is not None - - norm: float = 1.0 / (self._kernel_size[0] * self._kernel_size[1]) - - weights: torch.Tensor = torch.zeros( - ( - int(self._kernel_size[0]), - int(self._kernel_size[1]), - int(self._number_of_neurons), - int(self._number_of_neurons), - ), - dtype=torch.float32, - ) - - for i in range(0, int(self._number_of_neurons)): - weights[:, :, i, i] = norm - - return weights - - def initialize_weights( - self, - is_pooling_layer: bool = False, - noise_amplitude: float = 0.01, - ) -> None: - """For the generation of the initital weights. - Switches between normal initial random weights and pooling weights.""" - - assert self._kernel_size is not None - - if is_pooling_layer is True: - weights = self._make_pooling_weights() - else: - weights = self._initial_random_weights( - torch.tensor(noise_amplitude, dtype=torch.float32) - ) - - weights = weights.moveaxis(-1, 0).moveaxis(-1, 1) - - weights_t = torch.nn.functional.unfold( - input=weights, - kernel_size=(int(self._kernel_size[0]), int(self._kernel_size[1])), - dilation=(1, 1), - padding=(0, 0), - stride=(1, 1), - ).squeeze() - - weights_t = torch.moveaxis(weights_t, 0, 1) - - self.weights = weights_t - - def initialize_epsilon_xy( - self, - eps_xy_intitial: float, - ) -> None: - """Creates initial epsilon xy matrices""" - - assert self._output_size is not None - assert self._kernel_size is not None - assert eps_xy_intitial > 0 - - eps_xy_temp: torch.Tensor = torch.full( - ( - int(self._output_size[0]), - int(self._output_size[1]), - int(self._kernel_size[0]), - int(self._kernel_size[1]), - ), - eps_xy_intitial, - dtype=torch.float32, - ) - - self.epsilon_xy = eps_xy_temp - - def set_h_init_to_uniform(self) -> None: - - assert self._number_of_neurons is not None - - h_initial: torch.Tensor = torch.full( - (int(self._number_of_neurons.item()),), - (1.0 / float(self._number_of_neurons.item())), - dtype=torch.float32, - ) - - self.h_initial = h_initial - - # Epsilon XY - def backup_epsilon_xy(self) -> None: - assert self._epsilon_xy_exists is True - self._epsilon_xy_backup = self._epsilon_xy.data.clone() - - def restore_epsilon_xy(self) -> None: - assert self._epsilon_xy_backup is not None - assert self._epsilon_xy_exists is True - self._epsilon_xy.data = self._epsilon_xy_backup.clone() - - def mean_epsilon_xy(self) -> None: - assert self._epsilon_xy_exists is True - - fill_value: float = float(self._epsilon_xy.data.mean()) - self._epsilon_xy.data = torch.full_like( - self._epsilon_xy.data, fill_value, dtype=torch.float32 - ) - - def threshold_epsilon_xy(self, threshold: float) -> None: - assert self._epsilon_xy_exists is True - assert threshold >= 0 - torch.clamp( - self._epsilon_xy.data, - min=float(threshold), - max=None, - out=self._epsilon_xy.data, - ) - - # Weights - def backup_weights(self) -> None: - assert self._weights_exists is True - self._weights_backup = self._weights.data.clone() - - def restore_weights(self) -> None: - assert self._weights_backup is not None - assert self._weights_exists is True - self._weights.data = self._weights_backup.clone() - - def norm_weights(self) -> None: - assert self._weights_exists is True - temp: torch.Tensor = ( - 
self._weights.data.detach() - .clone(memory_format=torch.contiguous_format) - .type(dtype=torch.float32) - ) - temp /= temp.sum(dim=0, keepdim=True, dtype=torch.float32) - self._weights.data = temp - - def threshold_weights(self, threshold: float) -> None: - assert self._weights_exists is True - assert threshold >= 0 - torch.clamp( - self._weights.data, - min=float(threshold), - max=None, - out=self._weights.data, - ) - - -class FunctionalSbS(torch.autograd.Function): - @staticmethod - def forward( # type: ignore - ctx, - input: torch.Tensor, - epsilon_xy: torch.Tensor, - epsilon_0: torch.Tensor, - epsilon_t: torch.Tensor, - weights: torch.Tensor, - kernel_size: torch.Tensor, - stride: torch.Tensor, - dilation: torch.Tensor, - padding: torch.Tensor, - output_size: torch.Tensor, - number_of_spikes: torch.Tensor, - number_of_cpu_processes: torch.Tensor, - h_initial: torch.Tensor, - alpha_number_of_iterations: torch.Tensor, - ) -> torch.Tensor: - - torch.set_default_dtype(torch.float32) - - assert input.dim() == 4 - assert torch.numel(kernel_size) == 2 - assert torch.numel(dilation) == 2 - assert torch.numel(padding) == 2 - assert torch.numel(stride) == 2 - assert torch.numel(output_size) == 2 - - assert torch.numel(epsilon_0) == 1 - assert torch.numel(number_of_spikes) == 1 - assert torch.numel(number_of_cpu_processes) == 1 - assert torch.numel(alpha_number_of_iterations) == 1 - - input_size = torch.tensor([input.shape[2], input.shape[3]]) - - ############################################################ - # Pre convolving the input # - ############################################################ - - input_convolved_temp = torch.nn.functional.unfold( - input, - kernel_size=tuple(kernel_size.tolist()), - dilation=tuple(dilation.tolist()), - padding=tuple(padding.tolist()), - stride=tuple(stride.tolist()), - ) - - input_convolved = torch.nn.functional.fold( - input_convolved_temp, - output_size=tuple(output_size.tolist()), - kernel_size=(1, 1), - dilation=(1, 1), - padding=(0, 0), - stride=(1, 1), - ).requires_grad_(True) - - epsilon_xy_convolved: torch.Tensor = ( - ( - torch.nn.functional.unfold( - epsilon_xy.reshape( - ( - int(epsilon_xy.shape[0]) * int(epsilon_xy.shape[1]), - int(epsilon_xy.shape[2]), - int(epsilon_xy.shape[3]), - ) - ) - .unsqueeze(1) - .tile((1, input.shape[1], 1, 1)), - kernel_size=tuple(kernel_size.tolist()), - dilation=1, - padding=0, - stride=1, - ) - .squeeze(-1) - .reshape( - ( - int(epsilon_xy.shape[0]), - int(epsilon_xy.shape[1]), - int(input_convolved.shape[1]), - ) - ) - ) - .moveaxis(-1, 0) - .contiguous(memory_format=torch.contiguous_format) - ) - - ############################################################ - # Spike generation # - ############################################################ - - if cpp_spike is False: - # Alternative to the C++ module but 5x slower: - spikes = ( - ( - input_convolved.movedim(source=(2, 3), destination=(0, 1)) - .reshape( - shape=( - input_convolved.shape[2] - * input_convolved.shape[3] - * input_convolved.shape[0], - input_convolved.shape[1], - ) - ) - .multinomial( - num_samples=int(number_of_spikes.item()), replacement=True - ) - ) - .reshape( - shape=( - input_convolved.shape[2], - input_convolved.shape[3], - input_convolved.shape[0], - int(number_of_spikes.item()), - ) - ) - .movedim(source=(0, 1), destination=(2, 3)) - ).contiguous(memory_format=torch.contiguous_format) - else: - # Normalized cumsum - input_cumsum: torch.Tensor = torch.cumsum( - input_convolved, dim=1, dtype=torch.float32 - ) - input_cumsum_last: 
torch.Tensor = input_cumsum[:, -1, :, :].unsqueeze(1) - input_cumsum /= input_cumsum_last - - random_values = torch.rand( - size=[ - input_cumsum.shape[0], - int(number_of_spikes.item()), - input_cumsum.shape[2], - input_cumsum.shape[3], - ], - dtype=torch.float32, - ) - - spikes = torch.empty_like(random_values, dtype=torch.int64) - - # Prepare for Export (Pointer and stuff)-> - np_input: np.ndarray = input_cumsum.detach().numpy() - assert input_cumsum.dtype == torch.float32 - assert np_input.flags["C_CONTIGUOUS"] is True - assert np_input.ndim == 4 - - np_random_values: np.ndarray = random_values.detach().numpy() - assert random_values.dtype == torch.float32 - assert np_random_values.flags["C_CONTIGUOUS"] is True - assert np_random_values.ndim == 4 - - np_spikes: np.ndarray = spikes.detach().numpy() - assert spikes.dtype == torch.int64 - assert np_spikes.flags["C_CONTIGUOUS"] is True - assert np_spikes.ndim == 4 - # <- Prepare for Export - - spike_generation: PySpikeGeneration2DManyIP.SpikeGeneration2DManyIP = ( - PySpikeGeneration2DManyIP.SpikeGeneration2DManyIP() - ) - - spike_generation.spike_generation_multi_pattern( - np_input.__array_interface__["data"][0], - int(np_input.shape[0]), - int(np_input.shape[1]), - int(np_input.shape[2]), - int(np_input.shape[3]), - np_random_values.__array_interface__["data"][0], - int(np_random_values.shape[0]), - int(np_random_values.shape[1]), - int(np_random_values.shape[2]), - int(np_random_values.shape[3]), - np_spikes.__array_interface__["data"][0], - int(np_spikes.shape[0]), - int(np_spikes.shape[1]), - int(np_spikes.shape[2]), - int(np_spikes.shape[3]), - int(number_of_cpu_processes.item()), - ) - - ############################################################ - # H dynamic # - ############################################################ - - assert epsilon_t.ndim == 1 - assert epsilon_t.shape[0] >= number_of_spikes - - if cpp_sbs is False: - h = torch.tile( - h_initial.unsqueeze(0).unsqueeze(0).unsqueeze(0), - dims=[int(input.shape[0]), int(output_size[0]), int(output_size[1]), 1], - ) - - epsilon_scale: torch.Tensor = torch.ones( - size=[ - int(spikes.shape[0]), - int(spikes.shape[2]), - int(spikes.shape[3]), - 1, - ], - dtype=torch.float32, - ) - - for t in range(0, spikes.shape[1]): - - if epsilon_scale.max() > 1e10: - h /= epsilon_scale - epsilon_scale = torch.ones_like(epsilon_scale) - - h_temp: torch.Tensor = weights[spikes[:, t, :, :], :] * h - wx = 0 - wy = 0 - - if t == 0: - epsilon_temp: torch.Tensor = torch.empty( - ( - int(spikes.shape[0]), - int(spikes.shape[2]), - int(spikes.shape[3]), - ), - dtype=torch.float32, - ) - for wx in range(0, int(spikes.shape[2])): - for wy in range(0, int(spikes.shape[3])): - epsilon_temp[:, wx, wy] = epsilon_xy_convolved[ - spikes[:, t, wx, wy], wx, wy - ] - - epsilon_subsegment: torch.Tensor = ( - epsilon_temp.unsqueeze(-1) * epsilon_t[t] * epsilon_0 - ) - - h_temp_sum: torch.Tensor = ( - epsilon_scale * epsilon_subsegment / h_temp.sum(dim=3, keepdim=True) - ) - torch.nan_to_num( - h_temp_sum, out=h_temp_sum, nan=0.0, posinf=0.0, neginf=0.0 - ) - h_temp *= h_temp_sum - h += h_temp - - epsilon_scale *= 1.0 + epsilon_subsegment - - h /= epsilon_scale - output = h.movedim(3, 1) - - else: - epsilon_t_0: torch.Tensor = epsilon_t * epsilon_0 - - h_shape: tuple[int, int, int, int] = ( - int(input.shape[0]), - int(weights.shape[1]), - int(output_size[0]), - int(output_size[1]), - ) - - output = torch.empty(h_shape, dtype=torch.float32) - - # Prepare the export to C++ -> - np_h: np.ndarray = 
output.detach().numpy() - assert output.dtype == torch.float32 - assert np_h.flags["C_CONTIGUOUS"] is True - assert np_h.ndim == 4 - - np_epsilon_xy: np.ndarray = epsilon_xy_convolved.detach().numpy() - assert epsilon_xy.dtype == torch.float32 - assert np_epsilon_xy.flags["C_CONTIGUOUS"] is True - assert np_epsilon_xy.ndim == 3 - - np_epsilon_t: np.ndarray = epsilon_t_0.detach().numpy() - assert epsilon_t_0.dtype == torch.float32 - assert np_epsilon_t.flags["C_CONTIGUOUS"] is True - assert np_epsilon_t.ndim == 1 - - np_weights: np.ndarray = weights.detach().numpy() - assert weights.dtype == torch.float32 - assert np_weights.flags["C_CONTIGUOUS"] is True - assert np_weights.ndim == 2 - - np_spikes = spikes.contiguous().detach().numpy() - assert spikes.dtype == torch.int64 - assert np_spikes.flags["C_CONTIGUOUS"] is True - assert np_spikes.ndim == 4 - - np_h_initial = h_initial.contiguous().detach().numpy() - assert h_initial.dtype == torch.float32 - assert np_h_initial.flags["C_CONTIGUOUS"] is True - assert np_h_initial.ndim == 1 - # <- Prepare the export to C++ - - h_dynamic: PyHDynamicCNNManyIP.HDynamicCNNManyIP = ( - PyHDynamicCNNManyIP.HDynamicCNNManyIP() - ) - - h_dynamic.update_with_init_vector_multi_pattern( - np_h.__array_interface__["data"][0], - int(np_h.shape[0]), - int(np_h.shape[1]), - int(np_h.shape[2]), - int(np_h.shape[3]), - np_epsilon_xy.__array_interface__["data"][0], - int(np_epsilon_xy.shape[0]), - int(np_epsilon_xy.shape[1]), - int(np_epsilon_xy.shape[2]), - np_epsilon_t.__array_interface__["data"][0], - int(np_epsilon_t.shape[0]), - np_weights.__array_interface__["data"][0], - int(np_weights.shape[0]), - int(np_weights.shape[1]), - np_spikes.__array_interface__["data"][0], - int(np_spikes.shape[0]), - int(np_spikes.shape[1]), - int(np_spikes.shape[2]), - int(np_spikes.shape[3]), - np_h_initial.__array_interface__["data"][0], - int(np_h_initial.shape[0]), - int(number_of_cpu_processes.item()), - ) - - ############################################################ - # Alpha # - ############################################################ - alpha_number_of_iterations_int: int = int(alpha_number_of_iterations.item()) - - if alpha_number_of_iterations_int > 0: - # Initialization - virtual_reconstruction_weight: torch.Tensor = torch.einsum( - "bixy,ji->bjxy", output, weights - ) - alpha_fill_value: float = 1.0 / ( - virtual_reconstruction_weight.shape[2] - * virtual_reconstruction_weight.shape[3] - ) - alpha_dynamic: torch.Tensor = torch.full( - ( - int(virtual_reconstruction_weight.shape[0]), - 1, - int(virtual_reconstruction_weight.shape[2]), - int(virtual_reconstruction_weight.shape[3]), - ), - alpha_fill_value, - dtype=torch.float32, - ) - - # Iterations - for _ in range(0, alpha_number_of_iterations_int): - alpha_temp: torch.Tensor = alpha_dynamic * virtual_reconstruction_weight - alpha_temp /= alpha_temp.sum(dim=3, keepdim=True).sum( - dim=2, keepdim=True - ) - torch.nan_to_num( - alpha_temp, out=alpha_temp, nan=0.0, posinf=0.0, neginf=0.0 - ) - - alpha_temp = torch.nn.functional.unfold( - alpha_temp, - kernel_size=(1, 1), - dilation=1, - padding=0, - stride=1, - ) - - alpha_temp = torch.nn.functional.fold( - alpha_temp, - output_size=tuple(input_size.tolist()), - kernel_size=tuple(kernel_size.tolist()), - dilation=tuple(dilation.tolist()), - padding=tuple(padding.tolist()), - stride=tuple(stride.tolist()), - ) - - alpha_temp = (alpha_temp * input).sum(dim=1, keepdim=True) - - alpha_temp = torch.nn.functional.unfold( - alpha_temp, - 
kernel_size=tuple(kernel_size.tolist()), - dilation=tuple(dilation.tolist()), - padding=tuple(padding.tolist()), - stride=tuple(stride.tolist()), - ) - - alpha_temp = torch.nn.functional.fold( - alpha_temp, - output_size=tuple(output_size.tolist()), - kernel_size=(1, 1), - dilation=(1, 1), - padding=(0, 0), - stride=(1, 1), - ) - alpha_dynamic = alpha_temp.sum(dim=1, keepdim=True) - - alpha_dynamic += 1e-20 - - # Alpha normalization - alpha_dynamic /= alpha_dynamic.sum(dim=3, keepdim=True).sum( - dim=2, keepdim=True - ) - torch.nan_to_num( - alpha_dynamic, out=alpha_dynamic, nan=0.0, posinf=0.0, neginf=0.0 - ) - - # Applied to the output - output *= alpha_dynamic - - ############################################################ - # Save the necessary data for the backward pass # - ############################################################ - - ctx.save_for_backward( - input_convolved, - epsilon_xy_convolved, - epsilon_0, - weights, - output, - kernel_size, - stride, - dilation, - padding, - input_size, - ) - - return output - - @staticmethod - def backward(ctx, grad_output): - - # Get the variables back - ( - input, - epsilon_xy, - epsilon_0, - weights, - output, - kernel_size, - stride, - dilation, - padding, - input_size, - ) = ctx.saved_tensors - - torch.set_default_dtype(torch.float32) - - input /= input.sum(dim=1, keepdim=True, dtype=torch.float32) - - # For debugging: - # print( - # f"S: O: {output.min().item():e} {output.max().item():e} I: {input.min().item():e} {input.max().item():e} G: {grad_output.min().item():e} {grad_output.max().item():e}" - # ) - - epsilon_0_float: float = epsilon_0.item() - - temp_e: torch.Tensor = 1.0 / ((epsilon_xy * epsilon_0_float) + 1.0) - - eps_a: torch.Tensor = temp_e.clone() - eps_a *= epsilon_xy * epsilon_0_float - - eps_b: torch.Tensor = temp_e**2 * epsilon_0_float - - backprop_r: torch.Tensor = weights.unsqueeze(0).unsqueeze(-1).unsqueeze( - -1 - ) * output.unsqueeze(1) - - backprop_bigr: torch.Tensor = backprop_r.sum(axis=2) - - temp: torch.Tensor = input / (backprop_bigr**2 + 1e-20) - - backprop_f: torch.Tensor = output.unsqueeze(1) * temp.unsqueeze(2) - torch.nan_to_num( - backprop_f, out=backprop_f, nan=1e30, posinf=1e30, neginf=-1e30 - ) - torch.clip(backprop_f, out=backprop_f, min=-1e30, max=1e30) - - tempz: torch.Tensor = 1.0 / (backprop_bigr + 1e-20) - - backprop_z: torch.Tensor = backprop_r * tempz.unsqueeze(2) - torch.nan_to_num( - backprop_z, out=backprop_z, nan=1e30, posinf=1e30, neginf=-1e30 - ) - torch.clip(backprop_z, out=backprop_z, min=-1e30, max=1e30) - - result_omega: torch.Tensor = backprop_bigr.unsqueeze(2) * grad_output.unsqueeze( - 1 - ) - result_omega -= torch.einsum( - "bijxy,bjxy->bixy", backprop_r, grad_output - ).unsqueeze(2) - result_omega *= backprop_f - - result_eps_xy: torch.Tensor = ( - ( - (backprop_z * input.unsqueeze(2) - output.unsqueeze(1)) - * grad_output.unsqueeze(1) - ) - .sum(dim=2) - .sum(dim=0) - ) * eps_b - - result_phi: torch.Tensor = torch.einsum( - "bijxy,bjxy->bixy", backprop_z, grad_output - ) * eps_a.unsqueeze(0) - - grad_weights = result_omega.sum(0).sum(-1).sum(-1) - torch.nan_to_num( - grad_weights, out=grad_weights, nan=1e30, posinf=1e30, neginf=-1e30 - ) - torch.clip(grad_weights, out=grad_weights, min=-1e30, max=1e30) - - grad_input = torch.nn.functional.fold( - torch.nn.functional.unfold( - result_phi, - kernel_size=(1, 1), - dilation=1, - padding=0, - stride=1, - ), - output_size=input_size, - kernel_size=kernel_size, - dilation=dilation, - padding=padding, - stride=stride, - ) - 
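The input gradient above is rebuilt by unfolding the per-window gradient result_phi with a 1x1 kernel and folding it back onto the input grid with the layer's kernel size, stride, dilation and padding. torch.nn.functional.fold is the adjoint of unfold: it sums patch values back into the image and accumulates wherever windows overlap. A small self-contained sketch with toy sizes, not the network's real dimensions:

import torch

x = torch.ones(1, 1, 6, 6)
# 16 overlapping 3x3 patches, shape [1, 9, 16]
patches = torch.nn.functional.unfold(x, kernel_size=(3, 3), stride=(1, 1))
# Folding sums the patches back onto the 6x6 grid: each pixel ends up holding
# the number of windows that cover it (1 in the corners, 9 in the interior).
coverage = torch.nn.functional.fold(
    patches, output_size=(6, 6), kernel_size=(3, 3), stride=(1, 1)
)
print(coverage[0, 0])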
torch.nan_to_num( - grad_input, out=grad_input, nan=1e30, posinf=1e30, neginf=-1e30 - ) - torch.clip(grad_input, out=grad_input, min=-1e30, max=1e30) - - grad_eps_xy_temp = torch.nn.functional.fold( - result_eps_xy.moveaxis(0, -1) - .reshape( - ( - int(result_eps_xy.shape[1]) * int(result_eps_xy.shape[2]), - int(result_eps_xy.shape[0]), - ) - ) - .unsqueeze(-1), - output_size=kernel_size, - kernel_size=kernel_size, - ) - - grad_eps_xy = ( - grad_eps_xy_temp.sum(dim=1) - .reshape( - ( - int(result_eps_xy.shape[1]), - int(result_eps_xy.shape[2]), - int(grad_eps_xy_temp.shape[-2]), - int(grad_eps_xy_temp.shape[-1]), - ) - ) - .contiguous(memory_format=torch.contiguous_format) - ) - torch.nan_to_num( - grad_eps_xy, out=grad_eps_xy, nan=1e30, posinf=1e30, neginf=-1e30 - ) - torch.clip(grad_eps_xy, out=grad_eps_xy, min=-1e30, max=1e30) - - grad_epsilon_0 = None - grad_epsilon_t = None - grad_kernel_size = None - grad_stride = None - grad_dilation = None - grad_padding = None - grad_output_size = None - grad_number_of_spikes = None - grad_number_of_cpu_processes = None - grad_h_initial = None - grad_alpha_number_of_iterations = None - - return ( - grad_input, - grad_eps_xy, - grad_epsilon_0, - grad_epsilon_t, - grad_weights, - grad_kernel_size, - grad_stride, - grad_dilation, - grad_padding, - grad_output_size, - grad_number_of_spikes, - grad_number_of_cpu_processes, - grad_h_initial, - grad_alpha_number_of_iterations, - ) - - -# %% diff --git a/clean.sh b/clean.sh new file mode 100644 index 0000000..4bb9576 --- /dev/null +++ b/clean.sh @@ -0,0 +1,10 @@ +read -p "Are you sure? " -n 1 -r +echo # (optional) move to a new line +if [[ ! $REPLY =~ ^[Yy]$ ]] +then + exit 1 +fi + +rm -rf Log +rm -rf Parameters +rm *.txt diff --git a/dataset.json b/dataset.json new file mode 100644 index 0000000..76d5c06 --- /dev/null +++ b/dataset.json @@ -0,0 +1,4 @@ +{ + "data_path": "./DATA_FASHION_MNIST/", + "data_mode": "MNIST_FASHION" +} diff --git a/def.json b/def.json new file mode 100644 index 0000000..6ebd9ae --- /dev/null +++ b/def.json @@ -0,0 +1,24 @@ +{ + "epoch_id_max": 200, + "number_of_spikes": [ + 1600 + ], + "batch_size": 24, + "forgetting_offset": 0.0, + "stage_id": 0, + "simulation_id": 0, + "learning_parameters": { + "learning_rate_threshold_w": 0.001, + "eps_xy_intitial": 1.0, + "number_of_batches_for_one_update": 20, + "learning_rate_gamma_w": 0.001, + "lr_scheduler_patience_w": 50, + "adapt_learning_rate_after_minibatch": true, + "w_trainable": [ + true + ] + }, + "augmentation": {}, + "image_statistics": {}, + "approximation_setting": {} +} \ No newline at end of file diff --git a/def_sbs_L0.json b/def_sbs_L0.json new file mode 100644 index 0000000..493b394 --- /dev/null +++ b/def_sbs_L0.json @@ -0,0 +1,24 @@ +{ + "epoch_id_max": 200, + "number_of_spikes": [ + 1600 + ], + "batch_size": 24, + "forgetting_offset": 0.0, + "stage_id": 0, + "simulation_id": 0, + "learning_parameters": { + "learning_rate_threshold_w": 0.001, + "eps_xy_intitial": 1.0, + "number_of_batches_for_one_update": 20, + "learning_rate_gamma_w": 1.0, + "lr_scheduler_patience_w": 50, + "adapt_learning_rate_after_minibatch": true, + "w_trainable": [ + true + ] + }, + "augmentation": {}, + "image_statistics": {}, + "approximation_setting": {} +} \ No newline at end of file diff --git a/get_perf.py b/get_perf.py new file mode 100644 index 0000000..f37bceb --- /dev/null +++ b/get_perf.py @@ -0,0 +1,47 @@ +import os + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" +import matplotlib.pyplot as plt + +from 
tensorboard.backend.event_processing import event_accumulator +import numpy as np +import json +from jsmin import jsmin +import glob + +# ------------------------------- + +filename:str = "def.json" +with open(filename) as json_file: + minified = jsmin(json_file.read()) +data = json.loads(minified) +number_of_spikes = data["number_of_spikes"] + + +# ------------------------------- + + +path_runs: str = "./Log/*" + +temp = glob.glob(path_runs) +assert len(temp) == 1 +path = temp[0] + + +acc = event_accumulator.EventAccumulator(path) +acc.Reload() + +available_scalar = acc.Tags()["scalars"] +available_histograms = acc.Tags()["histograms"] + +which_scalar = "Test Error" +te = acc.Scalars(which_scalar) + +temp = [] +for te_item in te: + temp.append((te_item[1], te_item[2])) +temp = np.array(temp) + +print(temp) +np.save(f"test_error_{number_of_spikes}.npy", temp) + diff --git a/learn_it.py b/learn_it.py index 8123404..73f0b0e 100644 --- a/learn_it.py +++ b/learn_it.py @@ -1,74 +1,30 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - # %% import os os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" -import numpy as np import sys import torch -import time import dataconf import logging from datetime import datetime -import glob -from Dataset import ( - DatasetMaster, - DatasetCIFAR, - DatasetMNIST, - DatasetFashionMNIST, -) -from Parameter import Config -from SbS import SbS +from network.Parameter import Config + +from network.build_network import build_network +from network.build_optimizer import build_optimizer +from network.build_lr_scheduler import build_lr_scheduler +from network.build_datasets import build_datasets +from network.load_previous_weights import load_previous_weights + +from network.loop_train_test import loop_test, loop_train, run_lr_scheduler from torch.utils.tensorboard import SummaryWriter -try: - from SbSLRScheduler import SbSLRScheduler - sbs_lr_scheduler: bool = True -except Exception: - sbs_lr_scheduler = False - - -tb = SummaryWriter() - -torch.set_default_dtype(torch.float32) - -####################################################################### -# We want to log what is going on into a file and screen # -####################################################################### +# ###################################################################### +# We want to log what is going on into a file and screen +# ###################################################################### now = datetime.now() dt_string_filename = now.strftime("%Y_%m_%d_%H_%M_%S") @@ -80,9 +36,9 @@ logging.basicConfig( ) logging.getLogger().addHandler(logging.StreamHandler()) -####################################################################### -# Load the config data from the json file # -####################################################################### +# ###################################################################### +# Load the config data from the json file +# ###################################################################### if len(sys.argv) < 2: raise Exception("Argument: Config file name is missing") @@ -92,570 +48,154 @@ filename: str = sys.argv[1] if os.path.exists(filename) is False: raise Exception(f"Config file not found! {filename}") -cfg = dataconf.file(filename, Config) +if os.path.exists("network.json") is False: + raise Exception("Config file not found! network.json") + +if os.path.exists("dataset.json") is False: + raise Exception("Config file not found! 
dataset.json") + + +cfg = dataconf.multi.file("network.json").file("dataset.json").file(filename).on(Config) +logging.info(cfg) + logging.info(f"Using configuration file: {filename}") +logging.info(f"Number of spikes: {cfg.number_of_spikes}") +logging.info(f"Cooldown after spikes: {cfg.cooldown_after_number_of_spikes}") +logging.info(f"Reduction cooldown: {cfg.reduction_cooldown}") +logging.info("") +logging.info(f"Epsilon 0: {cfg.epsilon_0}") +logging.info(f"Batch size: {cfg.batch_size}") +logging.info(f"Data mode: {cfg.data_mode}") +logging.info("") +logging.info("*** Config loaded.") +logging.info("") -####################################################################### -# Prepare the test and training data # -####################################################################### +tb = SummaryWriter(log_dir=cfg.log_path) -# Load the input data -the_dataset_train: DatasetMaster -the_dataset_test: DatasetMaster -if cfg.data_mode == "CIFAR10": - the_dataset_train = DatasetCIFAR( - train=True, path_pattern=cfg.data_path, path_label=cfg.data_path - ) - the_dataset_test = DatasetCIFAR( - train=False, path_pattern=cfg.data_path, path_label=cfg.data_path - ) -elif cfg.data_mode == "MNIST": - the_dataset_train = DatasetMNIST( - train=True, path_pattern=cfg.data_path, path_label=cfg.data_path - ) - the_dataset_test = DatasetMNIST( - train=False, path_pattern=cfg.data_path, path_label=cfg.data_path - ) -elif cfg.data_mode == "MNIST_FASHION": - the_dataset_train = DatasetFashionMNIST( - train=True, path_pattern=cfg.data_path, path_label=cfg.data_path - ) - the_dataset_test = DatasetFashionMNIST( - train=False, path_pattern=cfg.data_path, path_label=cfg.data_path - ) -else: - raise Exception("data_mode unknown") +# ########################################### +# GPU Yes / NO ? 
+# ########################################### +default_dtype = torch.float32 +torch.set_default_dtype(default_dtype) +torch_device: str = "cuda:0" if torch.cuda.is_available() else "cpu" +use_gpu: bool = True if torch.cuda.is_available() else False +print(f"Using {torch_device} device") +device = torch.device(torch_device) -if len(cfg.image_statistics.mean) == 0: - cfg.image_statistics.mean = the_dataset_train.mean +# ###################################################################### +# Prepare the test and training data +# ###################################################################### -# The basic size -cfg.image_statistics.the_size = [ - the_dataset_train.pattern_storage.shape[2], - the_dataset_train.pattern_storage.shape[3], -] - -# Minus the stuff we cut away in the pattern filter -cfg.image_statistics.the_size[0] -= 2 * cfg.augmentation.crop_width_in_pixel -cfg.image_statistics.the_size[1] -= 2 * cfg.augmentation.crop_width_in_pixel - -my_loader_test: torch.utils.data.DataLoader = torch.utils.data.DataLoader( - the_dataset_test, batch_size=cfg.batch_size, shuffle=False -) -my_loader_train: torch.utils.data.DataLoader = torch.utils.data.DataLoader( - the_dataset_train, batch_size=cfg.batch_size, shuffle=True +the_dataset_train, the_dataset_test, my_loader_test, my_loader_train = build_datasets( + cfg ) logging.info("*** Data loaded.") -####################################################################### -# Build the network # -####################################################################### +# ###################################################################### +# Build the network, Optimizer, and LR Scheduler # +# ###################################################################### -wf: list[np.ndarray] = [] -eps_xy: list[np.ndarray] = [] -network = torch.nn.Sequential() -for id in range(0, len(cfg.network_structure.is_pooling_layer)): - if id == 0: - input_size: list[int] = cfg.image_statistics.the_size - else: - input_size = network[id - 1].output_size.tolist() +network = build_network( + cfg=cfg, device=device, default_dtype=default_dtype, logging=logging +) +logging.info("") - network.append( - SbS( - number_of_input_neurons=cfg.network_structure.forward_neuron_numbers[id][0], - number_of_neurons=cfg.network_structure.forward_neuron_numbers[id][1], - input_size=input_size, - forward_kernel_size=cfg.network_structure.forward_kernel_size[id], - number_of_spikes=cfg.number_of_spikes, - epsilon_t=cfg.get_epsilon_t(), - epsilon_xy_intitial=cfg.learning_parameters.eps_xy_intitial, - epsilon_0=cfg.epsilon_0, - weight_noise_amplitude=cfg.learning_parameters.weight_noise_amplitude, - is_pooling_layer=cfg.network_structure.is_pooling_layer[id], - strides=cfg.network_structure.strides[id], - dilation=cfg.network_structure.dilation[id], - padding=cfg.network_structure.padding[id], - alpha_number_of_iterations=cfg.learning_parameters.alpha_number_of_iterations, - number_of_cpu_processes=cfg.number_of_cpu_processes, - ) - ) +optimizer = build_optimizer(network=network, cfg=cfg, logging=logging) - eps_xy.append(network[id].epsilon_xy.detach().clone().numpy()) - wf.append(network[id].weights.detach().clone().numpy()) +lr_scheduler = build_lr_scheduler(optimizer=optimizer, cfg=cfg, logging=logging) logging.info("*** Network generated.") -for id in range(0, len(network)): - # Load previous weights and epsilon xy - if cfg.learning_step > 0: - filename = ( - cfg.weight_path - + "/Weight_L" - + str(id) - + "_S" - + str(cfg.learning_step) - + ".npy" - ) - if 
os.path.exists(filename) is True: - network[id].weights = torch.tensor( - np.load(filename), - dtype=torch.float32, - ) - wf[id] = np.load(filename) - - filename = ( - cfg.eps_xy_path - + "/EpsXY_L" - + str(id) - + "_S" - + str(cfg.learning_step) - + ".npy" - ) - if os.path.exists(filename) is True: - network[id].epsilon_xy = torch.tensor( - np.load(filename), - dtype=torch.float32, - ) - eps_xy[id] = np.load(filename) - -for id in range(0, len(network)): - - # Are there weights that overwrite the initial weights? - file_to_load = glob.glob( - cfg.learning_parameters.overload_path + "/Weight_L" + str(id) + "*.npy" - ) - - if len(file_to_load) > 1: - raise Exception( - f"Too many previous weights files {cfg.learning_parameters.overload_path}/Weight_L{id}*.npy" - ) - - if len(file_to_load) == 1: - network[id].weights = torch.tensor( - np.load(file_to_load[0]), - dtype=torch.float32, - ) - wf[id] = np.load(file_to_load[0]) - logging.info(f"File used: {file_to_load[0]}") - - # Are there epsinlon xy files that overwrite the initial epsilon xy? - file_to_load = glob.glob( - cfg.learning_parameters.overload_path + "/EpsXY_L" + str(id) + "*.npy" - ) - - if len(file_to_load) > 1: - raise Exception( - f"Too many previous epsilon xy files {cfg.learning_parameters.overload_path}/EpsXY_L{id}*.npy" - ) - - if len(file_to_load) == 1: - network[id].epsilon_xy = torch.tensor( - np.load(file_to_load[0]), - dtype=torch.float32, - ) - eps_xy[id] = np.load(file_to_load[0]) - logging.info(f"File used: {file_to_load[0]}") - -####################################################################### -# Optimizer and LR Scheduler # -####################################################################### - -# I keep weights and epsilon xy seperate to -# set the initial learning rate independently -parameter_list_weights: list = [] -parameter_list_epsilon_xy: list = [] - -for id in range(0, len(network)): - parameter_list_weights.append(network[id]._weights) - parameter_list_epsilon_xy.append(network[id]._epsilon_xy) - -if cfg.learning_parameters.optimizer_name == "Adam": - logging.info("Using optimizer: Adam") - if cfg.learning_parameters.learning_rate_gamma_w > 0: - optimizer_wf: torch.optim.Optimizer = torch.optim.Adam( - parameter_list_weights, - lr=cfg.learning_parameters.learning_rate_gamma_w, - ) - else: - optimizer_wf = torch.optim.Adam( - parameter_list_weights, - ) - - if cfg.learning_parameters.learning_rate_gamma_eps_xy > 0: - optimizer_eps: torch.optim.Optimizer = torch.optim.Adam( - parameter_list_epsilon_xy, - lr=cfg.learning_parameters.learning_rate_gamma_eps_xy, - ) - else: - optimizer_eps = torch.optim.Adam( - parameter_list_epsilon_xy, - ) -else: - raise Exception("Optimizer not implemented") - -do_lr_scheduler_step: bool = True - -if cfg.learning_parameters.lr_schedule_name == "None": - logging.info("Using lr scheduler: None") - do_lr_scheduler_step = False - -elif cfg.learning_parameters.lr_schedule_name == "ReduceLROnPlateau": - logging.info("Using lr scheduler: ReduceLROnPlateau") - - assert cfg.learning_parameters.lr_scheduler_factor_w > 0 - assert cfg.learning_parameters.lr_scheduler_factor_eps_xy > 0 - assert cfg.learning_parameters.lr_scheduler_patience_w > 0 - assert cfg.learning_parameters.lr_scheduler_patience_eps_xy > 0 - - lr_scheduler_wf = torch.optim.lr_scheduler.ReduceLROnPlateau( - optimizer_wf, - factor=cfg.learning_parameters.lr_scheduler_factor_w, - patience=cfg.learning_parameters.lr_scheduler_patience_w, - ) - - lr_scheduler_eps = torch.optim.lr_scheduler.ReduceLROnPlateau( - 
optimizer_eps, - factor=cfg.learning_parameters.lr_scheduler_factor_eps_xy, - patience=cfg.learning_parameters.lr_scheduler_patience_eps_xy, - ) - -elif cfg.learning_parameters.lr_schedule_name == "SbSLRScheduler": - logging.info("Using lr scheduler: SbSLRScheduler") - - assert cfg.learning_parameters.lr_scheduler_factor_w > 0 - assert cfg.learning_parameters.lr_scheduler_factor_eps_xy > 0 - assert cfg.learning_parameters.lr_scheduler_patience_w > 0 - assert cfg.learning_parameters.lr_scheduler_patience_eps_xy > 0 - - if sbs_lr_scheduler is False: - raise Exception("lr_scheduler: SbSLRScheduler.py missing") - - lr_scheduler_wf = SbSLRScheduler( - optimizer_wf, - factor=cfg.learning_parameters.lr_scheduler_factor_w, - patience=cfg.learning_parameters.lr_scheduler_patience_w, - tau=cfg.learning_parameters.lr_scheduler_tau_w, - ) - - lr_scheduler_eps = SbSLRScheduler( - optimizer_eps, - factor=cfg.learning_parameters.lr_scheduler_factor_eps_xy, - patience=cfg.learning_parameters.lr_scheduler_patience_eps_xy, - tau=cfg.learning_parameters.lr_scheduler_tau_eps_xy, - ) -else: - raise Exception("lr_scheduler not implemented") - -logging.info("*** Optimizer prepared.") - - -####################################################################### -# Some variable declarations # -####################################################################### - -test_correct: int = 0 -test_all: int = 0 -test_complete: int = the_dataset_test.__len__() - -train_correct: int = 0 -train_all: int = 0 -train_complete: int = the_dataset_train.__len__() - -train_number_of_processed_pattern: int = 0 - -train_loss: np.ndarray = np.zeros((1), dtype=np.float32) - -last_test_performance: float = -1.0 - +load_previous_weights( + network=network, + overload_path=cfg.learning_parameters.overload_path, + logging=logging, + device=device, + default_dtype=default_dtype, +) logging.info("") +last_test_performance: float = -1.0 with torch.no_grad(): if cfg.learning_parameters.learning_active is True: - while True: + while cfg.epoch_id < cfg.epoch_id_max: - ############################################### - # Run a training data batch # - ############################################### + # ############################################## + # Run a training data epoch + # ############################################## + network.train() + ( + my_loss_for_batch, + performance_for_batch, + full_loss, + full_correct, + ) = loop_train( + cfg=cfg, + network=network, + my_loader_train=my_loader_train, + the_dataset_train=the_dataset_train, + optimizer=optimizer, + device=device, + default_dtype=default_dtype, + logging=logging, + tb=tb, + adapt_learning_rate=cfg.learning_parameters.adapt_learning_rate_after_minibatch, + lr_scheduler=lr_scheduler, + last_test_performance=last_test_performance, + ) - for h_x, h_x_labels in my_loader_train: - time_0: float = time.perf_counter() - - if train_number_of_processed_pattern == 0: - # Reset the gradient of the torch optimizers - optimizer_wf.zero_grad() - optimizer_eps.zero_grad() - - with torch.enable_grad(): - - h_collection = [] - h_collection.append( - the_dataset_train.pattern_filter_train(h_x, cfg).type( - dtype=torch.float32 - ) - ) - for id in range(0, len(network)): - h_collection.append(network[id](h_collection[-1])) - - # Convert label into one hot - target_one_hot: torch.Tensor = torch.zeros( - ( - h_x_labels.shape[0], - int(cfg.network_structure.number_of_output_neurons), - ) - ) - target_one_hot.scatter_( - 1, h_x_labels.unsqueeze(1), torch.ones((h_x_labels.shape[0], 1)) - ) - 
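The scatter_ call along dim 1 writes a 1 at each row's label index, i.e. it turns the integer class labels into the one-hot targets used by the MSE/KL loss that follows. A tiny sketch with made-up labels and four classes:

import torch

labels = torch.tensor([2, 0, 1])
one_hot = torch.zeros((labels.shape[0], 4))
one_hot.scatter_(1, labels.unsqueeze(1), torch.ones((labels.shape[0], 1)))
# one_hot:
# tensor([[0., 0., 1., 0.],
#         [1., 0., 0., 0.],
#         [0., 1., 0., 0.]])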
target_one_hot = ( - target_one_hot.unsqueeze(2) - .unsqueeze(2) - .type(dtype=torch.float32) - ) - - h_y1 = torch.log(h_collection[-1] + 1e-20) - - my_loss: torch.Tensor = ( - ( - torch.nn.functional.mse_loss( - h_collection[-1], - target_one_hot, - reduction="none", - ) - * cfg.learning_parameters.loss_coeffs_mse - + torch.nn.functional.kl_div( - h_y1, target_one_hot + 1e-20, reduction="none" - ) - * cfg.learning_parameters.loss_coeffs_kldiv - ) - / ( - cfg.learning_parameters.loss_coeffs_kldiv - + cfg.learning_parameters.loss_coeffs_mse - ) - ).mean() - - time_1: float = time.perf_counter() - - my_loss.backward() - - my_loss_float = my_loss.item() - time_2: float = time.perf_counter() - - train_correct += ( - (h_collection[-1].argmax(dim=1).squeeze() == h_x_labels) - .sum() - .numpy() - ) - train_all += h_collection[-1].shape[0] - - performance: float = 100.0 * train_correct / train_all - - time_measure_a: float = time_1 - time_0 - - logging.info( - ( - f"{cfg.learning_step:^6} Training \t{train_all^6} pattern " - f"with {performance/100.0:^6.2%} " - f"\t\tForward time: \t{time_measure_a:^6.2f}sec" - ) + # Let the torch learning rate scheduler update the + # learning rates of the optimiers + if cfg.learning_parameters.adapt_learning_rate_after_minibatch is False: + run_lr_scheduler( + cfg=cfg, + lr_scheduler=lr_scheduler, + optimizer=optimizer, + performance_for_batch=performance_for_batch, + my_loss_for_batch=my_loss_for_batch, + tb=tb, + logging=logging, ) - train_loss[0] += my_loss_float - train_number_of_processed_pattern += h_collection[-1].shape[0] + # ############################################## + # Run test data + # ############################################## + network.eval() + last_test_performance = loop_test( + epoch_id=cfg.epoch_id, + cfg=cfg, + network=network, + my_loader_test=my_loader_test, + the_dataset_test=the_dataset_test, + device=device, + default_dtype=default_dtype, + logging=logging, + tb=tb, + ) - time_measure_b: float = time_2 - time_1 + # Next epoch + cfg.epoch_id += 1 + else: + # ############################################## + # Run test data + # ############################################## + network.eval() + last_test_performance = loop_test( + epoch_id=cfg.epoch_id, + cfg=cfg, + network=network, + my_loader_test=my_loader_test, + the_dataset_test=the_dataset_test, + device=device, + default_dtype=default_dtype, + logging=logging, + tb=tb, + ) - logging.info( - ( - f"\t\t\tLoss: {train_loss[0]/train_number_of_processed_pattern:^15.3e} " - f"\t\t\tBackward time: \t{time_measure_b:^6.2f}sec " - ) - ) + tb.close() - if ( - train_number_of_processed_pattern - >= cfg.get_update_after_x_pattern() - ): - my_loss_for_batch: float = ( - train_loss[0] / train_number_of_processed_pattern - ) - - optimizer_wf.step() - optimizer_eps.step() - - for id in range(0, len(network)): - if cfg.network_structure.w_trainable[id] is True: - network[id].norm_weights() - network[id].threshold_weights( - cfg.learning_parameters.learning_rate_threshold_w - ) - network[id].norm_weights() - else: - network[id].weights = torch.tensor( - wf[id], dtype=torch.float32 - ) - - if cfg.network_structure.eps_xy_trainable[id] is True: - network[id].threshold_epsilon_xy( - cfg.learning_parameters.learning_rate_threshold_eps_xy - ) - if cfg.network_structure.eps_xy_mean[id] is True: - network[id].mean_epsilon_xy() - else: - network[id].epsilon_xy = torch.tensor( - eps_xy[id], dtype=torch.float32 - ) - - if cfg.network_structure.w_trainable[id] is True: - # Save the new values - np.save( - 
cfg.weight_path - + "/Weight_L" - + str(id) - + "_S" - + str(cfg.learning_step) - + ".npy", - network[id].weights.detach().numpy(), - ) - - try: - tb.add_histogram( - "Weights " + str(id), - network[id].weights, - cfg.learning_step, - ) - except ValueError: - pass - - if cfg.network_structure.eps_xy_trainable[id] is True: - np.save( - cfg.eps_xy_path - + "/EpsXY_L" - + str(id) - + "_S" - + str(cfg.learning_step) - + ".npy", - network[id].epsilon_xy.detach().numpy(), - ) - try: - tb.add_histogram( - "Epsilon XY " + str(id), - network[id].epsilon_xy.detach().numpy(), - cfg.learning_step, - ) - except ValueError: - pass - - # Let the torch learning rate scheduler update the - # learning rates of the optimiers - if do_lr_scheduler_step is True: - if cfg.learning_parameters.lr_scheduler_use_performance is True: - lr_scheduler_wf.step(100.0 - performance) - else: - lr_scheduler_wf.step(my_loss_for_batch) - - if do_lr_scheduler_step is True: - if cfg.learning_parameters.lr_scheduler_use_performance is True: - lr_scheduler_eps.step(100.0 - performance) - else: - lr_scheduler_eps.step(my_loss_for_batch) - - tb.add_scalar("Train Error", 100.0 - performance, cfg.learning_step) - tb.add_scalar("Train Loss", my_loss_for_batch, cfg.learning_step) - tb.add_scalar( - "Learning Rate Scale WF", - optimizer_wf.param_groups[-1]["lr"], - cfg.learning_step, - ) - tb.add_scalar( - "Learning Rate Scale Eps XY ", - optimizer_eps.param_groups[-1]["lr"], - cfg.learning_step, - ) - logging.info( - f"\t\t\tLearning rate: weights:{optimizer_wf.param_groups[-1]['lr']:^15.3e} \t epsilon xy:{optimizer_eps.param_groups[-1]['lr']:^15.3e}" - ) - logging.info("\t\t\t*** Updating the weights ***") - - cfg.learning_step += 1 - train_loss = np.zeros((1), dtype=np.float32) - train_correct = 0 - train_all = 0 - performance = 0 - train_number_of_processed_pattern = 0 - - tb.flush() - - test_correct = 0 - test_all = 0 - - if last_test_performance < 0: - logging.info("") - else: - logging.info( - f"\t\t\tLast test performance: {last_test_performance/100.0:^6.2%}" - ) - logging.info("") - - ############################################### - # Run a test data performance measurement # - ############################################### - if ( - ( - ( - ( - cfg.learning_step - % cfg.learning_parameters.test_every_x_learning_steps - ) - == 0 - ) - or (cfg.learning_step == cfg.learning_step_max) - ) - and (cfg.learning_parameters.test_during_learning is True) - and (cfg.learning_step > 0) - ): - logging.info("") - logging.info("Testing:") - - for h_x, h_x_labels in my_loader_test: - time_0 = time.perf_counter() - - h_h: torch.Tensor = network( - the_dataset_test.pattern_filter_test(h_x, cfg).type( - dtype=torch.float32 - ) - ) - - test_correct += ( - (h_h.argmax(dim=1).squeeze() == h_x_labels) - .sum() - .numpy() - ) - test_all += h_h.shape[0] - performance = 100.0 * test_correct / test_all - time_1 = time.perf_counter() - time_measure_a = time_1 - time_0 - - logging.info( - ( - f"\t\t{test_all} of {test_complete}" - f" with {performance/100:^6.2%} \t Time used: {time_measure_a:^6.2f}sec" - ) - ) - - logging.info("") - - last_test_performance = performance - - tb.add_scalar( - "Test Error", 100.0 - performance, cfg.learning_step - ) - tb.flush() - - if cfg.learning_step == cfg.learning_step_max: - tb.close() - exit(1) # %% diff --git a/make_new_previous.sh b/make_new_previous.sh new file mode 100644 index 0000000..b80763d --- /dev/null +++ b/make_new_previous.sh @@ -0,0 +1,5 @@ +MAX_NUMBER=199 + +MyPATH="Previous++" +mkdir $MyPATH +cp 
Parameters/*_S$MAX_NUMBER.npy $MyPATH diff --git a/network.json b/network.json new file mode 100644 index 0000000..a9411ac --- /dev/null +++ b/network.json @@ -0,0 +1,91 @@ +{ + "network_structure": { + "number_of_output_neurons": 10, + "layer_type": [ + "SbS", + "MAX POOLING", + "SbS", + "MAX POOLING", + "SbS", + "SbS" + ], + "strides": [ + [ + 1, + 1 + ], // "SbS" + [ + 2, + 2 + ], // POOLING + [ + 1, + 1 + ], // "SbS" + [ + 2, + 2 + ], // POOLING + [ + 1, + 1 + ], // "SbS" + [ + 1, + 1 + ] // "SbS" + ], + "forward_neuron_numbers": [ + [ + 1, + 32 + ], // "SbS" + [ + 32, + 32 + ], // POOLING + [ + 32, + 64 + ], // "SbS" + [ + 64, + 64 + ], // POOLING + [ + 64, + 96 + ], // "SbS" + [ + 96, + 10 + ] // "SbS" + ], + "forward_kernel_size": [ + [ + 5, + 5 + ], // "SbS" + [ + 2, + 2 + ], // POOLING + [ + 5, + 5 + ], // "SbS" + [ + 2, + 2 + ], // POOLING + [ + 3, + 3 + ], // "SbS" + [ + 1, + 1 + ] // "SbS" + ] + } +} \ No newline at end of file diff --git a/network/Adam.py b/network/Adam.py new file mode 100644 index 0000000..45bde9b --- /dev/null +++ b/network/Adam.py @@ -0,0 +1,156 @@ +import torch +import math + + +class Adam(torch.optim.Optimizer): + + sbs_setting: list[bool] + lr: float + beta1: float + beta2: float + eps: float + maximize: bool + + def __init__( + self, + params, + sbs_setting: list[bool], + lr: float = 1e-3, + beta1: float = 0.9, + beta2: float = 0.999, + eps: float = 1e-8, + maximize: bool = False, + ) -> None: + + assert lr > 0.0 + + assert eps > 0.0 + + assert beta1 > 0.0 + assert beta1 < 1.0 + + assert beta2 > 0.0 + assert beta2 < 1.0 + + assert len(sbs_setting) == len(params) + + self.sbs_setting = sbs_setting + self.params = params + self.lr = lr + self.beta1 = beta1 + self.beta2 = beta2 + self.eps = eps + self.maximize = maximize + + defaults = dict( + lr=lr, + beta1=beta1, + beta2=beta2, + eps=eps, + maximize=maximize, + ) + super().__init__(params, defaults) + + def step(self): + + params_with_grad = [] + grads = [] + exp_avgs = [] + exp_avg_sqs = [] + state_steps = [] + sbs_setting = [] + + assert len(self.param_groups) == 1 + + for id, p in enumerate(self.params): + if p.grad is not None: + params_with_grad.append(p) + grads.append(p.grad) + sbs_setting.append(self.sbs_setting[id]) + + state = self.state[p] + + # Lazy state initialization + if len(state) == 0: + + state["step"] = torch.tensor(0.0) + + # Exponential moving average of gradient values + state["exp_avg"] = torch.zeros_like( + p, memory_format=torch.preserve_format + ) + # Exponential moving average of squared gradient values + state["exp_avg_sq"] = torch.zeros_like( + p, memory_format=torch.preserve_format + ) + + exp_avgs.append(state["exp_avg"]) + exp_avg_sqs.append(state["exp_avg_sq"]) + state_steps.append(state["step"]) + + self.adam( + params_with_grad, + grads, + sbs_setting, + exp_avgs, + exp_avg_sqs, + state_steps, + beta1=self.beta1, + beta2=self.beta2, + lr=self.param_groups[0]["lr"], + eps=self.eps, + maximize=self.maximize, + ) + + def adam( + self, + params: list[torch.Tensor], + grads: list[torch.Tensor], + sbs_setting: list[bool], + exp_avgs: list[torch.Tensor], + exp_avg_sqs: list[torch.Tensor], + state_steps: list[torch.Tensor], + beta1: float, + beta2: float, + lr: float, + eps: float, + maximize: bool, + ) -> None: + + with torch.no_grad(): + + for i, param in enumerate(params): + + if maximize is False: + grad = grads[i] + else: + grad = -grads[i] + + exp_avg = exp_avgs[i] + exp_avg_sq = exp_avg_sqs[i] + step_t = state_steps[i] + + # increase step + step_t += 1 + + # Decay the 
first and second moment running average coefficient + exp_avg *= beta1 + exp_avg += (1.0 - beta1) * grad + + exp_avg_sq *= beta2 + exp_avg_sq += (1.0 - beta2) * grad**2 + + step_size: float = lr / (1.0 - beta1 ** float(step_t)) + + denom = ( + exp_avg_sq.sqrt() / math.sqrt(1.0 - beta2 ** float(step_t)) + ) + eps + + if sbs_setting[i] is False: + param -= step_size * (exp_avg / denom) + else: + delta = torch.exp(-step_size * (exp_avg / denom)) + print( + f"{float(delta.min()) - 1.0:.4e} {float(delta.max()) - 1.0:.4e} {lr:.4e}" + ) + param *= delta diff --git a/network/CPP_Cuda/HDynamicCNNManyIP.cu b/network/CPP_Cuda/HDynamicCNNManyIP.cu new file mode 100644 index 0000000..2272276 --- /dev/null +++ b/network/CPP_Cuda/HDynamicCNNManyIP.cu @@ -0,0 +1,714 @@ +#include "HDynamicCNNManyIP.h" + +#include +#include +#include + +#include +#include +#include + + +HDynamicCNNManyIP::HDynamicCNNManyIP() +{ + +}; + +HDynamicCNNManyIP::~HDynamicCNNManyIP() +{ + +}; + +bool HDynamicCNNManyIP::update_entrypoint( + int64_t h_pointer_addr, + int64_t h_dim_0, + int64_t h_dim_1, + int64_t h_dim_2, + int64_t h_dim_3, + int64_t epsilon_xy_pointer_addr, + int64_t epsilon_xy_dim_0, + int64_t epsilon_xy_dim_1, + int64_t epsilon_xy_dim_2, + int64_t epsilon_t_pointer_addr, + int64_t epsilon_t_dim_0, + int64_t weights_pointer_addr, + int64_t weights_dim_0, + int64_t weights_dim_1, + int64_t input_pointer_addr, + int64_t input_dim_0, + int64_t input_dim_1, + int64_t input_dim_2, + int64_t input_dim_3, + int64_t init_vector_pointer_addr, + int64_t init_vector_dim_0, + int64_t number_of_processes, + float forgetting_offset, + int64_t gpu_tuning_factor) +{ + + size_t number_of_pattern = input_dim_0; + + size_t h_dim = init_vector_dim_0; + float* h_init_ptr = (float*)init_vector_pointer_addr; + assert((h_init_ptr != nullptr)); + assert((h_dim > 0)); + + float* h_pointer = (float*)h_pointer_addr; + assert((h_pointer != nullptr)); + assert((h_dim_0 > 0)); + assert((h_dim_1 > 0)); + assert((h_dim_2 > 0)); + assert((h_dim_3 > 0)); + + size_t h_dim_c0 = h_dim_1 * h_dim_2 * h_dim_3; + size_t h_dim_c1 = h_dim_2 * h_dim_3; + size_t h_dim_c2 = h_dim_3; + + float* epsilon_xy_pointer = (float*)epsilon_xy_pointer_addr; + assert((epsilon_xy_pointer != nullptr)); + assert((epsilon_xy_dim_0 > 0)); + assert((epsilon_xy_dim_1 > 0)); + + size_t epsilon_xy_dim_c0 = epsilon_xy_dim_2 * epsilon_xy_dim_1; + size_t epsilon_xy_dim_c1 = epsilon_xy_dim_2; + + float* epsilon_t_pointer = (float*)epsilon_t_pointer_addr; + assert((epsilon_t_pointer != nullptr)); + assert((epsilon_t_dim_0 > 0)); + + float* weights_pointer = (float*)weights_pointer_addr; + assert((weights_pointer != nullptr)); + assert((weights_dim_0 > 0)); + assert((weights_dim_1 > 0)); + + size_t weights_dim_c0 = weights_dim_1; + + int64_t* input_pointer = (int64_t*)input_pointer_addr; + assert((input_pointer != nullptr)); + assert((input_dim_0 > 0)); + assert((input_dim_1 > 0)); + assert((input_dim_2 > 0)); + assert((input_dim_3 > 0)); + + size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3; + size_t input_dim_c1 = input_dim_2 * input_dim_3; + size_t input_dim_c2 = input_dim_3; + + assert((h_dim == weights_dim_1)); + size_t number_of_spikes = input_dim_1; + size_t dim_x = input_dim_2; + size_t dim_y = input_dim_3; + + float forgetting_offset_local = forgetting_offset / static_cast(h_dim); + + + // -------------------- + if (number_of_processes > 0) + { + omp_set_num_threads(number_of_processes); + + size_t pattern_id; +#pragma omp parallel for + for (pattern_id = 0; 
pattern_id < number_of_pattern; pattern_id++) + { + update( + h_init_ptr, + h_pointer, + h_dim_c0, + h_dim_c1, + h_dim_c2, + h_dim, + epsilon_xy_pointer, + epsilon_xy_dim_c0, + epsilon_xy_dim_c1, + epsilon_t_pointer, + weights_pointer, + weights_dim_c0, + input_pointer, + input_dim_c0, + input_dim_c1, + input_dim_c2, + number_of_spikes, + dim_x, + dim_y, + forgetting_offset, + forgetting_offset_local, + pattern_id); + } + } + else + { + gpu_update( + h_init_ptr, + h_pointer, + h_dim_c0, + h_dim_c1, + h_dim_c2, + h_dim, + epsilon_xy_pointer, + epsilon_xy_dim_c0, + epsilon_xy_dim_c1, + epsilon_t_pointer, + weights_pointer, + weights_dim_c0, + input_pointer, + input_dim_c0, + input_dim_c1, + input_dim_c2, + number_of_spikes, + dim_x, + dim_y, + forgetting_offset, + forgetting_offset_local, + number_of_pattern, + gpu_tuning_factor); + + } + return true; +}; + + +bool HDynamicCNNManyIP::update( + float* h_init_ptr, + float* h_pointer, + size_t h_dim_c0, + size_t h_dim_c1, + size_t h_dim_c2, + size_t h_dim, + float* epsilon_xy_pointer, + size_t epsilon_xy_dim_c0, + size_t epsilon_xy_dim_c1, + float* epsilon_t_pointer, + float* weights_pointer, + size_t weights_dim_c0, + int64_t* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + size_t number_of_spikes, + size_t dim_x, + size_t dim_y, + float forgetting_offset, + float forgetting_offset_local, + size_t pattern_id) +{ + + float* h_ptr; + float* epsilon_xy_ptr; + int64_t* input_ptr; + + size_t counter_x; + size_t counter_y; + + for (counter_x = 0; counter_x < dim_x; counter_x++) + { + for (counter_y = 0; counter_y < dim_y; counter_y++) + { + epsilon_xy_ptr = epsilon_xy_pointer + + counter_x * epsilon_xy_dim_c1 + counter_y; + + h_ptr = h_pointer + + pattern_id * h_dim_c0 + counter_x * h_dim_c2 + counter_y; + + input_ptr = input_pointer + + pattern_id * input_dim_c0 + counter_x * input_dim_c2 + counter_y; + + update_one_ip( + h_init_ptr, + h_ptr, + h_dim_c1, + h_dim, + weights_pointer, + weights_dim_c0, + input_ptr, + input_dim_c1, + epsilon_xy_ptr, + epsilon_xy_dim_c0, + epsilon_t_pointer, + number_of_spikes, + forgetting_offset, + forgetting_offset_local); + + } + } + + return true; +}; + +void HDynamicCNNManyIP::update_one_ip( + float* h_init_ptr, + float* h_pointer, + size_t h_dim_c1, + size_t h_dim, + float* weights_pointer, + size_t weights_dim_c0, + int64_t* input_pointer, + size_t input_dim_c1, + float* epsilon_xy_pointer, + size_t epsilon_xy_dim_c0, + float* epsilon_t_pointer, + size_t number_of_spikes, + float forgetting_offset, + float forgetting_offset_local) +{ + + float* h_temp = new float[h_dim]; + float* h_subsegment = new float[h_dim]; + + memcpy(h_subsegment, h_init_ptr, sizeof(float) * h_dim); + + size_t counter_spike; + size_t counter; + + float h_temp_sum; + float temp_value; + + float epsilon_subsegment; + float epsilon_scale = 1.0; + + int64_t* spike; + float* w_ptr; + + for (counter_spike = 0; counter_spike < number_of_spikes; counter_spike++) + { + if (epsilon_scale > 1E10) + { + temp_value = 1.0 / epsilon_scale; + +#pragma omp simd + for (counter = 0; counter < h_dim; counter++) + { + h_subsegment[counter] *= temp_value; + } + + epsilon_scale = 1.0; + } + + spike = input_pointer + counter_spike * input_dim_c1; + + if (*spike >= 0) + { + epsilon_subsegment = + epsilon_xy_pointer[*spike *epsilon_xy_dim_c0] * epsilon_t_pointer[counter_spike]; + + w_ptr = weights_pointer + *spike * weights_dim_c0; + + memcpy(h_temp, h_subsegment, sizeof(float) * h_dim); + +#pragma omp simd + for (counter = 0; 
counter < h_dim; counter++) + { + h_temp[counter] *= w_ptr[counter]; + } + + h_temp_sum = 0.0; +#pragma omp simd reduction(+ : h_temp_sum) + for (counter = 0; counter < h_dim; counter++) + { + h_temp_sum += h_temp[counter]; + } + + if (h_temp_sum > 1E-10) + { + temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum; + +#pragma omp simd + for (counter = 0; counter < h_dim; counter++) + { + h_temp[counter] *= temp_value; + } + +#pragma omp simd + for (counter = 0; counter < h_dim; counter++) + { + h_subsegment[counter] += h_temp[counter]; + } + + if (forgetting_offset_local > 0.0) + { + temp_value = + epsilon_scale * epsilon_subsegment * forgetting_offset_local; + +#pragma omp simd + for (counter = 0; counter < h_dim; counter++) + { + h_subsegment[counter] += temp_value; + } + + epsilon_scale *= + 1.0 + epsilon_subsegment * (1.0 + forgetting_offset); + } + else + { + epsilon_scale *= 1.0 + epsilon_subsegment * 1.0; + } + } + } + } + + temp_value = 1.0 / epsilon_scale; +#pragma omp simd + for (counter = 0; counter < h_dim; counter++) + { + h_pointer[counter * h_dim_c1] = + h_subsegment[counter] * temp_value; + } + + delete[] h_temp; + delete[] h_subsegment; + + return; +}; + +__device__ void gpu_update_one_ip( + float* __restrict__ h_init_ptr, + float* __restrict__ h_pointer, + size_t h_dim_c1, + size_t h_dim, + float* __restrict__ weights_pointer, + size_t weights_dim_c0, + int64_t* input_pointer, + size_t input_dim_c1, + float* __restrict__ epsilon_xy_pointer, + size_t epsilon_xy_dim_c0, + float* __restrict__ epsilon_t_pointer, + size_t number_of_spikes, + float forgetting_offset, + float forgetting_offset_local, + float* __restrict__ h_temp, + float* __restrict__ h_subsegment +) +{ + + size_t counter_spike; + size_t counter; + + float h_temp_sum; + float temp_value; + + float epsilon_subsegment; + float epsilon_scale = 1.0; + + int64_t* spike; + float* w_ptr; + + // float* h_temp = new float[h_dim]; + // float* h_subsegment = new float[h_dim]; + + // Initialize the sub-segement + for (counter = 0; counter < h_dim; counter++) + { + h_subsegment[counter] = h_init_ptr[counter]; + } + + for (counter_spike = 0; counter_spike < number_of_spikes; counter_spike++) + { + if (epsilon_scale > 1E10) + { + temp_value = 1.0 / epsilon_scale; + + for (counter = 0; counter < h_dim; counter++) + { + h_subsegment[counter] *= temp_value; + } + + epsilon_scale = 1.0; + } + + spike = input_pointer + counter_spike * input_dim_c1; + + if (*spike >= 0) + { + epsilon_subsegment = + epsilon_xy_pointer[*spike *epsilon_xy_dim_c0] * epsilon_t_pointer[counter_spike]; + + w_ptr = weights_pointer + *spike * weights_dim_c0; + + for (counter = 0; counter < h_dim; counter++) + { + h_temp[counter] = h_subsegment[counter] * w_ptr[counter]; + } + + h_temp_sum = 0.0; + + for (counter = 0; counter < h_dim; counter++) + { + h_temp_sum += h_temp[counter]; + } + + if (h_temp_sum > 1E-10) + { + temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum; + + for (counter = 0; counter < h_dim; counter++) + { + h_temp[counter] *= temp_value; + } + + for (counter = 0; counter < h_dim; counter++) + { + h_subsegment[counter] += h_temp[counter]; + } + + if (forgetting_offset_local > 0.0) + { + temp_value = + epsilon_scale * epsilon_subsegment * forgetting_offset_local; + + for (counter = 0; counter < h_dim; counter++) + { + h_subsegment[counter] += temp_value; + } + + epsilon_scale *= + 1.0 + epsilon_subsegment * (1.0 + forgetting_offset); + } + else + { + epsilon_scale *= 1.0 + epsilon_subsegment * 1.0; + } + } + } + } + + 
temp_value = 1.0 / epsilon_scale; + + for (counter = 0; counter < h_dim; counter++) + { + h_pointer[counter * h_dim_c1] = + h_subsegment[counter] * temp_value; + } + + // delete[] h_temp; + // delete[] h_subsegment; + + return; +}; + +__global__ void kernel_spike_generation( + float* __restrict__ h_init_ptr, + float* __restrict__ h_pointer, + size_t h_dim_c0, + size_t h_dim_c1, + size_t h_dim_c2, + size_t h_dim, + float* __restrict__ weights_pointer, + size_t weights_dim_c0, + int64_t* __restrict__ input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* __restrict__ epsilon_xy_pointer, + size_t epsilon_xy_dim_c0, + size_t epsilon_xy_dim_c1, + float* __restrict__ epsilon_t_pointer, + size_t number_of_spikes, + float forgetting_offset, + float forgetting_offset_local, + size_t dim_x, + size_t dim_y, + size_t dim_xy, + size_t max_threadable_tasks, + float* __restrict__ temp_memory_a, + float* __restrict__ temp_memory_b +) +{ + + int idx = threadIdx.x + blockIdx.x * blockDim.x; + + if (idx < max_threadable_tasks) + { + float* h_ptr; + float* epsilon_xy_ptr; + int64_t* input_ptr; + + float* temp_memory_ptr_a = temp_memory_a + idx * h_dim; + float* temp_memory_ptr_b = temp_memory_b + idx * h_dim; + + // int pattern_id = idx; + int pattern_id = idx / dim_xy; + int position_xy = idx - (pattern_id * dim_xy); + + // size_t position_x = blockIdx.y; + // size_t position_y = blockIdx.z; + size_t position_x = position_xy / dim_y; + size_t position_y = position_xy - (position_x * dim_y); + + epsilon_xy_ptr = epsilon_xy_pointer + + position_x * epsilon_xy_dim_c1 + position_y; + + h_ptr = h_pointer + + pattern_id * h_dim_c0 + position_x * h_dim_c2 + position_y; + + input_ptr = input_pointer + + pattern_id * input_dim_c0 + position_x * input_dim_c2 + position_y; + + gpu_update_one_ip( + h_init_ptr, + h_ptr, + h_dim_c1, + h_dim, + weights_pointer, + weights_dim_c0, + input_ptr, + input_dim_c1, + epsilon_xy_ptr, + epsilon_xy_dim_c0, + epsilon_t_pointer, + number_of_spikes, + forgetting_offset, + forgetting_offset_local, + temp_memory_ptr_a, + temp_memory_ptr_b + ); + + } + +}; + +// Let's face it... We need a better way to paralelize it... 
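// Note on the work distribution as implemented here: each CUDA thread of
// kernel_spike_generation handles exactly one (pattern, x, y) position and
// then iterates over all spikes sequentially in gpu_update_one_ip, so the
// available parallelism is number_of_pattern * dim_x * dim_y tasks. The
// temp_memory_a / temp_memory_b buffers allocated in gpu_update (below) and
// indexed per thread replace the per-call new[]/delete[] used by the CPU
// path, avoiding device-side heap allocations inside the kernel.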
+bool HDynamicCNNManyIP::gpu_update( + float* h_init_ptr, + float* h_pointer, + size_t h_dim_c0, + size_t h_dim_c1, + size_t h_dim_c2, + size_t h_dim, + float* epsilon_xy_pointer, + size_t epsilon_xy_dim_c0, + size_t epsilon_xy_dim_c1, + float* epsilon_t_pointer, + float* weights_pointer, + size_t weights_dim_c0, + int64_t* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + size_t number_of_spikes, + size_t dim_x, + size_t dim_y, + float forgetting_offset, + float forgetting_offset_local, + size_t number_of_pattern, + size_t gpu_tuning_factor) +{ + + cudaError_t status; + assert((dim_x < 65535)); + assert((dim_y < 65535)); + + // // ////////////////////////////////////// + // // Get infos about the device + // // ////////////////////////////////////// + + // int device; + // cudaDeviceProp prop; + + // status = cudaGetDevice(&device); + // assert((status == cudaSuccess)); + // // std::cout << "Device ID: " << device << std::endl; + + // status = cudaGetDeviceProperties(&prop, device); + // assert((status == cudaSuccess)); + // // std::cout << "Device name: " << prop.name << std::endl; + + // int _cuda_heap_size_in_mb = 16; + // status = cudaDeviceSetLimit(cudaLimitMallocHeapSize, _cuda_heap_size_in_mb * (1 << 20)); + // assert((status == cudaSuccess)); + + // size_t pValue; + // cudaDeviceGetLimit(&pValue, cudaLimitMallocHeapSize); + // std::cout << pValue << " " << (pValue/(2*4*h_dim)) << std::endl; + // exit(1); + + + // ////////////////////////////////////// + // Calculate the distribution on the GPU + // ////////////////////////////////////// + + int min_grid_size; + int block_size; + int grid_size; + + size_t dynamic_s_mem_size = 0; + size_t max_threadable_tasks = number_of_pattern * dim_x * dim_y; + + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html?highlight=blocksize#occupancy-calculator + status = cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, + (void*)kernel_spike_generation, + dynamic_s_mem_size, max_threadable_tasks); + assert((status == cudaSuccess)); + + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications + // Maximum dimensionality of grid of thread blocks: 3 + // Maximum x -dimension of a grid of thread blocks: (2^31)-1 + // Maximum y- or z-dimension of a grid of thread blocks: 65535 + + // Reduce the automatic block size with our guess + if ((gpu_tuning_factor > 0) && (gpu_tuning_factor < block_size)) + { + block_size = int(gpu_tuning_factor); + } + // Round up according to array size + // (I will separate x and y into other grid dimentsions soon) + // grid_size = (number_of_pattern + block_size - 1) / block_size; + grid_size = (max_threadable_tasks + block_size - 1) / block_size; + + // std::cout << min_grid_size << std::endl; + // std::cout << grid_size << std::endl; + // std::cout << block_size << std::endl; + // std::cout << max_threadable_tasks << std::endl; + + //dim3 grid(grid_size, dim_x, dim_y); + + float* temp_memory_a = nullptr; + status = cudaMalloc((void**)&temp_memory_a, h_dim * max_threadable_tasks * sizeof(float)); + assert((status == cudaSuccess)); + + float* temp_memory_b = nullptr; + status = cudaMalloc((void**)&temp_memory_b, h_dim * max_threadable_tasks * sizeof(float)); + assert((status == cudaSuccess)); + + + //kernel_spike_generation<<>>( + kernel_spike_generation<<>>( + h_init_ptr, + h_pointer, + h_dim_c0, + h_dim_c1, + h_dim_c2, + h_dim, + weights_pointer, + weights_dim_c0, + input_pointer, + input_dim_c0, + input_dim_c1, + 
input_dim_c2, + epsilon_xy_pointer, + epsilon_xy_dim_c0, + epsilon_xy_dim_c1, + epsilon_t_pointer, + number_of_spikes, + forgetting_offset, + forgetting_offset_local, + dim_x, + dim_y, + (dim_x * dim_y), + //number_of_pattern + max_threadable_tasks, + temp_memory_a, + temp_memory_b + ); + + status = cudaDeviceSynchronize(); + assert((status == cudaSuccess)); + + status = cudaFree(temp_memory_a); + assert((status == cudaSuccess)); + + status = cudaFree(temp_memory_b); + assert((status == cudaSuccess)); + + + return true; +}; \ No newline at end of file diff --git a/network/CPP_Cuda/HDynamicCNNManyIP.h b/network/CPP_Cuda/HDynamicCNNManyIP.h new file mode 100644 index 0000000..afe7ac1 --- /dev/null +++ b/network/CPP_Cuda/HDynamicCNNManyIP.h @@ -0,0 +1,111 @@ +#ifndef SRC_HDYNAMICCNNMANYIP_H_ +#define SRC_HDYNAMICCNNMANYIP_H_ + +#include + +#include +#include + +class HDynamicCNNManyIP +{ + public: + HDynamicCNNManyIP(); + ~HDynamicCNNManyIP(); + + bool update_entrypoint( + int64_t h_pointer_addr, + int64_t h_dim_0, + int64_t h_dim_1, + int64_t h_dim_2, + int64_t h_dim_3, + int64_t epsilon_xy_pointer_addr, + int64_t epsilon_xy_dim_0, + int64_t epsilon_xy_dim_1, + int64_t epsilon_xy_dim_2, + int64_t epsilon_t_pointer_addr, + int64_t epsilon_t_dim_0, + int64_t weights_pointer_addr, + int64_t weights_dim_0, + int64_t weights_dim_1, + int64_t input_pointer_addr, + int64_t input_dim_0, + int64_t input_dim_1, + int64_t input_dim_2, + int64_t input_dim_3, + int64_t init_vector_pointer_addr, + int64_t init_vector_dim_0, + int64_t number_of_processes, + float forgetting_offset, + int64_t gpu_tuning_factor); + + private: + + bool update( + float* h_init_ptr, + float* h_pointer, + size_t h_dim_c0, + size_t h_dim_c1, + size_t h_dim_c2, + size_t h_dim, + float* epsilon_xy_pointer, + size_t epsilon_xy_dim_c0, + size_t epsilon_xy_dim_c1, + float* epsilon_t_pointer, + float* weights_pointer, + size_t weights_dim_c0, + int64_t* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + size_t number_of_spikes, + size_t dim_x, + size_t dim_y, + float forgetting_offset, + float forgetting_offset_local, + size_t pattern_id); + + void update_one_ip( + float* h_init_ptr, + float* h_pointer, + size_t h_dim_c1, + size_t h_dim, + float* weights_pointer, + size_t weights_dim_c0, + int64_t* input_pointer, + size_t input_dim_c1, + float* epsilon_xy_pointer, + size_t epsilon_xy_dim_c0, + float* epsilon_t_pointer, + size_t number_of_spikes, + float forgetting_offset, + float forgetting_offset_local); + + bool gpu_update( + float* h_init_ptr, + float* h_pointer, + size_t h_dim_c0, + size_t h_dim_c1, + size_t h_dim_c2, + size_t h_dim, + float* epsilon_xy_pointer, + size_t epsilon_xy_dim_c0, + size_t epsilon_xy_dim_c1, + float* epsilon_t_pointer, + float* weights_pointer, + size_t weights_dim_c0, + int64_t* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + size_t number_of_spikes, + size_t dim_x, + size_t dim_y, + float forgetting_offset, + float forgetting_offset_local, + size_t number_of_pattern, + size_t gpu_tuning_factor); + + +}; + +#endif /* SRC_HDYNAMICCNNMANYIP_H_ */ \ No newline at end of file diff --git a/network/CPP_Cuda/Makefile b/network/CPP_Cuda/Makefile new file mode 100644 index 0000000..28b0600 --- /dev/null +++ b/network/CPP_Cuda/Makefile @@ -0,0 +1,67 @@ +# Change to your python bin directory (tested with Python 3.10.4) +PYBIN=~/P3.10GPU/bin/ +NVCC=/usr/local/cuda-12/bin/nvcc -allow-unsupported-compiler +CC=/usr/lib64/ccache/clang++ + 
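# Note: NVCC and CC above are likewise machine-specific (CUDA 12 install
# location, ccache-wrapped clang++); adjust them together with PYBIN for the
# local toolchain.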
+PYBIND11INCLUDE=`$(PYBIN)python3 -m pybind11 --includes` +PARAMETERS_O= -O3 -std=c++14 $(PYBIND11INCLUDE) -ccbin=$(CC) \ + -Xcompiler "-fPIC -Wall -fopenmp=libomp" + +PARAMETERS_Linker=-Xcompiler "-shared -lm -lomp -lstdc++ -Wall" + +PYPOSTFIX=`$(PYBIN)python3-config --extension-suffix` + + +all: PyHDynamicCNNManyIP \ + PySpikeGeneration2DManyIP \ + PyMultiApp + +####################### + +HDynamicCNNManyIP.o: HDynamicCNNManyIP.h HDynamicCNNManyIP.cu + $(NVCC) $(PARAMETERS_O) -c HDynamicCNNManyIP.cu -o HDynamicCNNManyIP.o + +PyHDynamicCNNManyIP.o: HDynamicCNNManyIP.h PyHDynamicCNNManyIP.cpp + $(NVCC) $(PARAMETERS_O) -c PyHDynamicCNNManyIP.cpp -o PyHDynamicCNNManyIP.o + +PyHDynamicCNNManyIP: HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o + $(NVCC) $(PARAMETERS_Linker) -o PyHDynamicCNNManyIP HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o + cp PyHDynamicCNNManyIP PyHDynamicCNNManyIP$(PYPOSTFIX) + $(PYBIN)python3 pybind11_auto_pyi.py + +####################### + +SpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h SpikeGeneration2DManyIP.cu + $(NVCC) $(PARAMETERS_O) -c SpikeGeneration2DManyIP.cu -o SpikeGeneration2DManyIP.o + +PySpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h PySpikeGeneration2DManyIP.cpp + $(NVCC) $(PARAMETERS_O) -c PySpikeGeneration2DManyIP.cpp -o PySpikeGeneration2DManyIP.o + +PySpikeGeneration2DManyIP: SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o + $(NVCC) $(PARAMETERS_Linker) -o PySpikeGeneration2DManyIP SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o + cp PySpikeGeneration2DManyIP PySpikeGeneration2DManyIP$(PYPOSTFIX) + $(PYBIN)python3 pybind11_auto_pyi.py + + +####################### + +MultiApp.o: MultiApp.h MultiApp.cu approximation_multiplication_function.cpp \ + gpu_approximation_multiplication_function.cu error_term.cpp gpu_error_term.cu + $(NVCC) $(PARAMETERS_O) -c MultiApp.cu -o MultiApp.o + +PyMultiApp.o: MultiApp.h PyMultiApp.cpp + $(NVCC) $(PARAMETERS_O) -c PyMultiApp.cpp -o PyMultiApp.o + +PyMultiApp: MultiApp.o PyMultiApp.o + $(NVCC) $(PARAMETERS_Linker) -o PyMultiApp MultiApp.o PyMultiApp.o + cp PyMultiApp PyMultiApp$(PYPOSTFIX) + $(PYBIN)python3 pybind11_auto_pyi.py + +####################### +clean: + rm -f PyHDynamicCNNManyIP + rm -f PySpikeGeneration2DManyIP + rm -f PyMultiApp + rm -f *.o + rm -f *.so + diff --git a/network/CPP_Cuda/MultiApp.cu b/network/CPP_Cuda/MultiApp.cu new file mode 100644 index 0000000..b7a3cec --- /dev/null +++ b/network/CPP_Cuda/MultiApp.cu @@ -0,0 +1,313 @@ +#include "MultiApp.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "approximation_multiplication_function.cpp" +#include "gpu_approximation_multiplication_function.cu" + +MultiApp::MultiApp() +{ + +}; + +MultiApp::~MultiApp() +{ + +}; + +bool MultiApp::update(float* np_input_pointer, + float* np_weight_pointer, + float* np_output_pointer, int64_t pattern_dim, + int64_t feature_dim, int64_t x_dim, int64_t y_dim, + int64_t input_channel_dim, int64_t id_pattern, + bool approximation_enable, int64_t number_of_trunc_bits, + int64_t number_of_frac_bits) +{ + + assert((id_pattern >= 0)); + assert((id_pattern < pattern_dim)); + + float* np_input_pointer_pattern; + float* np_output_pointer_pattern; + + float* input_ptr; + float* output_ptr; + float* w_ptr; + + uint64_t pattern_size = input_channel_dim; + + std::vector ap_h_vector; + ap_h_vector.resize(pattern_size); + float* ap_h_ptr = ap_h_vector.data(); + + std::vector ap_x_vector; + ap_x_vector.resize(pattern_size); + uint32_t* ap_x_ptr = ap_x_vector.data(); + + 
std::vector ap_y_vector; + ap_y_vector.resize(pattern_size); + uint32_t* ap_y_ptr = ap_y_vector.data(); + + std::vector ap_x_exponent_vector; + ap_x_exponent_vector.resize(pattern_size); + uint32_t* ap_x_exponent_ptr = ap_x_exponent_vector.data(); + + std::vector ap_y_exponent_vector; + ap_y_exponent_vector.resize(pattern_size); + uint32_t* ap_y_exponent_ptr = ap_y_exponent_vector.data(); + + std::vector ap_h_exponent_vector; + ap_h_exponent_vector.resize(pattern_size); + uint32_t* ap_h_exponent_ptr = ap_h_exponent_vector.data(); + + std::vector ap_res_vector; + ap_res_vector.resize(pattern_size); + uint64_t* ap_res_ptr = ap_res_vector.data(); + + uint32_t ap_mask = static_cast(pow(2, number_of_trunc_bits)) - 1; + + std::vector sign_temp_vector; + sign_temp_vector.resize(pattern_size); + uint32_t* sign_temp_ptr = sign_temp_vector.data(); + + uint64_t input_pattern_size = input_channel_dim * x_dim * y_dim; + uint64_t output_pattern_size = feature_dim * x_dim * y_dim; + + np_input_pointer_pattern = np_input_pointer + id_pattern * input_pattern_size; + np_output_pointer_pattern = + np_output_pointer + id_pattern * output_pattern_size; + + uint64_t counter; + + uint64_t counter_x; + uint64_t counter_y; + uint64_t counter_feature; + uint64_t pos_xy; + uint64_t pos_xy_if; + + float temp_sum; + + uint64_t pattern_c_2 = x_dim * y_dim; + + for (counter_x = 0; counter_x < x_dim; counter_x++) + { + for (counter_y = 0; counter_y < y_dim; counter_y++) + { + pos_xy = counter_y + counter_x * y_dim; + for (counter_feature = 0; counter_feature < feature_dim; + counter_feature++) + { + pos_xy_if = counter_feature * pattern_c_2 + pos_xy; + + input_ptr = np_input_pointer_pattern + pos_xy; + output_ptr = np_output_pointer_pattern + pos_xy_if; + w_ptr = np_weight_pointer + counter_feature * input_channel_dim; + +#pragma omp simd + for (counter = 0; counter < pattern_size; counter++) + { + ap_h_ptr[counter] = input_ptr[counter * pattern_c_2]; + } + + approximation_multiplication_function( + ap_h_ptr, w_ptr, pattern_size, number_of_trunc_bits, + number_of_frac_bits, ap_x_ptr, ap_y_ptr, ap_x_exponent_ptr, + ap_y_exponent_ptr, ap_h_exponent_ptr, ap_mask, ap_res_ptr, + sign_temp_ptr, approximation_enable); + + temp_sum = 0.0; +#pragma omp simd reduction(+ \ + : temp_sum) + for (counter = 0; counter < pattern_size; counter++) + { + temp_sum += ap_h_ptr[counter]; + } + + output_ptr[0] = temp_sum; + } + } + } + + return true; +}; + +bool MultiApp::update_with_init_vector_multi_pattern( + int64_t np_input_pointer_addr, int64_t np_weight_pointer_addr, + int64_t np_output_pointer_addr, int64_t pattern_dim, int64_t feature_dim, + int64_t x_dim, int64_t y_dim, int64_t input_channel_dim, + int64_t number_of_processes, bool approximation_enable, + int64_t number_of_trunc_bits, int64_t number_of_frac) +{ + + + + int64_t number_of_pattern = pattern_dim; + int64_t pattern_id; + + float* np_input_pointer = (float*)np_input_pointer_addr; + float* np_weight_pointer = (float*)np_weight_pointer_addr; + float* np_output_pointer = (float*)np_output_pointer_addr; + + assert((np_input_pointer != nullptr)); + assert((np_output_pointer != nullptr)); + assert((np_weight_pointer != nullptr)); + + assert((pattern_dim > 0)); + assert((feature_dim > 0)); + assert((x_dim > 0)); + assert((y_dim > 0)); + assert((input_channel_dim > 0)); + + if (number_of_processes > 0) + { + omp_set_num_threads(number_of_processes); + // For debugging: Only one thread + // omp_set_num_threads(1); + +#pragma omp parallel for + for (pattern_id = 0; pattern_id < 
number_of_pattern; pattern_id++) + { + + update(np_input_pointer, np_weight_pointer, + np_output_pointer, pattern_dim, feature_dim, x_dim, y_dim, + input_channel_dim, pattern_id, approximation_enable, + number_of_trunc_bits, number_of_frac); + } + } + else + { + update_gpu(np_input_pointer, np_weight_pointer, + np_output_pointer, pattern_dim, feature_dim, x_dim, y_dim, + input_channel_dim, approximation_enable, + number_of_trunc_bits, number_of_frac); + } + return true; +}; + +__global__ void kernel_approx_multiplication(float* __restrict__ input_pointer, float* __restrict__ weight_pointer, + float* __restrict__ output_pointer, uint64_t pattern_dim, + uint64_t feature_dim, uint64_t x_dim, uint64_t y_dim, + uint64_t input_channel_dim, size_t max_threadable_tasks, + uint64_t input_index_scale, uint64_t number_of_frac_bits, + bool approximation_enable, uint64_t number_of_trunc_bits, + uint32_t ap_mask) +{ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + + if (idx < max_threadable_tasks) + { + int pattern_id = idx / feature_dim; + int feature_id = idx - (pattern_id * feature_dim); + int x_id = blockIdx.y; + int y_id = blockIdx.z; + + float* weight_pointer_sub = weight_pointer + feature_id * input_channel_dim; + float* input_pointer_sub = input_pointer + pattern_id * input_channel_dim * x_dim * y_dim + x_id * y_dim + y_id; + float* output_pointer_sub = output_pointer + + pattern_id * feature_dim * x_dim * y_dim + + feature_id * x_dim * y_dim + x_id * y_dim + y_id; + *output_pointer_sub = 0.0; + + size_t counter; + for (counter = 0; counter < input_channel_dim; counter++) + { + *output_pointer_sub += gpu_approximation_multiplication_function( + weight_pointer_sub[counter], + input_pointer_sub[counter * input_index_scale], + number_of_frac_bits, approximation_enable, + number_of_trunc_bits, ap_mask); + } + } +}; + +bool MultiApp::update_gpu(float* np_input_pointer, + float* np_weight_pointer, + float* np_output_pointer, uint64_t pattern_dim, + uint64_t feature_dim, uint64_t x_dim, uint64_t y_dim, + uint64_t input_channel_dim, + bool approximation_enable, uint64_t number_of_trunc_bits, + uint64_t number_of_frac_bits) +{ + + uint32_t ap_mask = static_cast(pow(2, number_of_trunc_bits)) - 1; + // std::cout << approximation_enable << std::endl; + // std::cout << number_of_trunc_bits << std::endl; + // std::cout << number_of_frac_bits << std::endl; + + cudaError_t status; + assert((x_dim < 65535)); + assert((y_dim < 65535)); + + // ////////////////////////////////////// + // Get infos about the device + // ////////////////////////////////////// + + int device; + cudaDeviceProp prop; + + status = cudaGetDevice(&device); + assert((status == cudaSuccess)); + // std::cout << "Device ID: " << device << std::endl; + + status = cudaGetDeviceProperties(&prop, device); + assert((status == cudaSuccess)); + // std::cout << "Device name: " << prop.name << std::endl; + + // ////////////////////////////////////// + // Calculate the distribution on the GPU + // ////////////////////////////////////// + + int min_grid_size; + int block_size; + int grid_size; + + size_t dynamic_s_mem_size = 0; + size_t max_threadable_tasks = pattern_dim * feature_dim * x_dim * y_dim; + + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html?highlight=blocksize#occupancy-calculator + status = cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, + (void*)kernel_approx_multiplication, + dynamic_s_mem_size, max_threadable_tasks); + assert((status == cudaSuccess)); + + // 
https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications + // Maximum dimensionality of grid of thread blocks: 3 + // Maximum x -dimension of a grid of thread blocks: (2^31)-1 + // Maximum y- or z-dimension of a grid of thread blocks: 65535 + + // Round up according to array size + grid_size = ((pattern_dim * feature_dim) + block_size - 1) / block_size; + + // std::cout << min_grid_size << std::endl; + // std::cout << grid_size << std::endl; + // std::cout << block_size << std::endl; + // std::cout << max_threadable_tasks << std::endl; + + dim3 grid(grid_size, x_dim, y_dim); + + kernel_approx_multiplication<<>>(np_input_pointer, + np_weight_pointer, + np_output_pointer, + pattern_dim, + feature_dim, + x_dim, + y_dim, + input_channel_dim, + (pattern_dim * feature_dim), + (x_dim * y_dim), + number_of_frac_bits, + approximation_enable, + number_of_trunc_bits, + ap_mask); + + cudaDeviceSynchronize(); + return true; +}; diff --git a/network/CPP_Cuda/MultiApp.h b/network/CPP_Cuda/MultiApp.h new file mode 100644 index 0000000..8506918 --- /dev/null +++ b/network/CPP_Cuda/MultiApp.h @@ -0,0 +1,39 @@ +#ifndef SRC_MultiApp_H_ +#define SRC_MultiApp_H_ + +#include + +#include +#include + +class MultiApp +{ +public: + MultiApp(); + ~MultiApp(); + + bool update(float *np_input_pointer, float *np_weight_pointer, + float *np_output_pointer, int64_t pattern_dim, + int64_t feature_dim, int64_t x_dim, int64_t y_dim, + int64_t input_channel_dim, int64_t id_pattern, + bool approximation_enable, int64_t number_of_trunc_bits, + int64_t number_of_frac); + + bool update_gpu(float *input_pointer, float *weight_pointer, + float *output_pointer, uint64_t pattern_dim, + uint64_t feature_dim, uint64_t x_dim, uint64_t y_dim, + uint64_t input_channel_dim, + bool approximation_enable, uint64_t number_of_trunc_bits, + uint64_t number_of_frac); + + bool update_with_init_vector_multi_pattern( + int64_t np_input_pointer_addr, int64_t np_weight_pointer_addr, + int64_t np_output_pointer_addr, int64_t pattern_dim, int64_t feature_dim, + int64_t x_dim, int64_t y_dim, int64_t input_channel_dim, + int64_t number_of_processes, bool approximation_enable, + int64_t number_of_trunc_bits, int64_t number_of_frac); + +private: +}; + +#endif /* SRC_MultiApp_H_ */ \ No newline at end of file diff --git a/network/CPP_Cuda/PyHDynamicCNNManyIP.cpp b/network/CPP_Cuda/PyHDynamicCNNManyIP.cpp new file mode 100644 index 0000000..f59118b --- /dev/null +++ b/network/CPP_Cuda/PyHDynamicCNNManyIP.cpp @@ -0,0 +1,14 @@ +#include + +#include "HDynamicCNNManyIP.h" + +namespace py = pybind11; + +PYBIND11_MODULE(PyHDynamicCNNManyIP, m) +{ + m.doc() = "HDynamicCNNManyIP Module"; + py::class_(m, "HDynamicCNNManyIP") + .def(py::init<>()) + .def("update", + &HDynamicCNNManyIP::update_entrypoint); +} \ No newline at end of file diff --git a/network/CPP_Cuda/PyHDynamicCNNManyIP.pyi b/network/CPP_Cuda/PyHDynamicCNNManyIP.pyi new file mode 100644 index 0000000..7032d50 --- /dev/null +++ b/network/CPP_Cuda/PyHDynamicCNNManyIP.pyi @@ -0,0 +1,18 @@ +# +# AUTOMATICALLY GENERATED FILE, DO NOT EDIT! +# + +"""HDynamicCNNManyIP Module""" +from __future__ import annotations +import PyHDynamicCNNManyIP +import typing + +__all__ = [ + "HDynamicCNNManyIP" +] + + +class HDynamicCNNManyIP(): + def __init__(self) -> None: ... 
+ def update(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int, arg16: int, arg17: int, arg18: int, arg19: int, arg20: int, arg21: int, arg22: float, arg23: int) -> bool: ... + pass diff --git a/network/CPP_Cuda/PyMultiApp.cpp b/network/CPP_Cuda/PyMultiApp.cpp new file mode 100644 index 0000000..fce188b --- /dev/null +++ b/network/CPP_Cuda/PyMultiApp.cpp @@ -0,0 +1,14 @@ + +#include + +#include "MultiApp.h" + +namespace py = pybind11; + +PYBIND11_MODULE(PyMultiApp, m) { + m.doc() = "MultiApp Module"; + py::class_(m, "MultiApp") + .def(py::init<>()) + .def("update_with_init_vector_multi_pattern", + &MultiApp::update_with_init_vector_multi_pattern); +} \ No newline at end of file diff --git a/network/CPP_Cuda/PyMultiApp.pyi b/network/CPP_Cuda/PyMultiApp.pyi new file mode 100644 index 0000000..54768cc --- /dev/null +++ b/network/CPP_Cuda/PyMultiApp.pyi @@ -0,0 +1,18 @@ +# +# AUTOMATICALLY GENERATED FILE, DO NOT EDIT! +# + +"""MultiApp Module""" +from __future__ import annotations +import PyMultiApp +import typing + +__all__ = [ + "MultiApp" +] + + +class MultiApp(): + def __init__(self) -> None: ... + def update_with_init_vector_multi_pattern(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: bool, arg10: int, arg11: int) -> bool: ... + pass diff --git a/network/CPP_Cuda/PySpikeGeneration2DManyIP.cpp b/network/CPP_Cuda/PySpikeGeneration2DManyIP.cpp new file mode 100644 index 0000000..eaece6f --- /dev/null +++ b/network/CPP_Cuda/PySpikeGeneration2DManyIP.cpp @@ -0,0 +1,15 @@ + +#include + +#include "SpikeGeneration2DManyIP.h" + +namespace py = pybind11; + +PYBIND11_MODULE(PySpikeGeneration2DManyIP, m) +{ + m.doc() = "SpikeGeneration2DManyIP Module"; + py::class_(m, "SpikeGeneration2DManyIP") + .def(py::init<>()) + .def("spike_generation", + &SpikeGeneration2DManyIP::spike_generation_entrypoint); +} diff --git a/PySpikeGeneration2DManyIP.pyi b/network/CPP_Cuda/PySpikeGeneration2DManyIP.pyi similarity index 55% rename from PySpikeGeneration2DManyIP.pyi rename to network/CPP_Cuda/PySpikeGeneration2DManyIP.pyi index 57e593a..7c2257d 100644 --- a/PySpikeGeneration2DManyIP.pyi +++ b/network/CPP_Cuda/PySpikeGeneration2DManyIP.pyi @@ -14,5 +14,5 @@ __all__ = [ class SpikeGeneration2DManyIP(): def __init__(self) -> None: ... - def spike_generation_multi_pattern(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int) -> bool: ... + def spike_generation(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int) -> bool: ... 
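For reference, the positional arguments of update correspond, in order, to the parameters of HDynamicCNNManyIP::update_entrypoint: the h tensor address and its four dimensions, the epsilon_xy address and three dimensions, the epsilon_t address and dimension, the weights address and two dimensions, the spike input address and four dimensions, the init vector address and dimension, the number of CPU processes, the forgetting offset, and the GPU tuning factor. A minimal, hypothetical call sketch, assuming contiguous torch tensors with the dtypes the C++ side expects (float32 for h / epsilon / weights / init, int64 for the spike indices); the tensor names and shapes below are illustrative only and not taken from the patch:

import torch
from PyHDynamicCNNManyIP import HDynamicCNNManyIP

# Illustrative shapes: 2 patterns, 32 hidden neurons, 25 input channels,
# 100 spikes, 5x5 spatial positions.
h = torch.full((2, 32, 5, 5), 1.0 / 32.0, dtype=torch.float32)
epsilon_xy = torch.ones((25, 5, 5), dtype=torch.float32)
epsilon_t = torch.ones((100,), dtype=torch.float32)
weights = torch.rand((25, 32), dtype=torch.float32)
spikes = torch.randint(0, 25, (2, 100, 5, 5), dtype=torch.int64)
h_init = torch.full((32,), 1.0 / 32.0, dtype=torch.float32)

model = HDynamicCNNManyIP()
ok = model.update(
    h.data_ptr(), *h.shape,                    # arg0..arg4
    epsilon_xy.data_ptr(), *epsilon_xy.shape,  # arg5..arg8
    epsilon_t.data_ptr(), *epsilon_t.shape,    # arg9..arg10
    weights.data_ptr(), *weights.shape,        # arg11..arg13
    spikes.data_ptr(), *spikes.shape,          # arg14..arg18
    h_init.data_ptr(), *h_init.shape,          # arg19..arg20
    4,    # arg21: number_of_processes (<= 0 selects the GPU path in the CUDA build)
    0.0,  # arg22: forgetting_offset
    0,    # arg23: gpu_tuning_factor
)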
pass diff --git a/network/CPP_Cuda/SpikeGeneration2DManyIP.cu b/network/CPP_Cuda/SpikeGeneration2DManyIP.cu new file mode 100644 index 0000000..e0176e9 --- /dev/null +++ b/network/CPP_Cuda/SpikeGeneration2DManyIP.cu @@ -0,0 +1,390 @@ +#include "SpikeGeneration2DManyIP.h" + +#include +#include +#include + +#include +#include +#include + + +SpikeGeneration2DManyIP::SpikeGeneration2DManyIP() +{ + +}; + +SpikeGeneration2DManyIP::~SpikeGeneration2DManyIP() +{ + +}; + +bool SpikeGeneration2DManyIP::spike_generation_entrypoint( + int64_t input_pointer_addr, int64_t input_dim_0, + int64_t input_dim_1, int64_t input_dim_2, int64_t input_dim_3, + int64_t random_values_pointer_addr, int64_t random_values_dim_0, + int64_t random_values_dim_1, int64_t random_values_dim_2, + int64_t random_values_dim_3, int64_t output_pointer_addr, + int64_t output_dim_0, int64_t output_dim_1, int64_t output_dim_2, + int64_t output_dim_3, int64_t number_of_cpu_processes) +{ + + float* input_pointer = (float*)input_pointer_addr; + float* random_values_pointer = (float*)random_values_pointer_addr; + int64_t* output_pointer = (int64_t*)output_pointer_addr; + + // Input + assert((input_pointer != nullptr)); + assert((input_dim_0 > 0)); + assert((input_dim_1 > 0)); + assert((input_dim_2 > 0)); + assert((input_dim_3 > 0)); + + // Random + assert((random_values_pointer != nullptr)); + assert((random_values_dim_0 > 0)); + assert((random_values_dim_1 > 0)); + assert((random_values_dim_2 > 0)); + assert((random_values_dim_3 > 0)); + + // Output + assert((output_pointer != nullptr)); + assert((output_dim_0 > 0)); + assert((output_dim_1 > 0)); + assert((output_dim_2 > 0)); + assert((output_dim_3 > 0)); + + // Input + size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3; + size_t input_dim_c1 = input_dim_2 * input_dim_3; + size_t input_dim_c2 = input_dim_3; + + // Random + size_t random_values_dim_c0 = + random_values_dim_1 * random_values_dim_2 * random_values_dim_3; + size_t random_values_dim_c1 = + random_values_dim_2 * random_values_dim_3; + size_t random_values_dim_c2 = random_values_dim_3; + + // Output + size_t output_dim_c0 = + output_dim_1 * output_dim_2 * output_dim_3; + size_t output_dim_c1 = output_dim_2 * output_dim_3; + size_t output_dim_c2 = output_dim_3; + + size_t number_of_pattern = input_dim_0; + size_t h_dim = input_dim_1; + size_t spike_dim = output_dim_1; + size_t x_dim = output_dim_2; + size_t y_dim = output_dim_2; + + if (number_of_cpu_processes > 0) + { + + omp_set_num_threads(number_of_cpu_processes); + // DEBUG: + // omp_set_num_threads(1); + + size_t pattern_id; + +#pragma omp parallel for + for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++) + { + spike_generation( + input_pointer, + input_dim_c0, + input_dim_c1, + input_dim_c2, + random_values_pointer, + random_values_dim_c0, + random_values_dim_c1, + random_values_dim_c2, + output_pointer, + output_dim_c0, + output_dim_c1, + output_dim_c2, + x_dim, + y_dim, + spike_dim, + h_dim, + pattern_id); + } + } + else + { + gpu_spike_generation( + input_pointer, + input_dim_c0, + input_dim_c1, + input_dim_c2, + random_values_pointer, + random_values_dim_c0, + random_values_dim_c1, + random_values_dim_c2, + output_pointer, + output_dim_c0, + output_dim_c1, + output_dim_c2, + x_dim, + y_dim, + spike_dim, + h_dim, + number_of_pattern); + } + + return true; +}; + +bool SpikeGeneration2DManyIP::spike_generation( + float* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* random_values_pointer, + size_t 
random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t pattern_id) +{ + + size_t counter; + size_t counter_x = 0; + size_t counter_y = 0; + + float* p_ptr = nullptr; + int64_t* out_ptr = nullptr; + float* rand_ptr = nullptr; + + for (counter_x = 0; counter_x < x_dim; counter_x++) + { + for (counter_y = 0; counter_y < y_dim; counter_y++) + { + p_ptr = input_pointer + pattern_id * input_dim_c0 + + counter_x * input_dim_c2 + counter_y; + // + counter * input_dim_c1 + + out_ptr = output_pointer + pattern_id * output_dim_c0 + + counter_x * output_dim_c2 + counter_y; + // + counter * output_dim_c1 + + rand_ptr = random_values_pointer + + pattern_id * random_values_dim_c0 + + counter_x * random_values_dim_c2 + counter_y; + // + counter * random_values_dim_c1 + + for (counter = 0; counter < spike_dim; counter++) + { + out_ptr[counter * output_dim_c1] = lower_bound(p_ptr, + h_dim, + input_dim_c1, + rand_ptr[counter * random_values_dim_c1]); + } + } + } + + return true; +}; + +// algorithmic idea stolen from libc++ +size_t SpikeGeneration2DManyIP::lower_bound(float* data_ptr, + size_t data_length, + size_t data_ptr_stride, + float compare_to_value) +{ + + size_t start_of_range = 0; + size_t length_of_range = data_length; + + while (length_of_range != 0) + { + size_t half_length = length_of_range >> 1; + size_t actual_position = start_of_range + half_length; + + if (data_ptr[actual_position * data_ptr_stride] < compare_to_value) + { + start_of_range = ++actual_position; + length_of_range -= half_length + 1; + } + else + length_of_range = half_length; + } + return start_of_range; +}; + +__device__ size_t gpu_lower_bound(float* __restrict__ data_ptr, + size_t data_length, + size_t data_ptr_stride, + float compare_to_value) +{ + + size_t start_of_range = 0; + size_t length_of_range = data_length; + + while (length_of_range != 0) + { + size_t half_length = length_of_range >> 1; + size_t actual_position = start_of_range + half_length; + + if (data_ptr[actual_position * data_ptr_stride] < compare_to_value) + { + start_of_range = ++actual_position; + length_of_range -= half_length + 1; + } + else + length_of_range = half_length; + } + return start_of_range; +}; + +__global__ void kernel_spike_generation( + float* __restrict__ input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* __restrict__ random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* __restrict__ output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t max_threadable_tasks) +{ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + + if (idx < max_threadable_tasks) + { + + size_t pattern_id = idx / spike_dim; + size_t position_spike = idx - (pattern_id * spike_dim); + + size_t position_x = blockIdx.y; + size_t position_y = blockIdx.z; + + float* p_ptr = input_pointer + pattern_id * input_dim_c0 + + position_x * input_dim_c2 + position_y; + + int64_t* out_ptr = output_pointer + pattern_id * output_dim_c0 + + position_x * output_dim_c2 + position_y + + position_spike * output_dim_c1; + + float* rand_ptr = random_values_pointer + + pattern_id * random_values_dim_c0 + + position_x * random_values_dim_c2 + position_y + + position_spike * 
random_values_dim_c1; + + *out_ptr = gpu_lower_bound(p_ptr, + h_dim, + input_dim_c1, + *rand_ptr); + } +}; + +bool SpikeGeneration2DManyIP::gpu_spike_generation( + float* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t number_of_pattern) +{ + cudaError_t status; + assert((x_dim < 65535)); + assert((y_dim < 65535)); + + // // ////////////////////////////////////// + // // Get infos about the device + // // ////////////////////////////////////// + + // int device; + // cudaDeviceProp prop; + + // status = cudaGetDevice(&device); + // assert((status == cudaSuccess)); + // // std::cout << "Device ID: " << device << std::endl; + + // status = cudaGetDeviceProperties(&prop, device); + // assert((status == cudaSuccess)); + // // std::cout << "Device name: " << prop.name << std::endl; + + // ////////////////////////////////////// + // Calculate the distribution on the GPU + // ////////////////////////////////////// + + int min_grid_size; + int block_size; + int grid_size; + + size_t dynamic_s_mem_size = 0; + size_t max_threadable_tasks = number_of_pattern * spike_dim * x_dim * y_dim; + + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html?highlight=blocksize#occupancy-calculator + status = cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, + (void*)kernel_spike_generation, + dynamic_s_mem_size, max_threadable_tasks); + assert((status == cudaSuccess)); + + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications + // Maximum dimensionality of grid of thread blocks: 3 + // Maximum x -dimension of a grid of thread blocks: (2^31)-1 + // Maximum y- or z-dimension of a grid of thread blocks: 65535 + + // Round up according to array size + // (I will separate x and y into other grid dimentsions soon) + grid_size = ((number_of_pattern * spike_dim) + block_size - 1) / block_size; + + // std::cout << min_grid_size << std::endl; + // std::cout << grid_size << std::endl; + // std::cout << block_size << std::endl; + // std::cout << max_threadable_tasks << std::endl; + + dim3 grid(grid_size, x_dim, y_dim); + + + kernel_spike_generation<<>>( + input_pointer, + input_dim_c0, + input_dim_c1, + input_dim_c2, + random_values_pointer, + random_values_dim_c0, + random_values_dim_c1, + random_values_dim_c2, + output_pointer, + output_dim_c0, + output_dim_c1, + output_dim_c2, + x_dim, + y_dim, + spike_dim, + h_dim, + (number_of_pattern * spike_dim)); + + cudaDeviceSynchronize(); + + return true; +}; \ No newline at end of file diff --git a/network/CPP_Cuda/SpikeGeneration2DManyIP.h b/network/CPP_Cuda/SpikeGeneration2DManyIP.h new file mode 100644 index 0000000..7c17031 --- /dev/null +++ b/network/CPP_Cuda/SpikeGeneration2DManyIP.h @@ -0,0 +1,68 @@ +#ifndef SRC_SPIKEGENERATION2DMANYIP_H_ +#define SRC_SPIKEGENERATION2DMANYIP_H_ + +#include + +#include +#include + +class SpikeGeneration2DManyIP +{ + public: + SpikeGeneration2DManyIP(); + ~SpikeGeneration2DManyIP(); + + bool spike_generation_entrypoint( + int64_t input_pointer_addr, int64_t input_dim_0, + int64_t input_dim_1, int64_t input_dim_2, int64_t input_dim_3, + int64_t random_values_pointer_addr, int64_t random_values_dim_0, + int64_t random_values_dim_1, 
int64_t random_values_dim_2, + int64_t random_values_dim_3, int64_t output_pointer_addr, + int64_t output_dim_0, int64_t output_dim_1, int64_t output_dim_2, + int64_t output_dim_3, int64_t number_of_cpu_processes); + + bool spike_generation( + float* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t pattern_id); + + bool gpu_spike_generation( + float* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t number_of_pattern); + + private: + size_t lower_bound(float* data_ptr, size_t data_length, + size_t data_ptr_stride, + float compare_to_value); +}; + +#endif /* SRC_SPIKEGENERATION2DMANYIP_H_ */ \ No newline at end of file diff --git a/network/CPP_Cuda/approximation_multiplication_function.cpp b/network/CPP_Cuda/approximation_multiplication_function.cpp new file mode 100644 index 0000000..6b54c98 --- /dev/null +++ b/network/CPP_Cuda/approximation_multiplication_function.cpp @@ -0,0 +1,138 @@ +#include + +#include +#include +#include + +#include "error_term.cpp" + +// Best way to plot the bits +// std::cout << std::bitset<32>(ap_y_ptr[1]) << "\n"; + +// The result needs to be written back into h_pointer (which contains h) +// Don't write to w_pointer. 
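// Sketch of the scheme implemented below (the GPU variant in
// gpu_approximation_multiplication_function.cu follows the same steps):
// each float is split into sign, exponent and mantissa; the mantissas,
// with the implicit leading one restored, are reduced to
// number_of_frac_bits fixed-point values and multiplied as 64-bit
// integers; when approximation_enable is set, error_term() estimates the
// partial products that a truncated multiplier (number_of_trunc_bits,
// ap_mask) would drop and subtracts them from the exact product; the
// integer result is then cast back to float, the exponent is rebuilt from
// the two input exponents (compensating the 2 * number_of_frac_bits
// scaling), and the combined sign is restored.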
+void approximation_multiplication_function( + float *h_pointer, float *w_pointer, int64_t pattern_length, + uint64_t number_of_trunc_bits, uint64_t number_of_frac_bits, + uint32_t *ap_x_ptr, uint32_t *ap_y_ptr, uint32_t *ap_x_exponent_ptr, + uint32_t *ap_y_exponent_ptr, uint32_t *ap_h_exponent_ptr, uint32_t ap_mask, + uint64_t *ap_res_ptr, uint32_t *sign_temp_ptr, bool approximation_enable) { + uint64_t counter; + + uint32_t *w_pointer_mod = (uint32_t *)w_pointer; + uint32_t *h_pointer_mod = (uint32_t *)h_pointer; + +// Calculate the new sign +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + sign_temp_ptr[counter] = (w_pointer_mod[counter] & 0x80000000) ^ + (h_pointer_mod[counter] & 0x80000000); + } + +// Extract the exponent +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_x_exponent_ptr[counter] = (h_pointer_mod[counter] << 1) >> 24; + } +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_y_exponent_ptr[counter] = (w_pointer_mod[counter] << 1) >> 24; + } + + // Cast and "normalize" + uint64_t shift_value = 32 - number_of_frac_bits; +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_x_ptr[counter] = + ((h_pointer_mod[counter] << 8) | 0x80000000) >> shift_value; + } + +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_y_ptr[counter] = + ((w_pointer_mod[counter] << 8) | 0x80000000) >> shift_value; + } + +// Make the zero -g-r-e-a-t- correct again +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + if (h_pointer[counter] == 0) { + ap_x_ptr[counter] = 0; + } + } + +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + if (w_pointer[counter] == 0) { + ap_y_ptr[counter] = 0; + } + } + +// res = x*y +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_res_ptr[counter] = static_cast(ap_x_ptr[counter]) * static_cast(ap_y_ptr[counter]); + } + + uint32_t temp; + if (approximation_enable == true){ + // Go through the vector values + for (counter = 0; counter < pattern_length; counter++) { + temp = error_term(ap_y_ptr[counter], ap_x_ptr[counter], ap_mask, + number_of_trunc_bits); + if (temp > ap_res_ptr[counter]) { + ap_res_ptr[counter] = 0; + } else { + ap_res_ptr[counter] -= temp; + } + } + } +// Cast from int to float +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + h_pointer[counter] = static_cast(ap_res_ptr[counter]); + } + +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_h_exponent_ptr[counter] = (h_pointer_mod[counter] << 1) >> 24; + } + +// devide by the 2^number_of_frac_bits +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_h_exponent_ptr[counter] -= 2 * number_of_frac_bits; + } + +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + temp = ap_x_exponent_ptr[counter] + ap_y_exponent_ptr[counter] + + ap_h_exponent_ptr[counter]; + if (temp > 252) { + ap_h_exponent_ptr[counter] = temp - 252; + } else { + // Here I try to catch the case that the new exponent is too small + ap_h_exponent_ptr[counter] = 0; + } + } + +// Remove the old exponent +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + h_pointer_mod[counter] = (h_pointer_mod[counter] << 9) >> 9; + } + +// Install the new exponent +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + h_pointer_mod[counter] += ap_h_exponent_ptr[counter] << 
23; + } + +// Add the sign back +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + h_pointer_mod[counter] += sign_temp_ptr[counter]; + } + + return; +} diff --git a/network/CPP_Cuda/error_term.cpp b/network/CPP_Cuda/error_term.cpp new file mode 100644 index 0000000..f683d1b --- /dev/null +++ b/network/CPP_Cuda/error_term.cpp @@ -0,0 +1,28 @@ +#include + +#include +#include + +uint32_t error_term(uint32_t a, uint32_t b, uint32_t ap_mask, + uint32_t number_of_trunc_bits) { + uint32_t error_value = 0; + + uint32_t temp_shift_a = a; + uint32_t temp_shift_b = b & ap_mask; + + uint32_t counter_trunc; + uint32_t temp; + + // Go through the bits + for (counter_trunc = 0; counter_trunc < number_of_trunc_bits; + counter_trunc++) { + temp = temp_shift_a & 1; + if (temp == 1) { + error_value += temp_shift_b & ap_mask; + } + temp_shift_a >>= 1; + temp_shift_b <<= 1; + } + + return error_value; +} diff --git a/network/CPP_Cuda/gpu_approximation_multiplication_function.cu b/network/CPP_Cuda/gpu_approximation_multiplication_function.cu new file mode 100644 index 0000000..7e145e8 --- /dev/null +++ b/network/CPP_Cuda/gpu_approximation_multiplication_function.cu @@ -0,0 +1,102 @@ +#include "gpu_error_term.cu" + +__device__ float gpu_approximation_multiplication_function( + float weight, + float input, + uint64_t number_of_frac_bits, + bool approximation_enable, + uint64_t number_of_trunc_bits, + uint32_t ap_mask) +{ + + float weight_copy = weight; + float input_copy = input; + + uint32_t *weight_pointer_mod = (uint32_t *)&weight_copy; + uint32_t *input_pointer_mod = (uint32_t *)&input_copy; + + // Calculate the new sign + uint32_t sign_temp = (*weight_pointer_mod & 0x80000000) ^ + (*input_pointer_mod & 0x80000000); + + // Extract the exponent + uint32_t ap_input_exponent = (*input_pointer_mod << 1) >> 24; + uint32_t ap_weight_exponent = (*weight_pointer_mod << 1) >> 24; + + // Cast and "normalize" + uint64_t shift_value = 32 - number_of_frac_bits; + + uint32_t ap_input_mantissa = + ((*input_pointer_mod << 8) | 0x80000000) >> shift_value; + + uint32_t ap_weight_mantissa = + ((*weight_pointer_mod << 8) | 0x80000000) >> shift_value; + + // Make the zero -g-r-e-a-t- correct again + if (input == 0) + { + ap_input_mantissa = 0; + } + + if (weight == 0) + { + ap_weight_mantissa = 0; + } + + // res = x*y + uint64_t ap_result = static_cast(ap_input_mantissa) * static_cast(ap_weight_mantissa); + + uint32_t temp; + // -------------------------------------------- + // Approx + // -------------------------------------------- + + if (approximation_enable == true) + { + // Go through the vector values + temp = gpu_error_term(ap_weight_mantissa, ap_input_mantissa, ap_mask, + number_of_trunc_bits); + if (temp > ap_result) + { + ap_result = 0; + } + else + { + ap_result -= temp; + } + } + + // Cast from int to float + float output = static_cast(ap_result); + if (ap_result == 0) + { + output = 0.0; + } + else + { + uint32_t *output_pointer_mod = (uint32_t *)&output; + + uint32_t ap_output_exponent = (*output_pointer_mod << 1) >> 24; + ap_output_exponent -= 2 * number_of_frac_bits; + temp = ap_input_exponent + ap_weight_exponent + ap_output_exponent; + if (temp > 252) + { + ap_output_exponent = temp - 252; + } + else + { + // Here I try to catch the case that the new exponent is too small + ap_output_exponent = 0; + } + + // Remove the old exponent + *output_pointer_mod = (*output_pointer_mod << 9) >> 9; + + // Install the new exponent + *output_pointer_mod += ap_output_exponent << 23; + + // Add the 
sign back + *output_pointer_mod += sign_temp; + } + return output; +}; \ No newline at end of file diff --git a/network/CPP_Cuda/gpu_error_term.cu b/network/CPP_Cuda/gpu_error_term.cu new file mode 100644 index 0000000..23bdc1b --- /dev/null +++ b/network/CPP_Cuda/gpu_error_term.cu @@ -0,0 +1,28 @@ +__device__ uint32_t gpu_error_term(uint32_t ap_weight_mantissa, + uint32_t ap_input_mantissa, + uint32_t ap_mask, + uint32_t number_of_trunc_bits) +{ + uint32_t error_value = 0; + + uint32_t temp_shift_a = ap_weight_mantissa; + uint32_t temp_shift_b = ap_input_mantissa & ap_mask; + + uint32_t counter_trunc; + uint32_t temp; + + // Go through the bits + for (counter_trunc = 0; counter_trunc < number_of_trunc_bits; + counter_trunc++) + { + temp = temp_shift_a & 1; + if (temp == 1) + { + error_value += temp_shift_b & ap_mask; + } + temp_shift_a >>= 1; + temp_shift_b <<= 1; + } + + return error_value; +} \ No newline at end of file diff --git a/C++/tools/pybind11_auto_pyi.py b/network/CPP_Cuda/pybind11_auto_pyi.py similarity index 82% rename from C++/tools/pybind11_auto_pyi.py rename to network/CPP_Cuda/pybind11_auto_pyi.py index f316485..73c8bd2 100644 --- a/C++/tools/pybind11_auto_pyi.py +++ b/network/CPP_Cuda/pybind11_auto_pyi.py @@ -1,7 +1,3 @@ -# Derived from code by Aaron.Ma -# which was posted here: -# https://blog.csdn.net/sinat_40922660/article/details/123850832 - # %% # pip install pybind11-stubgen from pybind11_stubgen import ModuleStubsGenerator # type: ignore diff --git a/network/CPP_NoCuda/HDynamicCNNManyIP.cpp b/network/CPP_NoCuda/HDynamicCNNManyIP.cpp new file mode 100644 index 0000000..4c1da91 --- /dev/null +++ b/network/CPP_NoCuda/HDynamicCNNManyIP.cpp @@ -0,0 +1,339 @@ +#include "HDynamicCNNManyIP.h" + +#include +#include +#include + +#include +#include +#include + + +HDynamicCNNManyIP::HDynamicCNNManyIP() +{ + +}; + +HDynamicCNNManyIP::~HDynamicCNNManyIP() +{ + +}; + +bool HDynamicCNNManyIP::update_entrypoint( + int64_t h_pointer_addr, + int64_t h_dim_0, + int64_t h_dim_1, + int64_t h_dim_2, + int64_t h_dim_3, + int64_t epsilon_xy_pointer_addr, + int64_t epsilon_xy_dim_0, + int64_t epsilon_xy_dim_1, + int64_t epsilon_xy_dim_2, + int64_t epsilon_t_pointer_addr, + int64_t epsilon_t_dim_0, + int64_t weights_pointer_addr, + int64_t weights_dim_0, + int64_t weights_dim_1, + int64_t input_pointer_addr, + int64_t input_dim_0, + int64_t input_dim_1, + int64_t input_dim_2, + int64_t input_dim_3, + int64_t init_vector_pointer_addr, + int64_t init_vector_dim_0, + int64_t number_of_processes, + float forgetting_offset, + int64_t gpu_tuning_factor) +{ + + size_t number_of_pattern = input_dim_0; + + size_t h_dim = init_vector_dim_0; + float* h_init_ptr = (float*)init_vector_pointer_addr; + assert((h_init_ptr != nullptr)); + assert((h_dim > 0)); + + float* h_pointer = (float*)h_pointer_addr; + assert((h_pointer != nullptr)); + assert((h_dim_0 > 0)); + assert((h_dim_1 > 0)); + assert((h_dim_2 > 0)); + assert((h_dim_3 > 0)); + + size_t h_dim_c0 = h_dim_1 * h_dim_2 * h_dim_3; + size_t h_dim_c1 = h_dim_2 * h_dim_3; + size_t h_dim_c2 = h_dim_3; + + float* epsilon_xy_pointer = (float*)epsilon_xy_pointer_addr; + assert((epsilon_xy_pointer != nullptr)); + assert((epsilon_xy_dim_0 > 0)); + assert((epsilon_xy_dim_1 > 0)); + + size_t epsilon_xy_dim_c0 = epsilon_xy_dim_2 * epsilon_xy_dim_1; + size_t epsilon_xy_dim_c1 = epsilon_xy_dim_2; + + float* epsilon_t_pointer = (float*)epsilon_t_pointer_addr; + assert((epsilon_t_pointer != nullptr)); + assert((epsilon_t_dim_0 > 0)); + + float* weights_pointer = 
(float*)weights_pointer_addr; + assert((weights_pointer != nullptr)); + assert((weights_dim_0 > 0)); + assert((weights_dim_1 > 0)); + + size_t weights_dim_c0 = weights_dim_1; + + int64_t* input_pointer = (int64_t*)input_pointer_addr; + assert((input_pointer != nullptr)); + assert((input_dim_0 > 0)); + assert((input_dim_1 > 0)); + assert((input_dim_2 > 0)); + assert((input_dim_3 > 0)); + + size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3; + size_t input_dim_c1 = input_dim_2 * input_dim_3; + size_t input_dim_c2 = input_dim_3; + + assert((h_dim == weights_dim_1)); + size_t number_of_spikes = input_dim_1; + size_t dim_x = input_dim_2; + size_t dim_y = input_dim_3; + + float forgetting_offset_local = forgetting_offset / static_cast(h_dim); + + + // -------------------- + if (number_of_processes > 0) + { + omp_set_num_threads(number_of_processes); + + size_t pattern_id; +#pragma omp parallel for + for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++) + { + update( + h_init_ptr, + h_pointer, + h_dim_c0, + h_dim_c1, + h_dim_c2, + h_dim, + epsilon_xy_pointer, + epsilon_xy_dim_c0, + epsilon_xy_dim_c1, + epsilon_t_pointer, + weights_pointer, + weights_dim_c0, + input_pointer, + input_dim_c0, + input_dim_c1, + input_dim_c2, + number_of_spikes, + dim_x, + dim_y, + forgetting_offset, + forgetting_offset_local, + pattern_id); + } + } + else + { + std::cout << "Error: number_of_processes <= 0" << std::endl; + return false; + } + return true; +}; + + +bool HDynamicCNNManyIP::update( + float* h_init_ptr, + float* h_pointer, + size_t h_dim_c0, + size_t h_dim_c1, + size_t h_dim_c2, + size_t h_dim, + float* epsilon_xy_pointer, + size_t epsilon_xy_dim_c0, + size_t epsilon_xy_dim_c1, + float* epsilon_t_pointer, + float* weights_pointer, + size_t weights_dim_c0, + int64_t* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + size_t number_of_spikes, + size_t dim_x, + size_t dim_y, + float forgetting_offset, + float forgetting_offset_local, + size_t pattern_id) +{ + + float* h_ptr; + float* epsilon_xy_ptr; + int64_t* input_ptr; + + size_t counter_x; + size_t counter_y; + + for (counter_x = 0; counter_x < dim_x; counter_x++) + { + for (counter_y = 0; counter_y < dim_y; counter_y++) + { + epsilon_xy_ptr = epsilon_xy_pointer + + counter_x * epsilon_xy_dim_c1 + counter_y; + + h_ptr = h_pointer + + pattern_id * h_dim_c0 + counter_x * h_dim_c2 + counter_y; + + input_ptr = input_pointer + + pattern_id * input_dim_c0 + counter_x * input_dim_c2 + counter_y; + + update_one_ip( + h_init_ptr, + h_ptr, + h_dim_c1, + h_dim, + weights_pointer, + weights_dim_c0, + input_ptr, + input_dim_c1, + epsilon_xy_ptr, + epsilon_xy_dim_c0, + epsilon_t_pointer, + number_of_spikes, + forgetting_offset, + forgetting_offset_local); + + } + } + + return true; +}; + +void HDynamicCNNManyIP::update_one_ip( + float* h_init_ptr, + float* h_pointer, + size_t h_dim_c1, + size_t h_dim, + float* weights_pointer, + size_t weights_dim_c0, + int64_t* input_pointer, + size_t input_dim_c1, + float* epsilon_xy_pointer, + size_t epsilon_xy_dim_c0, + float* epsilon_t_pointer, + size_t number_of_spikes, + float forgetting_offset, + float forgetting_offset_local) +{ + + float* h_temp = new float[h_dim]; + float* h_subsegment = new float[h_dim]; + + memcpy(h_subsegment, h_init_ptr, sizeof(float) * h_dim); + + size_t counter_spike; + size_t counter; + + float h_temp_sum; + float temp_value; + + float epsilon_subsegment; + float epsilon_scale = 1.0; + + int64_t* spike; + float* w_ptr; + + for (counter_spike 
= 0; counter_spike < number_of_spikes; counter_spike++) + { + if (epsilon_scale > 1E10) + { + temp_value = 1.0 / epsilon_scale; + +#pragma omp simd + for (counter = 0; counter < h_dim; counter++) + { + h_subsegment[counter] *= temp_value; + } + + epsilon_scale = 1.0; + } + + spike = input_pointer + counter_spike * input_dim_c1; + + if (*spike >= 0) + { + epsilon_subsegment = + epsilon_xy_pointer[*spike *epsilon_xy_dim_c0] * epsilon_t_pointer[counter_spike]; + + w_ptr = weights_pointer + *spike * weights_dim_c0; + + memcpy(h_temp, h_subsegment, sizeof(float) * h_dim); + +#pragma omp simd + for (counter = 0; counter < h_dim; counter++) + { + h_temp[counter] *= w_ptr[counter]; + } + + h_temp_sum = 0.0; +#pragma omp simd reduction(+ : h_temp_sum) + for (counter = 0; counter < h_dim; counter++) + { + h_temp_sum += h_temp[counter]; + } + + if (h_temp_sum > 1E-10) + { + temp_value = epsilon_scale * epsilon_subsegment / h_temp_sum; + +#pragma omp simd + for (counter = 0; counter < h_dim; counter++) + { + h_temp[counter] *= temp_value; + } + +#pragma omp simd + for (counter = 0; counter < h_dim; counter++) + { + h_subsegment[counter] += h_temp[counter]; + } + + if (forgetting_offset_local > 0.0) + { + temp_value = + epsilon_scale * epsilon_subsegment * forgetting_offset_local; + +#pragma omp simd + for (counter = 0; counter < h_dim; counter++) + { + h_subsegment[counter] += temp_value; + } + + epsilon_scale *= + 1.0 + epsilon_subsegment * (1.0 + forgetting_offset); + } + else + { + epsilon_scale *= 1.0 + epsilon_subsegment * 1.0; + } + } + } + } + + temp_value = 1.0 / epsilon_scale; +#pragma omp simd + for (counter = 0; counter < h_dim; counter++) + { + h_pointer[counter * h_dim_c1] = + h_subsegment[counter] * temp_value; + } + + delete[] h_temp; + delete[] h_subsegment; + + return; +}; + diff --git a/network/CPP_NoCuda/HDynamicCNNManyIP.h b/network/CPP_NoCuda/HDynamicCNNManyIP.h new file mode 100644 index 0000000..7729cfd --- /dev/null +++ b/network/CPP_NoCuda/HDynamicCNNManyIP.h @@ -0,0 +1,85 @@ +#ifndef SRC_HDYNAMICCNNMANYIP_H_ +#define SRC_HDYNAMICCNNMANYIP_H_ + +#include + +#include +#include + +class HDynamicCNNManyIP +{ + public: + HDynamicCNNManyIP(); + ~HDynamicCNNManyIP(); + + bool update_entrypoint( + int64_t h_pointer_addr, + int64_t h_dim_0, + int64_t h_dim_1, + int64_t h_dim_2, + int64_t h_dim_3, + int64_t epsilon_xy_pointer_addr, + int64_t epsilon_xy_dim_0, + int64_t epsilon_xy_dim_1, + int64_t epsilon_xy_dim_2, + int64_t epsilon_t_pointer_addr, + int64_t epsilon_t_dim_0, + int64_t weights_pointer_addr, + int64_t weights_dim_0, + int64_t weights_dim_1, + int64_t input_pointer_addr, + int64_t input_dim_0, + int64_t input_dim_1, + int64_t input_dim_2, + int64_t input_dim_3, + int64_t init_vector_pointer_addr, + int64_t init_vector_dim_0, + int64_t number_of_processes, + float forgetting_offset, + int64_t gpu_tuning_factor); + + private: + + bool update( + float* h_init_ptr, + float* h_pointer, + size_t h_dim_c0, + size_t h_dim_c1, + size_t h_dim_c2, + size_t h_dim, + float* epsilon_xy_pointer, + size_t epsilon_xy_dim_c0, + size_t epsilon_xy_dim_c1, + float* epsilon_t_pointer, + float* weights_pointer, + size_t weights_dim_c0, + int64_t* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + size_t number_of_spikes, + size_t dim_x, + size_t dim_y, + float forgetting_offset, + float forgetting_offset_local, + size_t pattern_id); + + void update_one_ip( + float* h_init_ptr, + float* h_pointer, + size_t h_dim_c1, + size_t h_dim, + float* weights_pointer, + 
size_t weights_dim_c0, + int64_t* input_pointer, + size_t input_dim_c1, + float* epsilon_xy_pointer, + size_t epsilon_xy_dim_c0, + float* epsilon_t_pointer, + size_t number_of_spikes, + float forgetting_offset, + float forgetting_offset_local); + +}; + +#endif /* SRC_HDYNAMICCNNMANYIP_H_ */ \ No newline at end of file diff --git a/network/CPP_NoCuda/Makefile b/network/CPP_NoCuda/Makefile new file mode 100644 index 0000000..7f23471 --- /dev/null +++ b/network/CPP_NoCuda/Makefile @@ -0,0 +1,64 @@ +# Change to your python bin directory (tested with Python 3.10.4) +PYBIN=~/P3.10GPU/bin/ +CC=/usr/lib64/ccache/clang++ + +PYBIND11INCLUDE=`$(PYBIN)python3 -m pybind11 --includes` +PARAMETERS_O= -O3 -std=c++14 $(PYBIND11INCLUDE) -fPIC -Wall -fopenmp=libomp + +PARAMETERS_Linker=-shared -lm -lomp -lstdc++ -Wall + +PYPOSTFIX=`$(PYBIN)python3-config --extension-suffix` + + +all: PyHDynamicCNNManyIP \ + PySpikeGeneration2DManyIP \ + PyMultiApp + +####################### + +HDynamicCNNManyIP.o: HDynamicCNNManyIP.h HDynamicCNNManyIP.cpp + $(CC) $(PARAMETERS_O) -c HDynamicCNNManyIP.cpp -o HDynamicCNNManyIP.o + +PyHDynamicCNNManyIP.o: HDynamicCNNManyIP.h PyHDynamicCNNManyIP.cpp + $(CC) $(PARAMETERS_O) -c PyHDynamicCNNManyIP.cpp -o PyHDynamicCNNManyIP.o + +PyHDynamicCNNManyIP: HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o + $(CC) $(PARAMETERS_Linker) -o PyHDynamicCNNManyIP HDynamicCNNManyIP.o PyHDynamicCNNManyIP.o + cp PyHDynamicCNNManyIP PyHDynamicCNNManyIP$(PYPOSTFIX) + $(PYBIN)python3 pybind11_auto_pyi.py + +####################### + +SpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h SpikeGeneration2DManyIP.cpp + $(CC) $(PARAMETERS_O) -c SpikeGeneration2DManyIP.cpp -o SpikeGeneration2DManyIP.o + +PySpikeGeneration2DManyIP.o: SpikeGeneration2DManyIP.h PySpikeGeneration2DManyIP.cpp + $(CC) $(PARAMETERS_O) -c PySpikeGeneration2DManyIP.cpp -o PySpikeGeneration2DManyIP.o + +PySpikeGeneration2DManyIP: SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o + $(CC) $(PARAMETERS_Linker) -o PySpikeGeneration2DManyIP SpikeGeneration2DManyIP.o PySpikeGeneration2DManyIP.o + cp PySpikeGeneration2DManyIP PySpikeGeneration2DManyIP$(PYPOSTFIX) + $(PYBIN)python3 pybind11_auto_pyi.py + +####################### + +MultiApp.o: MultiApp.h MultiApp.cpp approximation_multiplication_function.cpp \ + error_term.cpp + $(CC) $(PARAMETERS_O) -c MultiApp.cpp -o MultiApp.o + +PyMultiApp.o: MultiApp.h PyMultiApp.cpp + $(CC) $(PARAMETERS_O) -c PyMultiApp.cpp -o PyMultiApp.o + +PyMultiApp: MultiApp.o PyMultiApp.o + $(CC) $(PARAMETERS_Linker) -o PyMultiApp MultiApp.o PyMultiApp.o + cp PyMultiApp PyMultiApp$(PYPOSTFIX) + $(PYBIN)python3 pybind11_auto_pyi.py + +####################### +clean: + rm -f PyHDynamicCNNManyIP + rm -f PySpikeGeneration2DManyIP + rm -f PyMultiApp + rm -f *.o + rm -f *.so + diff --git a/network/CPP_NoCuda/MultiApp.cpp b/network/CPP_NoCuda/MultiApp.cpp new file mode 100644 index 0000000..eb328c0 --- /dev/null +++ b/network/CPP_NoCuda/MultiApp.cpp @@ -0,0 +1,187 @@ +#include "MultiApp.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "approximation_multiplication_function.cpp" + +MultiApp::MultiApp() +{ + +}; + +MultiApp::~MultiApp() +{ + +}; + +bool MultiApp::update(float* np_input_pointer, + float* np_weight_pointer, + float* np_output_pointer, int64_t pattern_dim, + int64_t feature_dim, int64_t x_dim, int64_t y_dim, + int64_t input_channel_dim, int64_t id_pattern, + bool approximation_enable, int64_t number_of_trunc_bits, + int64_t number_of_frac_bits) +{ + + 
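+  // Overview of this routine (derived from the loops below): it handles exactly one
+  // pattern. For every (x, y) position and every output feature it gathers the
+  // input-channel vector into ap_h_ptr, multiplies it element-wise against the
+  // feature's weight row via approximation_multiplication_function() -- which splits
+  // each float into sign, exponent and a number_of_frac_bits wide integer mantissa,
+  // multiplies the mantissas as integers and, if approximation_enable is set,
+  // subtracts an error_term() correction for the truncated low-order partial
+  // products -- and finally sums the products into one output element.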
assert((id_pattern >= 0)); + assert((id_pattern < pattern_dim)); + + float* np_input_pointer_pattern; + float* np_output_pointer_pattern; + + float* input_ptr; + float* output_ptr; + float* w_ptr; + + uint64_t pattern_size = input_channel_dim; + + std::vector ap_h_vector; + ap_h_vector.resize(pattern_size); + float* ap_h_ptr = ap_h_vector.data(); + + std::vector ap_x_vector; + ap_x_vector.resize(pattern_size); + uint32_t* ap_x_ptr = ap_x_vector.data(); + + std::vector ap_y_vector; + ap_y_vector.resize(pattern_size); + uint32_t* ap_y_ptr = ap_y_vector.data(); + + std::vector ap_x_exponent_vector; + ap_x_exponent_vector.resize(pattern_size); + uint32_t* ap_x_exponent_ptr = ap_x_exponent_vector.data(); + + std::vector ap_y_exponent_vector; + ap_y_exponent_vector.resize(pattern_size); + uint32_t* ap_y_exponent_ptr = ap_y_exponent_vector.data(); + + std::vector ap_h_exponent_vector; + ap_h_exponent_vector.resize(pattern_size); + uint32_t* ap_h_exponent_ptr = ap_h_exponent_vector.data(); + + std::vector ap_res_vector; + ap_res_vector.resize(pattern_size); + uint64_t* ap_res_ptr = ap_res_vector.data(); + + uint32_t ap_mask = static_cast(pow(2, number_of_trunc_bits)) - 1; + + std::vector sign_temp_vector; + sign_temp_vector.resize(pattern_size); + uint32_t* sign_temp_ptr = sign_temp_vector.data(); + + uint64_t input_pattern_size = input_channel_dim * x_dim * y_dim; + uint64_t output_pattern_size = feature_dim * x_dim * y_dim; + + np_input_pointer_pattern = np_input_pointer + id_pattern * input_pattern_size; + np_output_pointer_pattern = + np_output_pointer + id_pattern * output_pattern_size; + + uint64_t counter; + + uint64_t counter_x; + uint64_t counter_y; + uint64_t counter_feature; + uint64_t pos_xy; + uint64_t pos_xy_if; + + float temp_sum; + + uint64_t pattern_c_2 = x_dim * y_dim; + + for (counter_x = 0; counter_x < x_dim; counter_x++) + { + for (counter_y = 0; counter_y < y_dim; counter_y++) + { + pos_xy = counter_y + counter_x * y_dim; + for (counter_feature = 0; counter_feature < feature_dim; + counter_feature++) + { + pos_xy_if = counter_feature * pattern_c_2 + pos_xy; + + input_ptr = np_input_pointer_pattern + pos_xy; + output_ptr = np_output_pointer_pattern + pos_xy_if; + w_ptr = np_weight_pointer + counter_feature * input_channel_dim; + +#pragma omp simd + for (counter = 0; counter < pattern_size; counter++) + { + ap_h_ptr[counter] = input_ptr[counter * pattern_c_2]; + } + + approximation_multiplication_function( + ap_h_ptr, w_ptr, pattern_size, number_of_trunc_bits, + number_of_frac_bits, ap_x_ptr, ap_y_ptr, ap_x_exponent_ptr, + ap_y_exponent_ptr, ap_h_exponent_ptr, ap_mask, ap_res_ptr, + sign_temp_ptr, approximation_enable); + + temp_sum = 0.0; +#pragma omp simd reduction(+ \ + : temp_sum) + for (counter = 0; counter < pattern_size; counter++) + { + temp_sum += ap_h_ptr[counter]; + } + + output_ptr[0] = temp_sum; + } + } + } + + return true; +}; + +bool MultiApp::update_with_init_vector_multi_pattern( + int64_t np_input_pointer_addr, int64_t np_weight_pointer_addr, + int64_t np_output_pointer_addr, int64_t pattern_dim, int64_t feature_dim, + int64_t x_dim, int64_t y_dim, int64_t input_channel_dim, + int64_t number_of_processes, bool approximation_enable, + int64_t number_of_trunc_bits, int64_t number_of_frac) +{ + int64_t number_of_pattern = pattern_dim; + int64_t pattern_id; + + float* np_input_pointer = (float*)np_input_pointer_addr; + float* np_weight_pointer = (float*)np_weight_pointer_addr; + float* np_output_pointer = (float*)np_output_pointer_addr; + + 
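+  // The checks below validate the raw tensor addresses and dimensions handed over
+  // from Python; afterwards omp_set_num_threads(number_of_processes) is applied and
+  // the parallel for loop dispatches one independent call to update() per pattern.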
assert((np_input_pointer != nullptr)); + assert((np_output_pointer != nullptr)); + assert((np_weight_pointer != nullptr)); + + assert((pattern_dim > 0)); + assert((feature_dim > 0)); + assert((x_dim > 0)); + assert((y_dim > 0)); + assert((input_channel_dim > 0)); + + if (number_of_processes > 0) + { + omp_set_num_threads(number_of_processes); + // For debugging: Only one thread + // omp_set_num_threads(1); + +#pragma omp parallel for + for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++) + { + update(np_input_pointer, np_weight_pointer, + np_output_pointer, pattern_dim, feature_dim, x_dim, y_dim, + input_channel_dim, pattern_id, approximation_enable, + number_of_trunc_bits, number_of_frac); + } + } + else + { + std::cout << "Error: number_of_processes <= 0" << std::endl; + return false; + } + return true; +}; + diff --git a/network/CPP_NoCuda/MultiApp.h b/network/CPP_NoCuda/MultiApp.h new file mode 100644 index 0000000..e039820 --- /dev/null +++ b/network/CPP_NoCuda/MultiApp.h @@ -0,0 +1,32 @@ +#ifndef SRC_MultiApp_H_ +#define SRC_MultiApp_H_ + +#include + +#include +#include + +class MultiApp +{ + public: + MultiApp(); + ~MultiApp(); + + bool update(float* np_input_pointer, float* np_weight_pointer, + float* np_output_pointer, int64_t pattern_dim, + int64_t feature_dim, int64_t x_dim, int64_t y_dim, + int64_t input_channel_dim, int64_t id_pattern, + bool approximation_enable, int64_t number_of_trunc_bits, + int64_t number_of_frac); + + bool update_with_init_vector_multi_pattern( + int64_t np_input_pointer_addr, int64_t np_weight_pointer_addr, + int64_t np_output_pointer_addr, int64_t pattern_dim, int64_t feature_dim, + int64_t x_dim, int64_t y_dim, int64_t input_channel_dim, + int64_t number_of_processes, bool approximation_enable, + int64_t number_of_trunc_bits, int64_t number_of_frac); + + private: +}; + +#endif /* SRC_MultiApp_H_ */ \ No newline at end of file diff --git a/network/CPP_NoCuda/PyHDynamicCNNManyIP.cpp b/network/CPP_NoCuda/PyHDynamicCNNManyIP.cpp new file mode 100644 index 0000000..f59118b --- /dev/null +++ b/network/CPP_NoCuda/PyHDynamicCNNManyIP.cpp @@ -0,0 +1,14 @@ +#include + +#include "HDynamicCNNManyIP.h" + +namespace py = pybind11; + +PYBIND11_MODULE(PyHDynamicCNNManyIP, m) +{ + m.doc() = "HDynamicCNNManyIP Module"; + py::class_(m, "HDynamicCNNManyIP") + .def(py::init<>()) + .def("update", + &HDynamicCNNManyIP::update_entrypoint); +} \ No newline at end of file diff --git a/network/CPP_NoCuda/PyHDynamicCNNManyIP.pyi b/network/CPP_NoCuda/PyHDynamicCNNManyIP.pyi new file mode 100644 index 0000000..7032d50 --- /dev/null +++ b/network/CPP_NoCuda/PyHDynamicCNNManyIP.pyi @@ -0,0 +1,18 @@ +# +# AUTOMATICALLY GENERATED FILE, DO NOT EDIT! +# + +"""HDynamicCNNManyIP Module""" +from __future__ import annotations +import PyHDynamicCNNManyIP +import typing + +__all__ = [ + "HDynamicCNNManyIP" +] + + +class HDynamicCNNManyIP(): + def __init__(self) -> None: ... + def update(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int, arg16: int, arg17: int, arg18: int, arg19: int, arg20: int, arg21: int, arg22: float, arg23: int) -> bool: ... 
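+    # For reference, the positional arguments of update() follow
+    # HDynamicCNNManyIP::update_entrypoint (pointer arguments are raw addresses,
+    # e.g. obtained from data_ptr() of contiguous tensors):
+    #   arg0..arg4    h_pointer_addr, h_dim_0..h_dim_3
+    #   arg5..arg8    epsilon_xy_pointer_addr, epsilon_xy_dim_0..epsilon_xy_dim_2
+    #   arg9..arg10   epsilon_t_pointer_addr, epsilon_t_dim_0
+    #   arg11..arg13  weights_pointer_addr, weights_dim_0..weights_dim_1
+    #   arg14..arg18  input_pointer_addr, input_dim_0..input_dim_3
+    #   arg19..arg20  init_vector_pointer_addr, init_vector_dim_0
+    #   arg21 number_of_processes, arg22 forgetting_offset, arg23 gpu_tuning_factor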
+ pass diff --git a/network/CPP_NoCuda/PyMultiApp.cpp b/network/CPP_NoCuda/PyMultiApp.cpp new file mode 100644 index 0000000..fce188b --- /dev/null +++ b/network/CPP_NoCuda/PyMultiApp.cpp @@ -0,0 +1,14 @@ + +#include + +#include "MultiApp.h" + +namespace py = pybind11; + +PYBIND11_MODULE(PyMultiApp, m) { + m.doc() = "MultiApp Module"; + py::class_(m, "MultiApp") + .def(py::init<>()) + .def("update_with_init_vector_multi_pattern", + &MultiApp::update_with_init_vector_multi_pattern); +} \ No newline at end of file diff --git a/network/CPP_NoCuda/PyMultiApp.pyi b/network/CPP_NoCuda/PyMultiApp.pyi new file mode 100644 index 0000000..54768cc --- /dev/null +++ b/network/CPP_NoCuda/PyMultiApp.pyi @@ -0,0 +1,18 @@ +# +# AUTOMATICALLY GENERATED FILE, DO NOT EDIT! +# + +"""MultiApp Module""" +from __future__ import annotations +import PyMultiApp +import typing + +__all__ = [ + "MultiApp" +] + + +class MultiApp(): + def __init__(self) -> None: ... + def update_with_init_vector_multi_pattern(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: bool, arg10: int, arg11: int) -> bool: ... + pass diff --git a/network/CPP_NoCuda/PySpikeGeneration2DManyIP.cpp b/network/CPP_NoCuda/PySpikeGeneration2DManyIP.cpp new file mode 100644 index 0000000..eaece6f --- /dev/null +++ b/network/CPP_NoCuda/PySpikeGeneration2DManyIP.cpp @@ -0,0 +1,15 @@ + +#include + +#include "SpikeGeneration2DManyIP.h" + +namespace py = pybind11; + +PYBIND11_MODULE(PySpikeGeneration2DManyIP, m) +{ + m.doc() = "SpikeGeneration2DManyIP Module"; + py::class_(m, "SpikeGeneration2DManyIP") + .def(py::init<>()) + .def("spike_generation", + &SpikeGeneration2DManyIP::spike_generation_entrypoint); +} diff --git a/network/CPP_NoCuda/PySpikeGeneration2DManyIP.pyi b/network/CPP_NoCuda/PySpikeGeneration2DManyIP.pyi new file mode 100644 index 0000000..7c2257d --- /dev/null +++ b/network/CPP_NoCuda/PySpikeGeneration2DManyIP.pyi @@ -0,0 +1,18 @@ +# +# AUTOMATICALLY GENERATED FILE, DO NOT EDIT! +# + +"""SpikeGeneration2DManyIP Module""" +from __future__ import annotations +import PySpikeGeneration2DManyIP +import typing + +__all__ = [ + "SpikeGeneration2DManyIP" +] + + +class SpikeGeneration2DManyIP(): + def __init__(self) -> None: ... + def spike_generation(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, arg8: int, arg9: int, arg10: int, arg11: int, arg12: int, arg13: int, arg14: int, arg15: int) -> bool: ... 
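+    # For reference, the positional arguments of spike_generation() follow
+    # SpikeGeneration2DManyIP::spike_generation_entrypoint:
+    #   arg0..arg4    input_pointer_addr, input_dim_0..input_dim_3
+    #   arg5..arg9    random_values_pointer_addr, random_values_dim_0..random_values_dim_3
+    #   arg10..arg14  output_pointer_addr, output_dim_0..output_dim_3
+    #   arg15         number_of_cpu_processes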
+ pass diff --git a/network/CPP_NoCuda/SpikeGeneration2DManyIP.cpp b/network/CPP_NoCuda/SpikeGeneration2DManyIP.cpp new file mode 100644 index 0000000..5f53813 --- /dev/null +++ b/network/CPP_NoCuda/SpikeGeneration2DManyIP.cpp @@ -0,0 +1,205 @@ +#include "SpikeGeneration2DManyIP.h" + +#include +#include +#include + +#include +#include +#include + + +SpikeGeneration2DManyIP::SpikeGeneration2DManyIP() +{ + +}; + +SpikeGeneration2DManyIP::~SpikeGeneration2DManyIP() +{ + +}; + +bool SpikeGeneration2DManyIP::spike_generation_entrypoint( + int64_t input_pointer_addr, int64_t input_dim_0, + int64_t input_dim_1, int64_t input_dim_2, int64_t input_dim_3, + int64_t random_values_pointer_addr, int64_t random_values_dim_0, + int64_t random_values_dim_1, int64_t random_values_dim_2, + int64_t random_values_dim_3, int64_t output_pointer_addr, + int64_t output_dim_0, int64_t output_dim_1, int64_t output_dim_2, + int64_t output_dim_3, int64_t number_of_cpu_processes) +{ + + float* input_pointer = (float*)input_pointer_addr; + float* random_values_pointer = (float*)random_values_pointer_addr; + int64_t* output_pointer = (int64_t*)output_pointer_addr; + + // Input + assert((input_pointer != nullptr)); + assert((input_dim_0 > 0)); + assert((input_dim_1 > 0)); + assert((input_dim_2 > 0)); + assert((input_dim_3 > 0)); + + // Random + assert((random_values_pointer != nullptr)); + assert((random_values_dim_0 > 0)); + assert((random_values_dim_1 > 0)); + assert((random_values_dim_2 > 0)); + assert((random_values_dim_3 > 0)); + + // Output + assert((output_pointer != nullptr)); + assert((output_dim_0 > 0)); + assert((output_dim_1 > 0)); + assert((output_dim_2 > 0)); + assert((output_dim_3 > 0)); + + // Input + size_t input_dim_c0 = input_dim_1 * input_dim_2 * input_dim_3; + size_t input_dim_c1 = input_dim_2 * input_dim_3; + size_t input_dim_c2 = input_dim_3; + + // Random + size_t random_values_dim_c0 = + random_values_dim_1 * random_values_dim_2 * random_values_dim_3; + size_t random_values_dim_c1 = + random_values_dim_2 * random_values_dim_3; + size_t random_values_dim_c2 = random_values_dim_3; + + // Output + size_t output_dim_c0 = + output_dim_1 * output_dim_2 * output_dim_3; + size_t output_dim_c1 = output_dim_2 * output_dim_3; + size_t output_dim_c2 = output_dim_3; + + size_t number_of_pattern = input_dim_0; + size_t h_dim = input_dim_1; + size_t spike_dim = output_dim_1; + size_t x_dim = output_dim_2; + size_t y_dim = output_dim_2; + + if (number_of_cpu_processes > 0) + { + + omp_set_num_threads(number_of_cpu_processes); + // DEBUG: + // omp_set_num_threads(1); + + size_t pattern_id; + +#pragma omp parallel for + for (pattern_id = 0; pattern_id < number_of_pattern; pattern_id++) + { + spike_generation( + input_pointer, + input_dim_c0, + input_dim_c1, + input_dim_c2, + random_values_pointer, + random_values_dim_c0, + random_values_dim_c1, + random_values_dim_c2, + output_pointer, + output_dim_c0, + output_dim_c1, + output_dim_c2, + x_dim, + y_dim, + spike_dim, + h_dim, + pattern_id); + } + } + else + { + std::cout << "Error: number_of_processes <= 0" << std::endl; + return false; + } + + return true; +}; + +bool SpikeGeneration2DManyIP::spike_generation( + float* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t 
spike_dim, + size_t h_dim, + size_t pattern_id) +{ + + size_t counter; + size_t counter_x = 0; + size_t counter_y = 0; + + float* p_ptr = nullptr; + int64_t* out_ptr = nullptr; + float* rand_ptr = nullptr; + + for (counter_x = 0; counter_x < x_dim; counter_x++) + { + for (counter_y = 0; counter_y < y_dim; counter_y++) + { + p_ptr = input_pointer + pattern_id * input_dim_c0 + + counter_x * input_dim_c2 + counter_y; + // + counter * input_dim_c1 + + out_ptr = output_pointer + pattern_id * output_dim_c0 + + counter_x * output_dim_c2 + counter_y; + // + counter * output_dim_c1 + + rand_ptr = random_values_pointer + + pattern_id * random_values_dim_c0 + + counter_x * random_values_dim_c2 + counter_y; + // + counter * random_values_dim_c1 + + for (counter = 0; counter < spike_dim; counter++) + { + out_ptr[counter * output_dim_c1] = lower_bound(p_ptr, + h_dim, + input_dim_c1, + rand_ptr[counter * random_values_dim_c1]); + } + } + } + + return true; +}; + +// algorithmic idea stolen from libc++ +size_t SpikeGeneration2DManyIP::lower_bound(float* data_ptr, + size_t data_length, + size_t data_ptr_stride, + float compare_to_value) +{ + + size_t start_of_range = 0; + size_t length_of_range = data_length; + + while (length_of_range != 0) + { + size_t half_length = length_of_range >> 1; + size_t actual_position = start_of_range + half_length; + + if (data_ptr[actual_position * data_ptr_stride] < compare_to_value) + { + start_of_range = ++actual_position; + length_of_range -= half_length + 1; + } + else + length_of_range = half_length; + } + return start_of_range; +}; + diff --git a/network/CPP_NoCuda/SpikeGeneration2DManyIP.h b/network/CPP_NoCuda/SpikeGeneration2DManyIP.h new file mode 100644 index 0000000..34ee70c --- /dev/null +++ b/network/CPP_NoCuda/SpikeGeneration2DManyIP.h @@ -0,0 +1,49 @@ +#ifndef SRC_SPIKEGENERATION2DMANYIP_H_ +#define SRC_SPIKEGENERATION2DMANYIP_H_ + +#include + +#include +#include + +class SpikeGeneration2DManyIP +{ + public: + SpikeGeneration2DManyIP(); + ~SpikeGeneration2DManyIP(); + + bool spike_generation_entrypoint( + int64_t input_pointer_addr, int64_t input_dim_0, + int64_t input_dim_1, int64_t input_dim_2, int64_t input_dim_3, + int64_t random_values_pointer_addr, int64_t random_values_dim_0, + int64_t random_values_dim_1, int64_t random_values_dim_2, + int64_t random_values_dim_3, int64_t output_pointer_addr, + int64_t output_dim_0, int64_t output_dim_1, int64_t output_dim_2, + int64_t output_dim_3, int64_t number_of_cpu_processes); + + bool spike_generation( + float* input_pointer, + size_t input_dim_c0, + size_t input_dim_c1, + size_t input_dim_c2, + float* random_values_pointer, + size_t random_values_dim_c0, + size_t random_values_dim_c1, + size_t random_values_dim_c2, + int64_t* output_pointer, + size_t output_dim_c0, + size_t output_dim_c1, + size_t output_dim_c2, + size_t x_dim, + size_t y_dim, + size_t spike_dim, + size_t h_dim, + size_t pattern_id); + + private: + size_t lower_bound(float* data_ptr, size_t data_length, + size_t data_ptr_stride, + float compare_to_value); +}; + +#endif /* SRC_SPIKEGENERATION2DMANYIP_H_ */ \ No newline at end of file diff --git a/network/CPP_NoCuda/approximation_multiplication_function.cpp b/network/CPP_NoCuda/approximation_multiplication_function.cpp new file mode 100644 index 0000000..6b54c98 --- /dev/null +++ b/network/CPP_NoCuda/approximation_multiplication_function.cpp @@ -0,0 +1,138 @@ +#include + +#include +#include +#include + +#include "error_term.cpp" + +// Best way to plot the bits +// std::cout << 
std::bitset<32>(ap_y_ptr[1]) << "\n"; + +// The result needs to be written back into h_pointer (which contains h) +// Don't write to w_pointer. +void approximation_multiplication_function( + float *h_pointer, float *w_pointer, int64_t pattern_length, + uint64_t number_of_trunc_bits, uint64_t number_of_frac_bits, + uint32_t *ap_x_ptr, uint32_t *ap_y_ptr, uint32_t *ap_x_exponent_ptr, + uint32_t *ap_y_exponent_ptr, uint32_t *ap_h_exponent_ptr, uint32_t ap_mask, + uint64_t *ap_res_ptr, uint32_t *sign_temp_ptr, bool approximation_enable) { + uint64_t counter; + + uint32_t *w_pointer_mod = (uint32_t *)w_pointer; + uint32_t *h_pointer_mod = (uint32_t *)h_pointer; + +// Calculate the new sign +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + sign_temp_ptr[counter] = (w_pointer_mod[counter] & 0x80000000) ^ + (h_pointer_mod[counter] & 0x80000000); + } + +// Extract the exponent +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_x_exponent_ptr[counter] = (h_pointer_mod[counter] << 1) >> 24; + } +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_y_exponent_ptr[counter] = (w_pointer_mod[counter] << 1) >> 24; + } + + // Cast and "normalize" + uint64_t shift_value = 32 - number_of_frac_bits; +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_x_ptr[counter] = + ((h_pointer_mod[counter] << 8) | 0x80000000) >> shift_value; + } + +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_y_ptr[counter] = + ((w_pointer_mod[counter] << 8) | 0x80000000) >> shift_value; + } + +// Make the zero -g-r-e-a-t- correct again +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + if (h_pointer[counter] == 0) { + ap_x_ptr[counter] = 0; + } + } + +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + if (w_pointer[counter] == 0) { + ap_y_ptr[counter] = 0; + } + } + +// res = x*y +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_res_ptr[counter] = static_cast(ap_x_ptr[counter]) * static_cast(ap_y_ptr[counter]); + } + + uint32_t temp; + if (approximation_enable == true){ + // Go through the vector values + for (counter = 0; counter < pattern_length; counter++) { + temp = error_term(ap_y_ptr[counter], ap_x_ptr[counter], ap_mask, + number_of_trunc_bits); + if (temp > ap_res_ptr[counter]) { + ap_res_ptr[counter] = 0; + } else { + ap_res_ptr[counter] -= temp; + } + } + } +// Cast from int to float +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + h_pointer[counter] = static_cast(ap_res_ptr[counter]); + } + +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_h_exponent_ptr[counter] = (h_pointer_mod[counter] << 1) >> 24; + } + +// devide by the 2^number_of_frac_bits +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + ap_h_exponent_ptr[counter] -= 2 * number_of_frac_bits; + } + +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + temp = ap_x_exponent_ptr[counter] + ap_y_exponent_ptr[counter] + + ap_h_exponent_ptr[counter]; + if (temp > 252) { + ap_h_exponent_ptr[counter] = temp - 252; + } else { + // Here I try to catch the case that the new exponent is too small + ap_h_exponent_ptr[counter] = 0; + } + } + +// Remove the old exponent +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + h_pointer_mod[counter] = (h_pointer_mod[counter] << 9) >> 9; + } + +// Install the new 
exponent +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + h_pointer_mod[counter] += ap_h_exponent_ptr[counter] << 23; + } + +// Add the sign back +#pragma omp simd + for (counter = 0; counter < pattern_length; counter++) { + h_pointer_mod[counter] += sign_temp_ptr[counter]; + } + + return; +} diff --git a/network/CPP_NoCuda/error_term.cpp b/network/CPP_NoCuda/error_term.cpp new file mode 100644 index 0000000..f683d1b --- /dev/null +++ b/network/CPP_NoCuda/error_term.cpp @@ -0,0 +1,28 @@ +#include + +#include +#include + +uint32_t error_term(uint32_t a, uint32_t b, uint32_t ap_mask, + uint32_t number_of_trunc_bits) { + uint32_t error_value = 0; + + uint32_t temp_shift_a = a; + uint32_t temp_shift_b = b & ap_mask; + + uint32_t counter_trunc; + uint32_t temp; + + // Go through the bits + for (counter_trunc = 0; counter_trunc < number_of_trunc_bits; + counter_trunc++) { + temp = temp_shift_a & 1; + if (temp == 1) { + error_value += temp_shift_b & ap_mask; + } + temp_shift_a >>= 1; + temp_shift_b <<= 1; + } + + return error_value; +} diff --git a/network/CPP_NoCuda/pybind11_auto_pyi.py b/network/CPP_NoCuda/pybind11_auto_pyi.py new file mode 100644 index 0000000..73c8bd2 --- /dev/null +++ b/network/CPP_NoCuda/pybind11_auto_pyi.py @@ -0,0 +1,23 @@ +# %% +# pip install pybind11-stubgen +from pybind11_stubgen import ModuleStubsGenerator # type: ignore +import glob + + +def process(module_name: str) -> None: + module = ModuleStubsGenerator(module_name) + module.parse() + module.write_setup_py = False + + with open(module_name + ".pyi", "w") as fp: + fp.write("#\n# AUTOMATICALLY GENERATED FILE, DO NOT EDIT!\n#\n\n") + fp.write("\n".join(module.to_lines())) + + +Files = glob.glob("*.so") + +for fid in Files: + Idx: int = fid.find(".") + module_name: str = fid[:Idx] + print("Processing: " + module_name) + process(module_name) diff --git a/network/Conv2dApproximation.py b/network/Conv2dApproximation.py new file mode 100644 index 0000000..3a3525c --- /dev/null +++ b/network/Conv2dApproximation.py @@ -0,0 +1,318 @@ +import torch +import math + +from network.CPP.PyMultiApp import MultiApp + + +class Conv2dApproximation(torch.nn.Module): + + in_channels: int | None = None + out_channels: int | None = None + kernel_size: list[int] | None = None + stride: list[int] = [1, 1] + padding: list[int] = [0, 0] + dilation: list[int] = [1, 1] + use_bias: bool = False + + approximation_enable: bool = False + number_of_trunc_bits: int = -1 + number_of_frac: int = -1 + + number_of_processes: int = 1 + + weights: torch.nn.parameter.Parameter + bias: torch.nn.parameter.Parameter | None + + device: torch.device + dtype: torch.dtype + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: list[int], + stride: list[int] = [1, 1], + padding: list[int] = [0, 0], + dilation: list[int] = [1, 1], + bias: bool = True, + approximation_enable: bool = False, + number_of_trunc_bits: int = -1, + number_of_frac: int = -1, + number_of_processes: int = 1, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> None: + super().__init__() + + assert device is not None + self.device = device + + assert dtype is not None + self.dtype = dtype + + assert len(kernel_size) == 2 + assert len(stride) == 2 + assert len(padding) == 2 + assert len(dilation) == 2 + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.use_bias = bias + 
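+        # approximation_enable, number_of_trunc_bits and number_of_frac are forwarded
+        # unchanged to the C++ MultiApp kernel in forward(); number_of_processes is
+        # only used for CPU execution (forward() passes -1 when the input is on a GPU).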
self.number_of_processes = number_of_processes + + self.approximation_enable = approximation_enable + self.number_of_trunc_bits = number_of_trunc_bits + self.number_of_frac = number_of_frac + + if self.use_bias is True: + self.bias: torch.nn.parameter.Parameter | None = ( + torch.nn.parameter.Parameter( + torch.empty( + (out_channels), + dtype=self.dtype, + device=self.device, + ) + ) + ) + else: + self.bias = None + + self.weights: torch.nn.parameter.Parameter = torch.nn.parameter.Parameter( + torch.empty( + (out_channels, in_channels, *kernel_size), + dtype=self.dtype, + device=self.device, + ) + ) + + self.functional_multi = FunctionalMultiConv2d.apply + + self.reset_parameters() + + def reset_parameters(self) -> None: + # Stolen from original torch conv2 code + torch.nn.init.kaiming_uniform_(self.weights, a=math.sqrt(5)) + if self.bias is not None: + fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weights) + if fan_in != 0: + bound = 1 / math.sqrt(fan_in) + torch.nn.init.uniform_(self.bias, -bound, bound) + + def calculate_output_size(self, value: torch.Tensor) -> None: + + coordinates_0, coordinates_1 = self._get_coordinates(value) + + self.output_size: torch.Tensor = torch.tensor( + [ + coordinates_0.shape[1], + coordinates_1.shape[1], + ], + dtype=torch.int64, + ) + self.output_size.requires_grad_(False) + + def _get_coordinates( + self, value: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor]: + """Function converts parameter in coordinates + for the convolution window""" + + assert value is not None + assert torch.is_tensor(value) is True + assert value.dim() == 1 + assert torch.numel(value) == 2 + assert value.dtype == torch.int64 + assert value[0] > 0 + assert value[1] > 0 + + assert self.kernel_size is not None + assert len(self.kernel_size) == 2 + assert len(self.stride) == 2 + assert len(self.dilation) == 2 + assert len(self.padding) == 2 + + unfold_0: torch.nn.Unfold = torch.nn.Unfold( + kernel_size=(int(self.kernel_size[0]), 1), + dilation=int(self.dilation[0]), + padding=int(self.padding[0]), + stride=int(self.stride[0]), + ) + + unfold_1: torch.nn.Unfold = torch.nn.Unfold( + kernel_size=(1, int(self.kernel_size[1])), + dilation=int(self.dilation[1]), + padding=int(self.padding[1]), + stride=int(self.stride[1]), + ) + + coordinates_0: torch.Tensor = ( + unfold_0( + torch.unsqueeze( + torch.unsqueeze( + torch.unsqueeze( + torch.arange(0, int(value[0]), dtype=torch.float32), + 1, + ), + 0, + ), + 0, + ) + ) + .squeeze(0) + .type(torch.int64) + ) + + coordinates_1: torch.Tensor = ( + unfold_1( + torch.unsqueeze( + torch.unsqueeze( + torch.unsqueeze( + torch.arange(0, int(value[1]), dtype=torch.float32), + 0, + ), + 0, + ), + 0, + ) + ) + .squeeze(0) + .type(torch.int64) + ) + + return coordinates_0, coordinates_1 + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + assert input.dim() == 4 + + assert self.kernel_size is not None + + input_size = torch.Tensor([int(input.shape[-2]), int(input.shape[-1])]).type( + dtype=torch.int64 + ) + + self.calculate_output_size(input_size) + + input_fold = torch.nn.functional.fold( + torch.nn.functional.unfold( + input.requires_grad_(True), + tuple(self.kernel_size), + tuple(self.dilation), + tuple(self.padding), + tuple(self.stride), + ), + output_size=(int(self.output_size[0]), int(self.output_size[1])), + kernel_size=(1, 1), + dilation=(1, 1), + padding=(0, 0), + stride=(1, 1), + ) + + weights_fold = torch.nn.functional.unfold( + self.weights.requires_grad_(True), + tuple(self.kernel_size), + 
tuple(self.dilation), + tuple(self.padding), + tuple(self.stride), + ).squeeze(-1) + + if input.device == torch.device("cpu"): + number_of_cpu_processes: int = int(self.number_of_processes) + else: + number_of_cpu_processes = -1 + + # Here... + parameter_list = torch.tensor( + [ + int(self.approximation_enable), # 0 + int(self.number_of_trunc_bits), # 1 + int(self.number_of_frac), # 2 + int(number_of_cpu_processes), # 3 + ], + dtype=torch.int64, + ) + + output = self.functional_multi(input_fold, weights_fold, parameter_list) + + if self.bias is not None: + output += self.bias.unsqueeze(0).unsqueeze(-1).unsqueeze(-1) + + return output + + +class FunctionalMultiConv2d(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weights: torch.Tensor, + parameter_list: torch.Tensor, + ) -> torch.Tensor: + + assert input.ndim == 4 + assert input.dtype is torch.float32 + assert input.is_contiguous() is True + + assert weights.ndim == 2 + assert weights.dtype is torch.float32 + assert weights.is_contiguous() is True + + assert input.shape[1] == weights.shape[1] + + approximation_enable = bool(parameter_list[0]) + number_of_trunc_bits = int(parameter_list[1]) + number_of_frac = int(parameter_list[2]) + number_of_processes = int(parameter_list[3]) + + assert input.device == weights.device + + output = torch.empty( + (input.shape[0], weights.shape[0], input.shape[2], input.shape[3]), + dtype=weights.dtype, + device=weights.device, + requires_grad=True, + ) + assert output.is_contiguous() is True + + multiplier: MultiApp = MultiApp() + + multiplier.update_with_init_vector_multi_pattern( + input.data_ptr(), + weights.data_ptr(), + output.data_ptr(), + int(output.shape[0]), # pattern + int(output.shape[1]), # feature channel + int(output.shape[2]), # x + int(output.shape[3]), # y + int(input.shape[1]), # input channel + int(number_of_processes), + bool(approximation_enable), + int(number_of_trunc_bits), + int(number_of_frac), + ) + + ctx.save_for_backward( + input.detach(), + weights.detach(), + ) + + return output + + @staticmethod + def backward(ctx, grad_output): + + (input, weights) = ctx.saved_tensors + + grad_input = ( + grad_output.unsqueeze(2) * weights.unsqueeze(0).unsqueeze(-1).unsqueeze(-1) + ).sum(1) + grad_weights = ( + (grad_output.unsqueeze(2) * input.unsqueeze(1)).sum(0).sum(-1).sum(-1) + ) + grad_parameter_list = None + + return (grad_input, grad_weights, grad_parameter_list) diff --git a/DATA_MNIST/PyTorch_Non_Spike_Network/Dataset.py b/network/Dataset.py similarity index 68% rename from DATA_MNIST/PyTorch_Non_Spike_Network/Dataset.py rename to network/Dataset.py index 11f9854..44c314c 100644 --- a/DATA_MNIST/PyTorch_Non_Spike_Network/Dataset.py +++ b/network/Dataset.py @@ -1,39 +1,8 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - from abc import ABC, abstractmethod import torch import numpy as np import torchvision as tv # type: ignore -from Parameter import Config +from network.Parameter import Config class DatasetMaster(torch.utils.data.Dataset, ABC): @@ -114,8 +83,7 @@ class DatasetMNIST(DatasetMaster): def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: """0. The test image comes in 1. is center cropped - 2. on/off filteres - 3. returned. + 2. returned. This is a 1 channel version (e.g. one gray channel). """ @@ -134,19 +102,14 @@ class DatasetMNIST(DatasetMaster): # Preprocess the input data pattern = scripted_transforms(pattern) - # => On/Off - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) + gray = pattern[:, 0:1, :, :] + 1e-20 return gray def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: """0. The training image comes in 1. is cropped from a random position - 2. on/off filteres - 3. returned. + 2. returned. This is a 1 channel version (e.g. one gray channel). """ @@ -165,11 +128,7 @@ class DatasetMNIST(DatasetMaster): # Preprocess the input data pattern = scripted_transforms(pattern) - # => On/Off - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) + gray = pattern[:, 0:1, :, :] + 1e-20 return gray @@ -204,8 +163,7 @@ class DatasetFashionMNIST(DatasetMaster): def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: """0. The test image comes in 1. is center cropped - 2. on/off filteres - 3. returned. + 2. returned. This is a 1 channel version (e.g. one gray channel). """ @@ -224,19 +182,14 @@ class DatasetFashionMNIST(DatasetMaster): # Preprocess the input data pattern = scripted_transforms(pattern) - # => On/Off - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) + gray = pattern[:, 0:1, :, :] + 1e-20 return gray def pattern_filter_train(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: """0. The training image comes in 1. is cropped from a random position - 2. on/off filteres - 3. returned. + 2. returned. This is a 1 channel version (e.g. one gray channel). """ @@ -262,11 +215,7 @@ class DatasetFashionMNIST(DatasetMaster): # Preprocess the input data pattern = scripted_transforms(pattern) - # => On/Off - my_on_off_filter: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - gray: torch.Tensor = my_on_off_filter( - pattern[:, 0:1, :, :], - ) + gray = pattern[:, 0:1, :, :] + 1e-20 return gray @@ -300,8 +249,7 @@ class DatasetCIFAR(DatasetMaster): def pattern_filter_test(self, pattern: torch.Tensor, cfg: Config) -> torch.Tensor: """0. The test image comes in 1. is center cropped - 2. 
on/off filteres - 3. returned. + 2. returned. This is a 3 channel version (e.g. r,g,b channels). """ @@ -320,20 +268,9 @@ class DatasetCIFAR(DatasetMaster): # Preprocess the input data pattern = scripted_transforms(pattern) - # => On/Off - - my_on_off_filter_r: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - my_on_off_filter_g: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[1]) - my_on_off_filter_b: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[2]) - r: torch.Tensor = my_on_off_filter_r( - pattern[:, 0:1, :, :], - ) - g: torch.Tensor = my_on_off_filter_g( - pattern[:, 1:2, :, :], - ) - b: torch.Tensor = my_on_off_filter_b( - pattern[:, 2:3, :, :], - ) + r = pattern[:, 0:1, :, :] + 1e-20 + g = pattern[:, 1:2, :, :] + 1e-20 + b = pattern[:, 2:3, :, :] + 1e-20 new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1) return new_tensor @@ -343,8 +280,7 @@ class DatasetCIFAR(DatasetMaster): 1. is cropped from a random position 2. is randomly horizontally flipped 3. is randomly color jitteres - 4. on/off filteres - 5. returned. + 4. returned. This is a 3 channel version (e.g. r,g,b channels). """ @@ -369,54 +305,13 @@ class DatasetCIFAR(DatasetMaster): # Preprocess the input data pattern = scripted_transforms(pattern) - # => On/Off - my_on_off_filter_r: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[0]) - my_on_off_filter_g: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[1]) - my_on_off_filter_b: OnOffFilter = OnOffFilter(p=cfg.image_statistics.mean[2]) - r: torch.Tensor = my_on_off_filter_r( - pattern[:, 0:1, :, :], - ) - g: torch.Tensor = my_on_off_filter_g( - pattern[:, 1:2, :, :], - ) - b: torch.Tensor = my_on_off_filter_b( - pattern[:, 2:3, :, :], - ) + r = pattern[:, 0:1, :, :] + 1e-20 + g = pattern[:, 1:2, :, :] + 1e-20 + b = pattern[:, 2:3, :, :] + 1e-20 new_tensor: torch.Tensor = torch.cat((r, g, b), dim=1) return new_tensor -class OnOffFilter(torch.nn.Module): - def __init__(self, p: float = 0.5) -> None: - super(OnOffFilter, self).__init__() - self.p: float = p - - def forward(self, tensor: torch.Tensor) -> torch.Tensor: - - assert tensor.shape[1] == 1 - - tensor_clone = 2.0 * (tensor - self.p) - - temp_0: torch.Tensor = torch.where( - tensor_clone < 0.0, - -tensor_clone, - tensor_clone.new_zeros(tensor_clone.shape, dtype=tensor_clone.dtype), - ) - - temp_1: torch.Tensor = torch.where( - tensor_clone >= 0.0, - tensor_clone, - tensor_clone.new_zeros(tensor_clone.shape, dtype=tensor_clone.dtype), - ) - - new_tensor: torch.Tensor = torch.cat((temp_0, temp_1), dim=1) - - return new_tensor - - def __repr__(self) -> str: - return self.__class__.__name__ + "(p={0})".format(self.p) - - if __name__ == "__main__": pass diff --git a/Parameter.py b/network/Parameter.py similarity index 54% rename from Parameter.py rename to network/Parameter.py index d9d7016..77fa859 100644 --- a/Parameter.py +++ b/network/Parameter.py @@ -1,34 +1,3 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the 
Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - # %% from dataclasses import dataclass, field import numpy as np @@ -41,18 +10,14 @@ class Network: """Parameters of the network. The details about its layers and the number of output neurons.""" - number_of_output_neurons: int = field(default=0) + layer_type: list[str] = field(default_factory=list) forward_neuron_numbers: list[list[int]] = field(default_factory=list) - is_pooling_layer: list[bool] = field(default_factory=list) - forward_kernel_size: list[list[int]] = field(default_factory=list) strides: list[list[int]] = field(default_factory=list) dilation: list[list[int]] = field(default_factory=list) padding: list[list[int]] = field(default_factory=list) - w_trainable: list[bool] = field(default_factory=list) - eps_xy_trainable: list[bool] = field(default_factory=list) - eps_xy_mean: list[bool] = field(default_factory=list) + number_of_output_neurons: int = field(default=0) @dataclass @@ -61,35 +26,35 @@ class LearningParameters: learning_active: bool = field(default=True) + loss_mode: int = field(default=0) loss_coeffs_mse: float = field(default=0.5) loss_coeffs_kldiv: float = field(default=1.0) optimizer_name: str = field(default="Adam") + learning_rate_gamma_w: float = field(default=-1.0) - learning_rate_gamma_eps_xy: float = field(default=-1.0) learning_rate_threshold_w: float = field(default=0.00001) - learning_rate_threshold_eps_xy: float = field(default=0.00001) lr_schedule_name: str = field(default="ReduceLROnPlateau") - lr_scheduler_use_performance: bool = field(default=True) + lr_scheduler_use_performance: bool = field(default=False) lr_scheduler_factor_w: float = field(default=0.75) lr_scheduler_patience_w: int = field(default=-1) lr_scheduler_tau_w: int = field(default=10) - lr_scheduler_factor_eps_xy: float = field(default=0.75) - lr_scheduler_patience_eps_xy: int = field(default=-1) - lr_scheduler_tau_eps_xy: int = field(default=10) - number_of_batches_for_one_update: int = field(default=1) - overload_path: str = field(default="./Previous") + overload_path: str = field(default="Previous") - weight_noise_amplitude: float = field(default=0.01) + weight_noise_range: list[float] = field(default_factory=list) eps_xy_intitial: float = field(default=0.1) - test_every_x_learning_steps: int = field(default=50) - test_during_learning: bool = field(default=True) + # disable_scale_grade: bool = field(default=False) + # kepp_last_grad_scale: bool = field(default=True) - alpha_number_of_iterations: int = field(default=0) + sbs_skip_gradient_calculation: list[bool] = field(default_factory=list) + + adapt_learning_rate_after_minibatch: bool = field(default=True) + + w_trainable: list[bool] = field(default_factory=list) @dataclass @@ -105,8 +70,6 @@ class Augmentation: jitter_saturation: float = field(default=0.1) jitter_hue: float = field(default=0.15) - use_on_off_filter: bool = field(default=True) - @dataclass class ImageStatistics: @@ -117,6 +80,14 @@ class 
ImageStatistics: the_size: list[int] = field(default_factory=list) +@dataclass +class ApproximationSetting: + # Approximation CONV2D Layer + approximation_enable: list[bool] = field(default_factory=list) + number_of_trunc_bits: list[int] = field(default_factory=list) + number_of_frac_bits: list[int] = field(default_factory=list) + + @dataclass class Config: """Master config class.""" @@ -126,25 +97,49 @@ class Config: learning_parameters: LearningParameters = field(default_factory=LearningParameters) augmentation: Augmentation = field(default_factory=Augmentation) image_statistics: ImageStatistics = field(default_factory=ImageStatistics) + approximation_setting: ApproximationSetting = field( + default_factory=ApproximationSetting + ) + # For labeling simulations + # (not actively used) + simulation_id: int = field(default=0) + stage_id: int = field(default=-1) + + # Size of one sub-mini-batch + # (the number of pattern processed at the same time) batch_size: int = field(default=500) + + # The data set + # Identifier for Dataset.oy data_mode: str = field(default="") + # The path to the data set + data_path: str = field(default="") - learning_step: int = field(default=0) - learning_step_max: int = field(default=10000) + # The epochs identifier + epoch_id: int = field(default=0) + # Maximum number of epochs + epoch_id_max: int = field(default=10000) + # Number of cpu threads number_of_cpu_processes: int = field(default=-1) + # Adjust the number of pattern processed in + # one step to the amount of core or with HT threads + # of the cpu + enable_cpu_thread_balacing: bool = field(default=True) - number_of_spikes: int = field(default=0) + # Path for storing information + weight_path: str = field(default="Parameters") + log_path: str = field(default="Log") + + # Other SbS Settings + + number_of_spikes: list[int] = field(default_factory=list) cooldown_after_number_of_spikes: int = field(default=-1) - - weight_path: str = field(default="./Weights/") - eps_xy_path: str = field(default="./EpsXY/") - data_path: str = field(default="./") - results_path: str = field(default="./Results") - reduction_cooldown: float = field(default=25.0) + epsilon_0: float = field(default=1.0) + forgetting_offset: float = field(default=-1.0) def __post_init__(self) -> None: """Post init determines the number of cores. 
@@ -159,25 +154,27 @@ class Config: self.number_of_cpu_processes = number_of_cpu_processes_temp os.makedirs(self.weight_path, exist_ok=True) - os.makedirs(self.eps_xy_path, exist_ok=True) - os.makedirs(self.data_path, exist_ok=True) - os.makedirs(self.results_path, exist_ok=True) - self.batch_size = ( - self.batch_size // self.number_of_cpu_processes - ) * self.number_of_cpu_processes + if self.enable_cpu_thread_balacing is True: + self.batch_size = ( + self.batch_size // self.number_of_cpu_processes + ) * self.number_of_cpu_processes - self.batch_size = np.max((self.batch_size, self.number_of_cpu_processes)) + self.batch_size = np.max((self.batch_size, self.number_of_cpu_processes)) self.batch_size = int(self.batch_size) - def get_epsilon_t(self): + def get_epsilon_t(self, number_of_spikes: int): """Generates the time series of the basic epsilon.""" - np_epsilon_t: np.ndarray = np.ones((self.number_of_spikes), dtype=np.float32) - if (self.cooldown_after_number_of_spikes < self.number_of_spikes) and ( + t = np.arange(0, number_of_spikes, dtype=np.float32) + 1 + np_epsilon_t: np.ndarray = t ** ( + -1.0 / 2.0 + ) # np.ones((number_of_spikes), dtype=np.float32) + + if (self.cooldown_after_number_of_spikes < number_of_spikes) and ( self.cooldown_after_number_of_spikes >= 0 ): np_epsilon_t[ - self.cooldown_after_number_of_spikes : self.number_of_spikes + self.cooldown_after_number_of_spikes : number_of_spikes ] /= self.reduction_cooldown return torch.tensor(np_epsilon_t) diff --git a/network/SbS.py b/network/SbS.py new file mode 100644 index 0000000..0334ad5 --- /dev/null +++ b/network/SbS.py @@ -0,0 +1,714 @@ +import torch + +from network.CPP.PySpikeGeneration2DManyIP import SpikeGeneration2DManyIP +from network.CPP.PyHDynamicCNNManyIP import HDynamicCNNManyIP +from network.calculate_output_size import calculate_output_size + + +class SbS(torch.nn.Module): + + _epsilon_xy: torch.Tensor | None = None + _epsilon_0: float + _epsilon_t: torch.Tensor | None = None + _weights: torch.nn.parameter.Parameter + _weights_exists: bool = False + _kernel_size: list[int] + _stride: list[int] + _dilation: list[int] + _padding: list[int] + _output_size: torch.Tensor + _number_of_spikes: int + _number_of_cpu_processes: int + _number_of_neurons: int + _number_of_input_neurons: int + _epsilon_xy_intitial: float + _h_initial: torch.Tensor | None = None + _w_trainable: bool + # _last_grad_scale: torch.nn.parameter.Parameter + # _keep_last_grad_scale: bool + # _disable_scale_grade: bool + _forgetting_offset: torch.Tensor | None = None + _weight_noise_range: list[float] + _skip_gradient_calculation: bool + _is_pooling_layer: bool + _input_size: list[int] + _output_layer: bool = False + _local_learning: bool = False + + device: torch.device + default_dtype: torch.dtype + _gpu_tuning_factor: int + + _max_grad_weights: torch.Tensor | None = None + + _number_of_grad_weight_contributions: float = 0.0 + + def __init__( + self, + number_of_input_neurons: int, + number_of_neurons: int, + input_size: list[int], + forward_kernel_size: list[int], + number_of_spikes: int, + epsilon_t: torch.Tensor, + epsilon_xy_intitial: float = 0.1, + epsilon_0: float = 1.0, + weight_noise_range: list[float] = [0.0, 1.0], + is_pooling_layer: bool = False, + strides: list[int] = [1, 1], + dilation: list[int] = [0, 0], + padding: list[int] = [0, 0], + number_of_cpu_processes: int = 1, + w_trainable: bool = False, + # keep_last_grad_scale: bool = False, + # disable_scale_grade: bool = True, + forgetting_offset: float = -1.0, + 
skip_gradient_calculation: bool = False, + device: torch.device | None = None, + default_dtype: torch.dtype | None = None, + gpu_tuning_factor: int = 5, + ) -> None: + super().__init__() + + assert device is not None + assert default_dtype is not None + self.device = device + self.default_dtype = default_dtype + + self._w_trainable = bool(w_trainable) + # self._keep_last_grad_scale = bool(keep_last_grad_scale) + self._skip_gradient_calculation = bool(skip_gradient_calculation) + # self._disable_scale_grade = bool(disable_scale_grade) + self._epsilon_xy_intitial = float(epsilon_xy_intitial) + self._stride = strides + self._dilation = dilation + self._padding = padding + self._kernel_size = forward_kernel_size + self._number_of_input_neurons = int(number_of_input_neurons) + self._number_of_neurons = int(number_of_neurons) + self._epsilon_0 = float(epsilon_0) + self._number_of_cpu_processes = int(number_of_cpu_processes) + self._number_of_spikes = int(number_of_spikes) + self._weight_noise_range = weight_noise_range + self._is_pooling_layer = bool(is_pooling_layer) + + assert len(input_size) == 2 + self._input_size = input_size + + # The GPU hates me... + # Too many SbS threads == bad + # Thus I need to limit them... + # (Reminder: We cannot access the mini-batch size here, + # which is part of the GPU thread size calculation...) + if (self._input_size[0] * self._input_size[1]) > gpu_tuning_factor: + self._gpu_tuning_factor = gpu_tuning_factor + else: + self._gpu_tuning_factor = 0 + + # self._last_grad_scale = torch.nn.parameter.Parameter( + # torch.tensor(-1.0, dtype=self.default_dtype), + # requires_grad=True, + # ) + + self._forgetting_offset = torch.tensor( + forgetting_offset, dtype=self.default_dtype, device=self.device + ) + + self.epsilon_t = epsilon_t.type(dtype=self.default_dtype).to(device=self.device) + + self._output_size = calculate_output_size( + value=input_size, + kernel_size=self._kernel_size, + stride=self._stride, + dilation=self._dilation, + padding=self._padding, + ) + + self.set_h_init_to_uniform() + + self.functional_sbs = FunctionalSbS.apply + + # ############################################################### + # Initialize the weights + # ############################################################### + + if self._is_pooling_layer is True: + self.weights = self._make_pooling_weights() + + else: + assert len(self._weight_noise_range) == 2 + weights = torch.empty( + ( + int(self._kernel_size[0]) + * int(self._kernel_size[1]) + * int(self._number_of_input_neurons), + int(self._number_of_neurons), + ), + dtype=self.default_dtype, + device=self.device, + ) + + torch.nn.init.uniform_( + weights, + a=float(self._weight_noise_range[0]), + b=float(self._weight_noise_range[1]), + ) + self.weights = weights + + #################################################################### + # Variables in and out # + #################################################################### + + @property + def epsilon_t(self) -> torch.Tensor | None: + return self._epsilon_t + + @epsilon_t.setter + def epsilon_t(self, value: torch.Tensor): + assert value is not None + assert torch.is_tensor(value) is True + assert value.dim() == 1 + assert value.dtype == self.default_dtype + self._epsilon_t = ( + value.detach() + .clone(memory_format=torch.contiguous_format) + .type(dtype=self.default_dtype) + .to(device=self.device) + .requires_grad_(False) + ) + + @property + def weights(self) -> torch.Tensor | None: + if self._weights_exists is False: + return None + else: + return self._weights + + 
@weights.setter + def weights(self, value: torch.Tensor): + assert value is not None + assert torch.is_tensor(value) is True + assert value.dim() == 2 + temp: torch.Tensor = ( + value.detach() + .clone(memory_format=torch.contiguous_format) + .type(dtype=self.default_dtype) + .to(device=self.device) + ) + temp /= temp.sum(dim=0, keepdim=True, dtype=self.default_dtype) + if self._weights_exists is False: + self._weights = torch.nn.parameter.Parameter(temp, requires_grad=True) + self._weights_exists = True + else: + self._weights.data = temp + + @property + def h_initial(self) -> torch.Tensor | None: + return self._h_initial + + @h_initial.setter + def h_initial(self, value: torch.Tensor): + assert value is not None + assert torch.is_tensor(value) is True + assert value.dim() == 1 + assert value.dtype == self.default_dtype + self._h_initial = ( + value.detach() + .clone(memory_format=torch.contiguous_format) + .type(dtype=self.default_dtype) + .to(device=self.device) + .requires_grad_(False) + ) + + def update_pre_care(self): + + if self._weights.grad is not None: + assert self._number_of_grad_weight_contributions > 0 + self._weights.grad /= self._number_of_grad_weight_contributions + self._number_of_grad_weight_contributions = 0.0 + + def update_after_care(self, threshold_weight: float): + + if self._w_trainable is True: + self.norm_weights() + self.threshold_weights(threshold_weight) + self.norm_weights() + + # def after_batch(self, new_state: bool = False): + # if self._keep_last_grad_scale is True: + # self._last_grad_scale.data = self._last_grad_scale.grad + # self._keep_last_grad_scale = new_state + + # self._last_grad_scale.grad = torch.zeros_like(self._last_grad_scale.grad) + + #################################################################### + # Helper functions # + #################################################################### + + def _make_pooling_weights(self) -> torch.Tensor: + """For generating the pooling weights.""" + + assert self._number_of_neurons is not None + assert self._kernel_size is not None + + weights: torch.Tensor = torch.zeros( + ( + int(self._kernel_size[0]), + int(self._kernel_size[1]), + int(self._number_of_neurons), + int(self._number_of_neurons), + ), + dtype=self.default_dtype, + device=self.device, + ) + + for i in range(0, int(self._number_of_neurons)): + weights[:, :, i, i] = 1.0 + + weights = weights.moveaxis(-1, 0).moveaxis(-1, 1) + + weights = torch.nn.functional.unfold( + input=weights, + kernel_size=(int(self._kernel_size[0]), int(self._kernel_size[1])), + dilation=(1, 1), + padding=(0, 0), + stride=(1, 1), + ).squeeze() + + weights = torch.moveaxis(weights, 0, 1) + + return weights + + def set_h_init_to_uniform(self) -> None: + + assert self._number_of_neurons > 2 + + self.h_initial: torch.Tensor = torch.full( + (self._number_of_neurons,), + (1.0 / float(self._number_of_neurons)), + dtype=self.default_dtype, + device=self.device, + ) + + def norm_weights(self) -> None: + assert self._weights_exists is True + temp: torch.Tensor = ( + self._weights.data.detach() + .clone(memory_format=torch.contiguous_format) + .type(dtype=self.default_dtype) + .to(device=self.device) + ) + temp /= temp.sum(dim=0, keepdim=True, dtype=self.default_dtype) + self._weights.data = temp + + def threshold_weights(self, threshold: float) -> None: + assert self._weights_exists is True + assert threshold >= 0 + + torch.clamp( + self._weights.data, + min=float(threshold), + max=None, + out=self._weights.data, + ) + + 
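Side note (not part of the patch): the weights setter, norm_weights, and update_after_care keep the weight matrix column-stochastic along dim 0, i.e. the incoming weights of every output neuron sum to one; threshold_weights only clamps from below, which is why update_after_care renormalizes afterwards. A small sketch of that invariant, with a plain tensor standing in for the parameter and arbitrarily chosen shapes:

import torch

w = torch.rand((5 * 5 * 3, 32))   # (kernel_x * kernel_y * in_channels, out_channels)
w /= w.sum(dim=0, keepdim=True)   # normalization done by the weights setter / norm_weights
w = torch.clamp(w, min=1e-3)      # threshold_weights: clamp from below only
w /= w.sum(dim=0, keepdim=True)   # renormalize, as update_after_care does after thresholding
assert torch.allclose(w.sum(dim=0), torch.ones(32))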
#################################################################### + # Forward # + #################################################################### + + def forward( + self, input: torch.Tensor, labels: torch.Tensor | None = None + ) -> torch.Tensor: + + # Are we happy with the input? + assert input is not None + assert torch.is_tensor(input) is True + assert input.dim() == 4 + assert input.dtype == self.default_dtype + assert input.shape[1] == self._number_of_input_neurons + assert input.shape[2] == self._input_size[0] + assert input.shape[3] == self._input_size[1] + + # Are we happy with the rest of the network? + assert self._epsilon_0 is not None + assert self._epsilon_t is not None + + assert self._h_initial is not None + assert self._forgetting_offset is not None + + assert self._weights_exists is True + assert self._weights is not None + + input_convolved = torch.nn.functional.fold( + torch.nn.functional.unfold( + input.requires_grad_(True), + kernel_size=(int(self._kernel_size[0]), int(self._kernel_size[1])), + dilation=(int(self._dilation[0]), int(self._dilation[1])), + padding=(int(self._padding[0]), int(self._padding[1])), + stride=(int(self._stride[0]), int(self._stride[1])), + ), + output_size=tuple(self._output_size.tolist()), + kernel_size=(1, 1), + dilation=(1, 1), + padding=(0, 0), + stride=(1, 1), + ) + + epsilon_t_0: torch.Tensor = ( + (self._epsilon_t * self._epsilon_0).type(input.dtype).to(input.device) + ) + + parameter_list = torch.tensor( + [ + int(self._w_trainable), # 0 + int(0), # int(self._disable_scale_grade), # 1 + int(0), # int(self._keep_last_grad_scale), # 2 + int(self._skip_gradient_calculation), # 3 + int(self._number_of_spikes), # 4 + int(self._number_of_cpu_processes), # 5 + int(self._output_size[0]), # 6 + int(self._output_size[1]), # 7 + int(self._gpu_tuning_factor), # 8 + int(self._output_layer), # 9 + int(self._local_learning), # 10 + ], + dtype=torch.int64, + ) + + if self._epsilon_xy is None: + self._epsilon_xy = torch.full( + ( + input_convolved.shape[1], + input_convolved.shape[2], + input_convolved.shape[3], + ), + float(self._epsilon_xy_intitial), + dtype=self.default_dtype, + device=self.device, + ) + + assert self._epsilon_xy is not None + # In the case somebody tried to replace the matrix with wrong dimensions + assert self._epsilon_xy.shape[0] == input_convolved.shape[1] + assert self._epsilon_xy.shape[1] == input_convolved.shape[2] + assert self._epsilon_xy.shape[2] == input_convolved.shape[3] + + # SbS forward functional + output = self.functional_sbs( + input_convolved, + self._epsilon_xy, + epsilon_t_0, + self._weights, + self._h_initial, + parameter_list, + # self._last_grad_scale, + self._forgetting_offset, + ) + + self._number_of_grad_weight_contributions += ( + output.shape[0] * output.shape[-2] * output.shape[-1] + ) + + return output + + +class FunctionalSbS(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + epsilon_xy: torch.Tensor, + epsilon_t_0: torch.Tensor, + weights: torch.Tensor, + h_initial: torch.Tensor, + parameter_list: torch.Tensor, + # grad_output_scale: torch.Tensor, + forgetting_offset: torch.Tensor, + ) -> torch.Tensor: + + assert input.dim() == 4 + + number_of_spikes: int = int(parameter_list[4]) + + if input.device == torch.device("cpu"): + spike_number_of_cpu_processes: int = int(parameter_list[5]) + else: + spike_number_of_cpu_processes = -1 + + if input.device == torch.device("cpu"): + hdyn_number_of_cpu_processes: int = int(parameter_list[5]) + else: 
+ hdyn_number_of_cpu_processes = -1 + + output_size_0: int = int(parameter_list[6]) + output_size_1: int = int(parameter_list[7]) + gpu_tuning_factor: int = int(parameter_list[8]) + + # ########################################################### + # Spike generation + # ########################################################### + + # ############################################ + # Normalized cumsum + # (beware of the pytorch bug! Thus .clone()!) + # ############################################ + input_cumsum: torch.Tensor = torch.cumsum(input, dim=1, dtype=input.dtype) + input_cumsum_last: torch.Tensor = input_cumsum[:, -1, :, :].unsqueeze(1).clone() + input_cumsum /= input_cumsum_last + + # ############################################ + # Get the required random numbers + # ############################################ + random_values = torch.rand( + size=[ + input_cumsum.shape[0], + number_of_spikes, + input_cumsum.shape[2], + input_cumsum.shape[3], + ], + dtype=input.dtype, + device=input.device, + ) + + # ############################################ + # Make space for the results + # ############################################ + spikes = torch.empty_like(random_values, dtype=torch.int64, device=input.device) + + assert input_cumsum.is_contiguous() is True + assert random_values.is_contiguous() is True + assert spikes.is_contiguous() is True + + # time_start: float = time.perf_counter() + spike_generation: SpikeGeneration2DManyIP = SpikeGeneration2DManyIP() + + spike_generation.spike_generation( + input_cumsum.data_ptr(), + int(input_cumsum.shape[0]), + int(input_cumsum.shape[1]), + int(input_cumsum.shape[2]), + int(input_cumsum.shape[3]), + random_values.data_ptr(), + int(random_values.shape[0]), + int(random_values.shape[1]), + int(random_values.shape[2]), + int(random_values.shape[3]), + spikes.data_ptr(), + int(spikes.shape[0]), + int(spikes.shape[1]), + int(spikes.shape[2]), + int(spikes.shape[3]), + int(spike_number_of_cpu_processes), + ) + del random_values + del input_cumsum + + # ########################################################### + # H dynamic + # ########################################################### + + assert epsilon_t_0.ndim == 1 + assert epsilon_t_0.shape[0] >= number_of_spikes + + # ############################################ + # Make space for the results + # ############################################ + + output = torch.empty( + ( + int(input.shape[0]), + int(weights.shape[1]), + output_size_0, + output_size_1, + ), + dtype=input.dtype, + device=input.device, + ) + + assert output.is_contiguous() is True + assert epsilon_xy.is_contiguous() is True + assert epsilon_t_0.is_contiguous() is True + assert weights.is_contiguous() is True + assert spikes.is_contiguous() is True + assert h_initial.is_contiguous() is True + + assert epsilon_xy.ndim == 3 + assert weights.ndim == 2 + assert h_initial.ndim == 1 + + h_dynamic: HDynamicCNNManyIP = HDynamicCNNManyIP() + + h_dynamic.update( + output.data_ptr(), + int(output.shape[0]), + int(output.shape[1]), + int(output.shape[2]), + int(output.shape[3]), + epsilon_xy.data_ptr(), + int(epsilon_xy.shape[0]), + int(epsilon_xy.shape[1]), + int(epsilon_xy.shape[2]), + epsilon_t_0.data_ptr(), + int(epsilon_t_0.shape[0]), + weights.data_ptr(), + int(weights.shape[0]), + int(weights.shape[1]), + spikes.data_ptr(), + int(spikes.shape[0]), + int(spikes.shape[1]), + int(spikes.shape[2]), + int(spikes.shape[3]), + h_initial.data_ptr(), + int(h_initial.shape[0]), + hdyn_number_of_cpu_processes, + float(forgetting_offset.item()), 
+ int(gpu_tuning_factor), + ) + del spikes + + # ########################################################### + # Save the necessary data for the backward pass + # ########################################################### + + ctx.save_for_backward( + input, + weights, + output, + parameter_list, + # grad_output_scale, + ) + + return output + + @staticmethod + def backward(ctx, grad_output): + # ############################################## + # Get the variables back + # ############################################## + ( + input, + weights, + output, + parameter_list, + # last_grad_scale, + ) = ctx.saved_tensors + + # ############################################## + # Default output + # ############################################## + grad_input = None + grad_eps_xy = None + grad_epsilon_t_0 = None + grad_weights = None + grad_h_initial = None + grad_parameter_list = None + grad_forgetting_offset = None + + # ############################################## + # Parameters + # ############################################## + parameter_w_trainable: bool = bool(parameter_list[0]) + # parameter_disable_scale_grade: bool = bool(parameter_list[1]) + # parameter_keep_last_grad_scale: bool = bool(parameter_list[2]) + parameter_skip_gradient_calculation: bool = bool(parameter_list[3]) + parameter_output_layer: bool = bool(parameter_list[9]) + parameter_local_learning: bool = bool(parameter_list[10]) + + # ############################################## + # Dealing with overall scale of the gradient + # ############################################## + # if parameter_disable_scale_grade is False: + # if parameter_keep_last_grad_scale is True: + # last_grad_scale = torch.tensor( + # [torch.abs(grad_output).max(), last_grad_scale] + # ).max() + # grad_output /= last_grad_scale + # grad_output_scale = last_grad_scale.clone() + + input /= input.sum(dim=1, keepdim=True, dtype=weights.dtype) + + # ################################################# + # User doesn't want us to calculate the gradients + # ################################################# + + if parameter_skip_gradient_calculation is True: + + return ( + grad_input, + grad_eps_xy, + grad_epsilon_t_0, + grad_weights, + grad_h_initial, + grad_parameter_list, + # grad_output_scale, + grad_forgetting_offset, + ) + + # ################################################# + # Calculate backprop error (grad_input) + # ################################################# + + backprop_r: torch.Tensor = weights.unsqueeze(0).unsqueeze(-1).unsqueeze( + -1 + ) * output.unsqueeze(1) + + backprop_bigr: torch.Tensor = backprop_r.sum(dim=2) + + backprop_z: torch.Tensor = backprop_r * ( + 1.0 / (backprop_bigr + 1e-20) + ).unsqueeze(2) + grad_input: torch.Tensor = (backprop_z * grad_output.unsqueeze(1)).sum(2) + del backprop_z + + # ################################################# + # Calculate weight gradient (grad_weights) + # ################################################# + + if parameter_w_trainable is False: + + # ################################################# + # We don't train this weight + # ################################################# + grad_weights = None + + elif (parameter_output_layer is False) and (parameter_local_learning is True): + # ################################################# + # Local learning + # ################################################# + grad_weights = ( + (-2 * (input - backprop_bigr).unsqueeze(2) * output.unsqueeze(1)) + .sum(0) + .sum(-1) + .sum(-1) + ) + + else: + # ################################################# + # 
Backprop + # ################################################# + backprop_f: torch.Tensor = output.unsqueeze(1) * ( + input / (backprop_bigr**2 + 1e-20) + ).unsqueeze(2) + + result_omega: torch.Tensor = backprop_bigr.unsqueeze( + 2 + ) * grad_output.unsqueeze(1) + result_omega -= (backprop_r * grad_output.unsqueeze(1)).sum(2).unsqueeze(2) + result_omega *= backprop_f + del backprop_f + grad_weights = result_omega.sum(0).sum(-1).sum(-1) + del result_omega + + del backprop_bigr + del backprop_r + + return ( + grad_input, + grad_eps_xy, + grad_epsilon_t_0, + grad_weights, + grad_h_initial, + grad_parameter_list, + # grad_output_scale, + grad_forgetting_offset, + ) diff --git a/SbSLRScheduler.py b/network/SbSLRScheduler.py similarity index 100% rename from SbSLRScheduler.py rename to network/SbSLRScheduler.py diff --git a/network/SplitOnOffLayer.py b/network/SplitOnOffLayer.py new file mode 100644 index 0000000..4190917 --- /dev/null +++ b/network/SplitOnOffLayer.py @@ -0,0 +1,54 @@ +import torch + + +class SplitOnOffLayer(torch.nn.Module): + + device: torch.device + default_dtype: torch.dtype + + mean: torch.Tensor | None = None + epsilon: float = 0.01 + + def __init__( + self, + device: torch.device | None = None, + default_dtype: torch.dtype | None = None, + ) -> None: + super().__init__() + + assert device is not None + assert default_dtype is not None + self.device = device + self.default_dtype = default_dtype + + #################################################################### + # Forward # + #################################################################### + + def forward(self, input: torch.Tensor) -> torch.Tensor: + assert input.ndim == 4 + + # self.training is switched by network.eval() and network.train() + if self.training is True: + mean_temp = ( + input.mean(dim=0, keepdim=True) + .mean(dim=1, keepdim=True) + .detach() + .clone() + ) + + if self.mean is None: + self.mean = mean_temp + else: + self.mean = (1.0 - self.epsilon) * self.mean + self.epsilon * mean_temp + + assert self.mean is not None + + temp = input - self.mean.detach().clone() + temp_a = torch.nn.functional.relu(temp) + temp_b = torch.nn.functional.relu(-temp) + output = torch.cat((temp_a, temp_b), dim=1) + + output /= output.sum(dim=1, keepdim=True) + 1e-20 + + return output diff --git a/network/build_datasets.py b/network/build_datasets.py new file mode 100644 index 0000000..3fdb637 --- /dev/null +++ b/network/build_datasets.py @@ -0,0 +1,68 @@ +# %% +import torch +from network.Dataset import ( + DatasetMaster, + DatasetCIFAR, + DatasetMNIST, + DatasetFashionMNIST, +) +from network.Parameter import Config + + +def build_datasets( + cfg: Config, +) -> tuple[ + DatasetMaster, + DatasetMaster, + torch.utils.data.DataLoader, + torch.utils.data.DataLoader, +]: + + # Load the input data + the_dataset_train: DatasetMaster + the_dataset_test: DatasetMaster + if cfg.data_mode == "CIFAR10": + the_dataset_train = DatasetCIFAR( + train=True, path_pattern=cfg.data_path, path_label=cfg.data_path + ) + the_dataset_test = DatasetCIFAR( + train=False, path_pattern=cfg.data_path, path_label=cfg.data_path + ) + elif cfg.data_mode == "MNIST": + the_dataset_train = DatasetMNIST( + train=True, path_pattern=cfg.data_path, path_label=cfg.data_path + ) + the_dataset_test = DatasetMNIST( + train=False, path_pattern=cfg.data_path, path_label=cfg.data_path + ) + elif cfg.data_mode == "MNIST_FASHION": + the_dataset_train = DatasetFashionMNIST( + train=True, path_pattern=cfg.data_path, path_label=cfg.data_path + ) + the_dataset_test = 
DatasetFashionMNIST( + train=False, path_pattern=cfg.data_path, path_label=cfg.data_path + ) + else: + raise Exception("data_mode unknown") + + if len(cfg.image_statistics.mean) == 0: + cfg.image_statistics.mean = the_dataset_train.mean + + # The basic size + cfg.image_statistics.the_size = [ + the_dataset_train.pattern_storage.shape[2], + the_dataset_train.pattern_storage.shape[3], + ] + + # Minus the stuff we cut away in the pattern filter + cfg.image_statistics.the_size[0] -= 2 * cfg.augmentation.crop_width_in_pixel + cfg.image_statistics.the_size[1] -= 2 * cfg.augmentation.crop_width_in_pixel + + my_loader_test: torch.utils.data.DataLoader = torch.utils.data.DataLoader( + the_dataset_test, batch_size=cfg.batch_size, shuffle=False + ) + my_loader_train: torch.utils.data.DataLoader = torch.utils.data.DataLoader( + the_dataset_train, batch_size=cfg.batch_size, shuffle=True + ) + + return the_dataset_train, the_dataset_test, my_loader_test, my_loader_train diff --git a/network/build_lr_scheduler.py b/network/build_lr_scheduler.py new file mode 100644 index 0000000..e771950 --- /dev/null +++ b/network/build_lr_scheduler.py @@ -0,0 +1,86 @@ +# %% +import torch +from network.Parameter import Config + +try: + from network.SbSLRScheduler import SbSLRScheduler + + sbs_lr_scheduler: bool = True +except Exception: + sbs_lr_scheduler = False + + +def build_lr_scheduler( + optimizer, cfg: Config, logging +) -> list[torch.optim.lr_scheduler.ReduceLROnPlateau | SbSLRScheduler | None]: + + assert len(optimizer) > 0 + + lr_scheduler_list: list[ + torch.optim.lr_scheduler.ReduceLROnPlateau | SbSLRScheduler | None + ] = [] + + for id_optimizer in range(0, len(optimizer)): + + if cfg.learning_parameters.lr_schedule_name == "None": + logging.info(f"Using lr scheduler for optimizer {id_optimizer} : None") + + lr_scheduler_list.append(None) + + elif cfg.learning_parameters.lr_schedule_name == "ReduceLROnPlateau": + logging.info( + f"Using lr scheduler for optimizer {id_optimizer}: ReduceLROnPlateau" + ) + + if optimizer[id_optimizer] is None: + lr_scheduler_list.append(None) + elif (cfg.learning_parameters.lr_scheduler_factor_w <= 0) or ( + cfg.learning_parameters.lr_scheduler_patience_w <= 0 + ): + lr_scheduler_list.append( + torch.optim.lr_scheduler.ReduceLROnPlateau( + optimizer[id_optimizer],eps=1e-14, + ) + ) + else: + lr_scheduler_list.append( + torch.optim.lr_scheduler.ReduceLROnPlateau( + optimizer[id_optimizer], + factor=cfg.learning_parameters.lr_scheduler_factor_w, + patience=cfg.learning_parameters.lr_scheduler_patience_w, + eps=1e-14, + ) + ) + + elif cfg.learning_parameters.lr_schedule_name == "SbSLRScheduler": + logging.info( + f"Using lr scheduler for optimizer {id_optimizer}: SbSLRScheduler" + ) + + if sbs_lr_scheduler is False: + raise Exception( + f"lr_scheduler for optimizer {id_optimizer}: SbSLRScheduler.py missing" + ) + + if optimizer[id_optimizer] is None: + lr_scheduler_list.append(None) + elif ( + (cfg.learning_parameters.lr_scheduler_factor_w <= 0) + or (cfg.learning_parameters.lr_scheduler_patience_w <= 0) + or (cfg.learning_parameters.lr_scheduler_tau_w <= 0) + ): + lr_scheduler_list.append(None) + else: + lr_scheduler_list.append( + SbSLRScheduler( + optimizer[id_optimizer], + factor=cfg.learning_parameters.lr_scheduler_factor_w, + patience=cfg.learning_parameters.lr_scheduler_patience_w, + tau=cfg.learning_parameters.lr_scheduler_tau_w, + ) + ) + + else: + raise Exception("lr_scheduler not implemented") + + return lr_scheduler_list diff --git a/network/build_network.py 
b/network/build_network.py new file mode 100644 index 0000000..6d5e306 --- /dev/null +++ b/network/build_network.py @@ -0,0 +1,354 @@ +# %% +import torch + +from network.calculate_output_size import calculate_output_size +from network.Parameter import Config +from network.SbS import SbS +from network.SplitOnOffLayer import SplitOnOffLayer +from network.Conv2dApproximation import Conv2dApproximation + + +def build_network( + cfg: Config, device: torch.device, default_dtype: torch.dtype, logging +) -> torch.nn.Sequential: + network = torch.nn.Sequential() + input_size: list[list[int]] = [] + input_size.append(cfg.image_statistics.the_size) + + for layer_id in range(0, len(cfg.network_structure.layer_type)): + # ############################################################# + # Show infos about the layer: + # ############################################################# + logging.info("") + logging.info(f"Layer ID: {layer_id}") + logging.info(f"Layer type: {cfg.network_structure.layer_type[layer_id]}") + + # ############################################################# + # Fill in the default values + # ############################################################# + + kernel_size: list[int] = [1, 1] + if len(cfg.network_structure.forward_kernel_size) > layer_id: + kernel_size = cfg.network_structure.forward_kernel_size[layer_id] + + padding: list[int] = [0, 0] + if len(cfg.network_structure.padding) > layer_id: + padding = cfg.network_structure.padding[layer_id] + + dilation: list[int] = [1, 1] + if len(cfg.network_structure.dilation) > layer_id: + dilation = cfg.network_structure.dilation[layer_id] + + strides: list[int] = [1, 1] + if len(cfg.network_structure.strides) > layer_id: + if len(cfg.network_structure.strides[layer_id]) == 2: + strides = cfg.network_structure.strides[layer_id] + + in_channels: int = -1 + out_channels: int = -1 + if len(cfg.network_structure.forward_neuron_numbers) > layer_id: + if len(cfg.network_structure.forward_neuron_numbers[layer_id]) == 2: + in_channels = cfg.network_structure.forward_neuron_numbers[layer_id][0] + out_channels = cfg.network_structure.forward_neuron_numbers[layer_id][1] + + weight_noise_range: list[float] = [1.0, 1.1] + if len(cfg.learning_parameters.weight_noise_range) == 2: + weight_noise_range = [ + float(cfg.learning_parameters.weight_noise_range[0]), + float(cfg.learning_parameters.weight_noise_range[1]), + ] + + logging.info(f"Input channels: {in_channels}") + logging.info(f"Output channels: {out_channels}") + logging.info(f"Kernel size: {kernel_size}") + logging.info(f"Stride: {strides}") + logging.info(f"Dilation: {dilation}") + logging.info(f"Padding: {padding}") + + # Conv2D + bias: bool = True + + # Approx settings + approximation_enable: bool = False + if len(cfg.approximation_setting.approximation_enable) > layer_id: + approximation_enable = cfg.approximation_setting.approximation_enable[ + layer_id + ] + logging.info(f"Approximation Enable: {approximation_enable}") + elif len(cfg.approximation_setting.approximation_enable) == 1: + approximation_enable = cfg.approximation_setting.approximation_enable[0] + logging.info(f"Approximation Enable: {approximation_enable}") + + number_of_trunc_bits: int = -1 + if len(cfg.approximation_setting.number_of_trunc_bits) > layer_id: + number_of_trunc_bits = cfg.approximation_setting.number_of_trunc_bits[ + layer_id + ] + logging.info(f"Number of trunc bits: {number_of_trunc_bits}") + elif len(cfg.approximation_setting.number_of_trunc_bits) == 1: + number_of_trunc_bits = 
cfg.approximation_setting.number_of_trunc_bits[0] + logging.info(f"Number of trunc bits: {number_of_trunc_bits}") + + number_of_frac_bits: int = -1 + if len(cfg.approximation_setting.number_of_frac_bits) > layer_id: + number_of_frac_bits = cfg.approximation_setting.number_of_frac_bits[ + layer_id + ] + logging.info(f"Number of frac bits: {number_of_frac_bits}") + elif len(cfg.approximation_setting.number_of_frac_bits) == 1: + number_of_frac_bits = cfg.approximation_setting.number_of_frac_bits[0] + logging.info(f"Number of frac bits: {number_of_frac_bits}") + + # Weights: Trainable? + w_trainable: bool = False + if len(cfg.learning_parameters.w_trainable) > layer_id: + w_trainable = cfg.learning_parameters.w_trainable[layer_id] + elif len(cfg.learning_parameters.w_trainable) == 1: + w_trainable = cfg.learning_parameters.w_trainable[0] + logging.info(f"W trainable?: {w_trainable}") + + # SbS Setting + sbs_skip_gradient_calculation: bool = False + if len(cfg.learning_parameters.sbs_skip_gradient_calculation) > layer_id: + sbs_skip_gradient_calculation = ( + cfg.learning_parameters.sbs_skip_gradient_calculation[layer_id] + ) + elif len(cfg.learning_parameters.sbs_skip_gradient_calculation) == 1: + sbs_skip_gradient_calculation = ( + cfg.learning_parameters.sbs_skip_gradient_calculation[0] + ) + + # ############################################################# + # SbS layer: + # ############################################################# + + if cfg.network_structure.layer_type[layer_id].upper().startswith("SBS") is True: + + assert in_channels > 0 + assert out_channels > 0 + + number_of_spikes: int = -1 + if len(cfg.number_of_spikes) > layer_id: + number_of_spikes = cfg.number_of_spikes[layer_id] + elif len(cfg.number_of_spikes) == 1: + number_of_spikes = cfg.number_of_spikes[0] + + assert number_of_spikes > 0 + + logging.info( + f"Layer: {layer_id} -> SbS Layer with {number_of_spikes} spikes" + ) + is_pooling_layer: bool = False + if cfg.network_structure.layer_type[layer_id].upper().find("POOLING") != -1: + is_pooling_layer = True + + network.append( + SbS( + number_of_input_neurons=in_channels, + number_of_neurons=out_channels, + input_size=input_size[-1], + forward_kernel_size=kernel_size, + number_of_spikes=number_of_spikes, + epsilon_t=cfg.get_epsilon_t(number_of_spikes), + epsilon_xy_intitial=cfg.learning_parameters.eps_xy_intitial, + epsilon_0=cfg.epsilon_0, + weight_noise_range=weight_noise_range, + is_pooling_layer=is_pooling_layer, + strides=strides, + dilation=dilation, + padding=padding, + number_of_cpu_processes=cfg.number_of_cpu_processes, + w_trainable=w_trainable, + # keep_last_grad_scale=cfg.learning_parameters.kepp_last_grad_scale, + # disable_scale_grade=cfg.learning_parameters.disable_scale_grade, + forgetting_offset=cfg.forgetting_offset, + skip_gradient_calculation=sbs_skip_gradient_calculation, + device=device, + default_dtype=default_dtype, + ) + ) + # Adding the x,y output dimensions + input_size.append(network[-1]._output_size.tolist()) + + network[-1]._output_layer = False + if layer_id == len(cfg.network_structure.layer_type) - 1: + network[-1]._output_layer = True + + network[-1]._local_learning = False + if cfg.network_structure.layer_type[layer_id].upper().find("LOCAL") != -1: + network[-1]._local_learning = True + + # ############################################################# + # Split On Off Layer: + # ############################################################# + elif ( + cfg.network_structure.layer_type[layer_id].upper().startswith("ONOFF") + is True 
+ ): + logging.info(f"Layer: {layer_id} -> Split On Off Layer") + network.append( + SplitOnOffLayer( + device=device, + default_dtype=default_dtype, + ) + ) + input_size.append(input_size[-1]) + + # ############################################################# + # PyTorch CONV2D layer: + # ############################################################# + elif ( + cfg.network_structure.layer_type[layer_id].upper().startswith("CONV2D") + is True + ): + assert in_channels > 0 + assert out_channels > 0 + + logging.info(f"Layer: {layer_id} -> CONV2D Layer") + network.append( + torch.nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=(int(kernel_size[0]), int(kernel_size[1])), + stride=(int(strides[0]), int(strides[1])), + dilation=(int(dilation[0]), int(dilation[1])), + bias=bias, + padding=(int(padding[0]), int(padding[1])), + device=device, + dtype=default_dtype, + ) + ) + + # I need this later... + network[-1]._w_trainable = w_trainable + + # Calculate the x,y output dimensions + input_size_temp = calculate_output_size( + value=input_size[-1], + kernel_size=kernel_size, + stride=strides, + dilation=dilation, + padding=padding, + ).tolist() + input_size.append(input_size_temp) + + # ############################################################# + # PyTorch RELU layer: + # ############################################################# + + elif ( + cfg.network_structure.layer_type[layer_id].upper().startswith("RELU") + is True + ): + logging.info(f"Layer: {layer_id} -> RELU Layer") + network.append(torch.nn.ReLU()) + input_size.append(input_size[-1]) + + # ############################################################# + # PyTorch MAX Pooling layer: + # ############################################################# + + elif ( + cfg.network_structure.layer_type[layer_id].upper().startswith("MAX POOLING") + is True + ): + logging.info(f"Layer: {layer_id} -> MAX POOLING Layer") + network.append( + torch.nn.MaxPool2d( + kernel_size=(int(kernel_size[0]), int(kernel_size[1])), + stride=(int(strides[0]), int(strides[1])), + padding=(int(padding[0]), int(padding[1])), + dilation=(int(dilation[0]), int(dilation[1])), + ) + ) + + # Calculate the x,y output dimensions + input_size_temp = calculate_output_size( + value=input_size[-1], + kernel_size=kernel_size, + stride=strides, + dilation=dilation, + padding=padding, + ).tolist() + input_size.append(input_size_temp) + + # ############################################################# + # PyTorch Average Pooling layer: + # ############################################################# + elif ( + cfg.network_structure.layer_type[layer_id] + .upper() + .startswith("AVERAGE POOLING") + is True + ): + logging.info(f"Layer: {layer_id} -> AVERAGE POOLING Layer") + network.append( + torch.nn.AvgPool2d( + kernel_size=(int(kernel_size[0]), int(kernel_size[1])), + stride=(int(strides[0]), int(strides[1])), + padding=(int(padding[0]), int(padding[1])), + ) + ) + # Calculate the x,y output dimensions + input_size_temp = calculate_output_size( + value=input_size[-1], + kernel_size=kernel_size, + stride=strides, + dilation=dilation, + padding=padding, + ).tolist() + input_size.append(input_size_temp) + + # ############################################################# + # Approx CONV2D layer: + # ############################################################# + elif ( + cfg.network_structure.layer_type[layer_id] + .upper() + .startswith("APPROX CONV2D") + is True + ): + assert in_channels > 0 + assert out_channels > 0 + + logging.info(f"Layer: 
{layer_id} -> Approximation CONV2D Layer") + network.append( + Conv2dApproximation( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=(int(kernel_size[0]), int(kernel_size[1])), + stride=(int(strides[0]), int(strides[1])), + dilation=(int(dilation[0]), int(dilation[1])), + bias=bias, + padding=(int(padding[0]), int(padding[1])), + device=device, + dtype=default_dtype, + approximation_enable=approximation_enable, + number_of_trunc_bits=number_of_trunc_bits, + number_of_frac=number_of_frac_bits, + number_of_processes=cfg.number_of_cpu_processes, + ) + ) + + # I need this later... + network[-1]._w_trainable = w_trainable + + # Calculate the x,y output dimensions + input_size_temp = calculate_output_size( + value=input_size[-1], + kernel_size=kernel_size, + stride=strides, + dilation=dilation, + padding=padding, + ).tolist() + input_size.append(input_size_temp) + + # ############################################################# + # Failure because no matching layer type was found + # ############################################################# + else: + raise Exception( + f"Unknown layer type: {cfg.network_structure.layer_type[layer_id]}" + ) + + return network diff --git a/network/build_optimizer.py b/network/build_optimizer.py new file mode 100644 index 0000000..d0f96d8 --- /dev/null +++ b/network/build_optimizer.py @@ -0,0 +1,83 @@ +# %% +import torch +from network.Parameter import Config +from network.SbS import SbS +from network.Conv2dApproximation import Conv2dApproximation +from network.Adam import Adam + + +def build_optimizer( + network: torch.nn.Sequential, cfg: Config, logging +) -> list[torch.optim.Optimizer | None]: + + parameter_list_weights: list = [] + parameter_list_sbs: list = [] + + # ############################################### + # Put all parameters that need to be learned + # in a parameter list. 
+ # ############################################### + + for id in range(0, len(network)): + + if (isinstance(network[id], SbS) is True) and ( + network[id]._w_trainable is True + ): + parameter_list_weights.append(network[id]._weights) + parameter_list_sbs.append(True) + + if (isinstance(network[id], torch.nn.modules.conv.Conv2d) is True) and ( + network[id]._w_trainable is True + ): + for id_parameter in network[id].parameters(): + parameter_list_weights.append(id_parameter) + parameter_list_sbs.append(False) + + if (isinstance(network[id], Conv2dApproximation) is True) and ( + network[id]._w_trainable is True + ): + for id_parameter in network[id].parameters(): + parameter_list_weights.append(id_parameter) + parameter_list_sbs.append(False) + + logging.info( + f"Number of parameters found to optimize: {len(parameter_list_weights)}" + ) + + # ############################################### + # Connect the parameters to an optimizer + # ############################################### + + if cfg.learning_parameters.optimizer_name == "Adam": + logging.info("Using optimizer: Adam") + + if len(parameter_list_weights) == 0: + optimizer_wf: torch.optim.Optimizer | None = None + elif cfg.learning_parameters.learning_rate_gamma_w > 0: + optimizer_wf = Adam( + parameter_list_weights, + parameter_list_sbs, + lr=cfg.learning_parameters.learning_rate_gamma_w, + ) + else: + optimizer_wf = Adam(parameter_list_weights, parameter_list_sbs) + + elif cfg.learning_parameters.optimizer_name == "SGD": + logging.info("Using optimizer: SGD") + + if len(parameter_list_weights) == 0: + optimizer_wf = None + elif cfg.learning_parameters.learning_rate_gamma_w > 0: + optimizer_wf = torch.optim.SGD( + parameter_list_weights, + lr=cfg.learning_parameters.learning_rate_gamma_w, + ) + else: + assert cfg.learning_parameters.learning_rate_gamma_w > 0 + + else: + raise Exception("Optimizer not implemented") + + optimizer = [] + optimizer.append(optimizer_wf) + return optimizer diff --git a/network/calculate_output_size.py b/network/calculate_output_size.py new file mode 100644 index 0000000..d99681e --- /dev/null +++ b/network/calculate_output_size.py @@ -0,0 +1,95 @@ +# %% +import torch + + +def calculate_output_size( + value: list[int], + kernel_size: list[int], + stride: list[int], + dilation: list[int], + padding: list[int], +) -> torch.Tensor: + + assert len(value) == 2 + assert len(kernel_size) == 2 + assert len(stride) == 2 + assert len(dilation) == 2 + assert len(padding) == 2 + + coordinates_0, coordinates_1 = get_coordinates( + value=value, + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=padding, + ) + + output_size: torch.Tensor = torch.tensor( + [ + coordinates_0.shape[1], + coordinates_1.shape[1], + ], + dtype=torch.int64, + ) + return output_size + + +def get_coordinates( + value: list[int], + kernel_size: list[int], + stride: list[int], + dilation: list[int], + padding: list[int], +) -> tuple[torch.Tensor, torch.Tensor]: + """Function converts parameter in coordinates + for the convolution window""" + + unfold_0: torch.nn.Unfold = torch.nn.Unfold( + kernel_size=(int(kernel_size[0]), 1), + dilation=int(dilation[0]), + padding=int(padding[0]), + stride=int(stride[0]), + ) + + unfold_1: torch.nn.Unfold = torch.nn.Unfold( + kernel_size=(1, int(kernel_size[1])), + dilation=int(dilation[1]), + padding=int(padding[1]), + stride=int(stride[1]), + ) + + coordinates_0: torch.Tensor = ( + unfold_0( + torch.unsqueeze( + torch.unsqueeze( + torch.unsqueeze( + torch.arange(0, int(value[0]), 
dtype=torch.float32), + 1, + ), + 0, + ), + 0, + ) + ) + .squeeze(0) + .type(torch.int64) + ) + + coordinates_1: torch.Tensor = ( + unfold_1( + torch.unsqueeze( + torch.unsqueeze( + torch.unsqueeze( + torch.arange(0, int(value[1]), dtype=torch.float32), + 0, + ), + 0, + ), + 0, + ) + ) + .squeeze(0) + .type(torch.int64) + ) + + return coordinates_0, coordinates_1 diff --git a/DATA_CIFAR10/convert.py b/network/dataset_collection/DATA_CIFAR10/convert.py similarity index 100% rename from DATA_CIFAR10/convert.py rename to network/dataset_collection/DATA_CIFAR10/convert.py diff --git a/DATA_CIFAR10/data_url.txt b/network/dataset_collection/DATA_CIFAR10/data_url.txt similarity index 100% rename from DATA_CIFAR10/data_url.txt rename to network/dataset_collection/DATA_CIFAR10/data_url.txt diff --git a/network/dataset_collection/DATA_CIFAR10/dataset.json b/network/dataset_collection/DATA_CIFAR10/dataset.json new file mode 100644 index 0000000..01eb008 --- /dev/null +++ b/network/dataset_collection/DATA_CIFAR10/dataset.json @@ -0,0 +1,4 @@ +{ + "data_path": "./DATA_CIFAR10/", + "data_mode": "CIFAR10" +} diff --git a/DATA_MNIST/convert.py b/network/dataset_collection/DATA_FASHION_MNIST/convert.py similarity index 100% rename from DATA_MNIST/convert.py rename to network/dataset_collection/DATA_FASHION_MNIST/convert.py diff --git a/network/dataset_collection/DATA_FASHION_MNIST/data_url.txt b/network/dataset_collection/DATA_FASHION_MNIST/data_url.txt new file mode 100644 index 0000000..58ff44e --- /dev/null +++ b/network/dataset_collection/DATA_FASHION_MNIST/data_url.txt @@ -0,0 +1,8 @@ +https://github.com/zalandoresearch/fashion-mnist + +We need: +t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz + +Then +gzip -d *.gz +python convert.py diff --git a/network/dataset_collection/DATA_FASHION_MNIST/dataset.json b/network/dataset_collection/DATA_FASHION_MNIST/dataset.json new file mode 100644 index 0000000..76d5c06 --- /dev/null +++ b/network/dataset_collection/DATA_FASHION_MNIST/dataset.json @@ -0,0 +1,4 @@ +{ + "data_path": "./DATA_FASHION_MNIST/", + "data_mode": "MNIST_FASHION" +} diff --git a/network/dataset_collection/DATA_MNIST/convert.py b/network/dataset_collection/DATA_MNIST/convert.py new file mode 100644 index 0000000..dc2e15b --- /dev/null +++ b/network/dataset_collection/DATA_MNIST/convert.py @@ -0,0 +1,161 @@ +# MIT License +# Copyright 2022 University of Bremen +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +# THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +# +# David Rotermund ( davrot@uni-bremen.de ) +# +# +# Release history: +# ================ +# 1.0.0 -- 01.05.2022: first release +# +# + +import numpy as np + +# [offset] [type] [value] [description] +# 0000 32 bit integer 0x00000801(2049) magic number (MSB first) +# 0004 32 bit integer 60000 number of items +# 0008 unsigned byte ?? label +# 0009 unsigned byte ?? label +# ........ +# xxxx unsigned byte ?? label +# The labels values are 0 to 9. + + +class ReadLabel: + """Class for reading the labels from an MNIST label file""" + + def __init__(self, filename): + self.filename: str = filename + self.data = self.read_from_file(filename) + + def read_from_file(self, filename): + int32_data = np.dtype(np.uint32) + int32_data = int32_data.newbyteorder(">") + file = open(filename, "rb") + + magic_flag = np.frombuffer(file.read(4), int32_data)[0] + + if magic_flag != 2049: + data = np.zeros(0) + number_of_elements = 0 + else: + number_of_elements = np.frombuffer(file.read(4), int32_data)[0] + + if number_of_elements < 1: + data = np.zeros(0) + else: + data = np.frombuffer(file.read(number_of_elements), dtype=np.uint8) + + file.close() + + return data + + +# [offset] [type] [value] [description] +# 0000 32 bit integer 0x00000803(2051) magic number +# 0004 32 bit integer 60000 number of images +# 0008 32 bit integer 28 number of rows +# 0012 32 bit integer 28 number of columns +# 0016 unsigned byte ?? pixel +# 0017 unsigned byte ?? pixel +# ........ +# xxxx unsigned byte ?? pixel +# Pixels are organized row-wise. +# Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black). + + +class ReadPicture: + """Class for reading the images from an MNIST image file""" + + def __init__(self, filename): + self.filename: str = filename + self.data = self.read_from_file(filename) + + def read_from_file(self, filename): + int32_data = np.dtype(np.uint32) + int32_data = int32_data.newbyteorder(">") + file = open(filename, "rb") + + magic_flag = np.frombuffer(file.read(4), int32_data)[0] + + if magic_flag != 2051: + data = np.zeros(0) + number_of_elements = 0 + else: + number_of_elements = np.frombuffer(file.read(4), int32_data)[0] + + if number_of_elements < 1: + data = np.zeros(0) + number_of_rows = 0 + else: + number_of_rows = np.frombuffer(file.read(4), int32_data)[0] + + if number_of_rows != 28: + data = np.zeros(0) + number_of_columns = 0 + else: + number_of_columns = np.frombuffer(file.read(4), int32_data)[0] + + if number_of_columns != 28: + data = np.zeros(0) + else: + data = np.frombuffer( + file.read(number_of_elements * number_of_rows * number_of_columns), + dtype=np.uint8, + ) + data = data.reshape(number_of_elements, number_of_columns, number_of_rows) + + file.close() + + return data + + +def proprocess_data_set(test_mode): + + if test_mode is True: + filename_out_pattern: str = "TestPatternStorage.npy" + filename_out_label: str = "TestLabelStorage.npy" + filename_in_image: str = "t10k-images-idx3-ubyte" + filename_in_label = "t10k-labels-idx1-ubyte" + else: + filename_out_pattern = "TrainPatternStorage.npy" + filename_out_label = "TrainLabelStorage.npy" + filename_in_image = "train-images-idx3-ubyte" + filename_in_label = "train-labels-idx1-ubyte" + + pictures = ReadPicture(filename_in_image) + labels = ReadLabel(filename_in_label) + + # Down to 0 ... 
1.0 + max_value = np.max(pictures.data.astype(np.float32)) + d = np.float32(pictures.data.astype(np.float32) / max_value) + + label_storage = np.uint64(labels.data) + pattern_storage = d.astype(np.float32) + + np.save(filename_out_pattern, pattern_storage) + np.save(filename_out_label, label_storage) + + +proprocess_data_set(True) +proprocess_data_set(False) diff --git a/DATA_MNIST/data_url.txt b/network/dataset_collection/DATA_MNIST/data_url.txt similarity index 100% rename from DATA_MNIST/data_url.txt rename to network/dataset_collection/DATA_MNIST/data_url.txt diff --git a/network/dataset_collection/DATA_MNIST/dataset.json b/network/dataset_collection/DATA_MNIST/dataset.json new file mode 100644 index 0000000..7f74d48 --- /dev/null +++ b/network/dataset_collection/DATA_MNIST/dataset.json @@ -0,0 +1,4 @@ +{ + "data_path": "./DATA_MNIST/", + "data_mode": "MNIST" +} diff --git a/network/load_previous_weights.py b/network/load_previous_weights.py new file mode 100644 index 0000000..636164f --- /dev/null +++ b/network/load_previous_weights.py @@ -0,0 +1,144 @@ +# %% +import torch +import glob +import numpy as np + +from network.SbS import SbS +from network.SplitOnOffLayer import SplitOnOffLayer +from network.Conv2dApproximation import Conv2dApproximation + + +def load_previous_weights( + network: torch.nn.Sequential, + overload_path: str, + logging, + device: torch.device, + default_dtype: torch.dtype, +) -> None: + + for id in range(0, len(network)): + + # ################################################# + # SbS + # ################################################# + + if isinstance(network[id], SbS) is True: + # Are there weights that overwrite the initial weights? + file_to_load = glob.glob(overload_path + "/Weight_L" + str(id) + "_*.npy") + + if len(file_to_load) > 1: + raise Exception( + f"Too many previous weights files {overload_path}/Weight_L{id}*.npy" + ) + + if len(file_to_load) == 1: + network[id].weights = torch.tensor( + np.load(file_to_load[0]), + dtype=default_dtype, + device=device, + ) + logging.info(f"Weights file used for layer {id} : {file_to_load[0]}") + + if isinstance(network[id], torch.nn.modules.conv.Conv2d) is True: + + # ################################################# + # Conv2d weights + # ################################################# + + # Are there weights that overwrite the initial weights? + file_to_load = glob.glob(overload_path + "/Weight_L" + str(id) + "_*.npy") + + if len(file_to_load) > 1: + raise Exception( + f"Too many previous weights files {overload_path}/Weight_L{id}*.npy" + ) + + if len(file_to_load) == 1: + network[id]._parameters["weight"].data = torch.tensor( + np.load(file_to_load[0]), + dtype=default_dtype, + device=device, + ) + logging.info(f"Weights file used for layer {id} : {file_to_load[0]}") + + # ################################################# + # Conv2d bias + # ################################################# + + # Are there biases that overwrite the initial weights? 
+ file_to_load = glob.glob(overload_path + "/Bias_L" + str(id) + "_*.npy") + + if len(file_to_load) > 1: + raise Exception( + f"Too many previous bias files {overload_path}/Bias_L{id}*.npy" + ) + + if len(file_to_load) == 1: + network[id]._parameters["bias"].data = torch.tensor( + np.load(file_to_load[0]), + dtype=default_dtype, + device=device, + ) + logging.info(f"Bias file used for layer {id} : {file_to_load[0]}") + + if isinstance(network[id], Conv2dApproximation) is True: + + # ################################################# + # Approximate Conv2d weights + # ################################################# + + # Are there weights that overwrite the initial weights? + file_to_load = glob.glob(overload_path + "/Weight_L" + str(id) + "_*.npy") + + if len(file_to_load) > 1: + raise Exception( + f"Too many previous weights files {overload_path}/Weight_L{id}*.npy" + ) + + if len(file_to_load) == 1: + network[id].weights.data = torch.tensor( + np.load(file_to_load[0]), + dtype=default_dtype, + device=device, + ) + logging.info(f"Weights file used for layer {id} : {file_to_load[0]}") + + # ################################################# + # Approximate Conv2d bias + # ################################################# + + # Are there biases that overwrite the initial bias? + file_to_load = glob.glob(overload_path + "/Bias_L" + str(id) + "_*.npy") + + if len(file_to_load) > 1: + raise Exception( + f"Too many previous bias files {overload_path}/Bias_L{id}*.npy" + ) + + if len(file_to_load) == 1: + network[id].bias.data = torch.tensor( + np.load(file_to_load[0]), + dtype=default_dtype, + device=device, + ) + logging.info(f"Bias file used for layer {id} : {file_to_load[0]}") + + # ################################################# + # SplitOnOffLayer + # ################################################# + if isinstance(network[id], SplitOnOffLayer) is True: + # Are there stored mean values that overwrite the initial mean? 
+ file_to_load = glob.glob(overload_path + "/Mean_L" + str(id) + "_*.npy") + + if len(file_to_load) > 1: + raise Exception( + f"Too many previous mean files {overload_path}/Mean_L{id}*.npy" + ) + + if len(file_to_load) == 1: + network[id].mean = torch.tensor( + np.load(file_to_load[0]), + dtype=default_dtype, + device=device, + ) + logging.info(f"Meanfile used for layer {id} : {file_to_load[0]}") diff --git a/network/loop_train_test.py b/network/loop_train_test.py new file mode 100644 index 0000000..6801417 --- /dev/null +++ b/network/loop_train_test.py @@ -0,0 +1,512 @@ +import torch +import time +from network.Parameter import Config +from torch.utils.tensorboard import SummaryWriter + +from network.SbS import SbS +from network.save_weight_and_bias import save_weight_and_bias + + +def add_weight_and_bias_to_histogram( + network: torch.nn.modules.container.Sequential, + tb: SummaryWriter, + iteration_number: int, +) -> None: + + for id in range(0, len(network)): + + # ################################################ + # Log the SbS Weights + # ################################################ + if isinstance(network[id], SbS) is True: + if network[id]._w_trainable is True: + + try: + tb.add_histogram( + f"Weights Layer {id}", + network[id].weights, + iteration_number, + ) + except ValueError: + pass + + # ################################################ + # Log the Conv2 Weights and Biases + # ################################################ + if isinstance(network[id], torch.nn.modules.conv.Conv2d) is True: + if network[id]._w_trainable is True: + + try: + tb.add_histogram( + f"Weights Layer {id}", + network[id]._parameters["weight"].data, + iteration_number, + ) + except ValueError: + pass + try: + tb.add_histogram( + f"Bias Layer {id}", + network[id]._parameters["bias"].data, + iteration_number, + ) + except ValueError: + pass + tb.flush() + + +# loss_mode == 0: "normal" SbS loss function mixture +# loss_mode == 1: cross_entropy +def loss_function( + h: torch.Tensor, + labels: torch.Tensor, + device: torch.device, + default_dtype: torch.dtype, + loss_mode: int = 0, + number_of_output_neurons: int = 10, + loss_coeffs_mse: float = 0.0, + loss_coeffs_kldiv: float = 0.0, +) -> torch.Tensor | None: + + assert loss_mode >= 0 + assert loss_mode <= 1 + + h = h.squeeze(-1).squeeze(-1) + assert h.ndim == 2 + + if loss_mode == 0: + + # Convert label into one hot + target_one_hot: torch.Tensor = torch.zeros( + ( + labels.shape[0], + number_of_output_neurons, + ), + device=device, + dtype=default_dtype, + ) + + target_one_hot.scatter_( + 1, + labels.to(device).unsqueeze(1), + torch.ones( + (labels.shape[0], 1), + device=device, + dtype=default_dtype, + ), + ).unsqueeze(-1).unsqueeze(-1) + + h_y1 = torch.log(h + 1e-20) + + my_loss: torch.Tensor = ( + torch.nn.functional.mse_loss( + h, + target_one_hot, + reduction="sum", + ) + * loss_coeffs_mse + + torch.nn.functional.kl_div(h_y1, target_one_hot + 1e-20, reduction="sum") + * loss_coeffs_kldiv + ) / (loss_coeffs_kldiv + loss_coeffs_mse) + + return my_loss + elif loss_mode == 1: + my_loss = torch.nn.functional.cross_entropy( + h.squeeze(-1).squeeze(-1), labels.to(device) + ) + return my_loss + else: + return None + + +def forward_pass_train( + input: torch.Tensor, + labels: torch.Tensor, + the_dataset_train, + cfg: Config, + network: torch.nn.modules.container.Sequential, + device: torch.device, + default_dtype: torch.dtype, +) -> list[torch.Tensor]: + + h_collection = [] + h_collection.append( + the_dataset_train.pattern_filter_train(input, cfg) + 
.type(dtype=default_dtype) + .to(device=device) + ) + for id in range(0, len(network)): + if isinstance(network[id], SbS) is True: + h_collection.append(network[id](h_collection[-1], labels)) + else: + h_collection.append(network[id](h_collection[-1])) + + return h_collection + + +def forward_pass_test( + input: torch.Tensor, + the_dataset_test, + cfg: Config, + network: torch.nn.modules.container.Sequential, + device: torch.device, + default_dtype: torch.dtype, +) -> list[torch.Tensor]: + + h_collection = [] + h_collection.append( + the_dataset_test.pattern_filter_test(input, cfg) + .type(dtype=default_dtype) + .to(device=device) + ) + for id in range(0, len(network)): + h_collection.append(network[id](h_collection[-1])) + + return h_collection + + +def run_optimizer( + network: torch.nn.modules.container.Sequential, + optimizer: list, + cfg: Config, +) -> None: + for id in range(0, len(network)): + if isinstance(network[id], SbS) is True: + network[id].update_pre_care() + + for optimizer_item in optimizer: + if optimizer_item is not None: + optimizer_item.step() + + for id in range(0, len(network)): + if isinstance(network[id], SbS) is True: + network[id].update_after_care( + cfg.learning_parameters.learning_rate_threshold_w + / float( + network[id]._number_of_input_neurons + # * network[id]._kernel_size[0] + # * network[id]._kernel_size[1] + ), + ) + + +# #################################### +# Update the learning rate +# #################################### +def run_lr_scheduler( + cfg: Config, + lr_scheduler, + optimizer, + performance_for_batch: float, + my_loss_for_batch: float, + tb, + logging, +) -> None: + # Inter-epoch learning rate adaptation + for lr_scheduler_item in lr_scheduler: + if ( + (lr_scheduler_item is not None) + and (performance_for_batch >= 0.0) + and (my_loss_for_batch >= 0.0) + ): + if cfg.learning_parameters.lr_scheduler_use_performance is True: + lr_scheduler_item.step(100.0 - performance_for_batch) + else: + lr_scheduler_item.step(my_loss_for_batch) + + tb.add_scalar( + "Train Error", + 100.0 - performance_for_batch, + cfg.epoch_id, + ) + tb.add_scalar("Train Loss", my_loss_for_batch, cfg.epoch_id) + tb.add_scalar( + "Learning Rate Scale WF", + optimizer[0].param_groups[-1]["lr"], + cfg.epoch_id, + ) + tb.flush() + + +# def deal_with_gradient_scale(epoch_id: int, mini_batch_number: int, network): +# if (epoch_id == 0) and (mini_batch_number == 0): +# for id in range(0, len(network)): +# if isinstance(network[id], SbS) is True: +# network[id].after_batch(True) +# else: +# for id in range(0, len(network)): +# if isinstance(network[id], SbS) is True: +# network[id].after_batch() + + +def loop_train( + cfg: Config, + network: torch.nn.modules.container.Sequential, + my_loader_train: torch.utils.data.dataloader.DataLoader, + the_dataset_train, + optimizer: list, + device: torch.device, + default_dtype: torch.dtype, + logging, + adapt_learning_rate: bool, + tb: SummaryWriter, + lr_scheduler, + last_test_performance: float, +) -> tuple[float, float, float, float]: + + correct_in_minibatch: int = 0 + loss_in_minibatch: float = 0.0 + number_of_pattern_in_minibatch: int = 0 + + mini_batch_number: int = -1 + + full_loss: float = 0.0 + full_correct: float = 0.0 + full_count: float = 0.0 + + epoch_id: int = cfg.epoch_id + + my_loss_for_batch: float = -1.0 + performance_for_batch: float = -1.0 + + time_forward: float = 0.0 + time_backward: float = 0.0 + + with torch.enable_grad(): + + for h_x, h_x_labels in my_loader_train: + + time_mini_batch_start: float = 
time.perf_counter() + + # ############################################################ + # Reset the gradient after an update (or the first loop pass) + # ############################################################ + if number_of_pattern_in_minibatch == 0: + + # Reset the gradient of the torch optimizers + for optimizer_item in optimizer: + if optimizer_item is not None: + optimizer_item.zero_grad() + + loss_in_minibatch = 0.0 + mini_batch_number += 1 + correct_in_minibatch = 0 + time_forward = 0.0 + time_backward = 0.0 + + # #################################### + # Update the learning rate + # #################################### + if adapt_learning_rate is True: + run_lr_scheduler( + cfg=cfg, + lr_scheduler=lr_scheduler, + optimizer=optimizer, + performance_for_batch=performance_for_batch, + my_loss_for_batch=my_loss_for_batch, + tb=tb, + logging=logging, + ) + + logging.info( + ( + f"\t\t\tLearning rate: " + f"weights:{optimizer[0].param_groups[-1]['lr']:^15.3e} " + ) + ) + + if last_test_performance < 0: + logging.info("") + else: + logging.info( + ( + f"\t\t\tLast test performance: " + f"{last_test_performance/100.0:^6.2%}" + ) + ) + logging.info("----------------") + + number_of_pattern_in_minibatch += h_x_labels.shape[0] + full_count += h_x_labels.shape[0] + + # ##################################################### + # The network does the forward pass (training) + # ##################################################### + h_collection = forward_pass_train( + input=h_x, + labels=h_x_labels, + the_dataset_train=the_dataset_train, + cfg=cfg, + network=network, + device=device, + default_dtype=default_dtype, + ) + + # ##################################################### + # Calculate the loss function + # ##################################################### + my_loss: torch.Tensor | None = loss_function( + h=h_collection[-1], + labels=h_x_labels, + device=device, + default_dtype=default_dtype, + loss_mode=cfg.learning_parameters.loss_mode, + number_of_output_neurons=int( + cfg.network_structure.number_of_output_neurons + ), + loss_coeffs_mse=float(cfg.learning_parameters.loss_coeffs_mse), + loss_coeffs_kldiv=float(cfg.learning_parameters.loss_coeffs_kldiv), + ) + assert my_loss is not None + + time_after_forward_and_loss: float = time.perf_counter() + + # ##################################################### + # Backward pass + # ##################################################### + my_loss.backward() + loss_in_minibatch += my_loss.item() + full_loss += my_loss.item() + + time_after_backward: float = time.perf_counter() + + # ##################################################### + # Performance measures + # ##################################################### + + correct_in_minibatch += ( + (h_collection[-1].argmax(dim=1).squeeze().cpu() == h_x_labels) + .sum() + .item() + ) + full_correct += ( + (h_collection[-1].argmax(dim=1).squeeze().cpu() == h_x_labels) + .sum() + .item() + ) + + # We measure the scale of the propagated error + # during the first minibatch + # then we remember this size and scale + # the future error with it + # Kind of deals with the vanishing / + # exploding gradients + # deal_with_gradient_scale( + # epoch_id=epoch_id, + # mini_batch_number=mini_batch_number, + # network=network, + # ) + + # Measure the time for one mini-batch + time_forward += time_after_forward_and_loss - time_mini_batch_start + time_backward += time_after_backward - time_after_forward_and_loss + + if number_of_pattern_in_minibatch >= cfg.get_update_after_x_pattern(): + + logging.info( + 
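# Per-minibatch progress line: epoch, minibatch index, pattern count, accuracy and accumulated forward time. +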
( + f"{epoch_id:^6}=>{mini_batch_number:^6} " + f"\t\tTraining {number_of_pattern_in_minibatch^6} pattern " + f"with {correct_in_minibatch/number_of_pattern_in_minibatch:^6.2%} " + f"\tForward time: \t{time_forward:^6.2f}sec" + ) + ) + + logging.info( + ( + f"\t\t\tLoss: {loss_in_minibatch/number_of_pattern_in_minibatch:^15.3e} " + f"\t\t\tBackward time: \t{time_backward:^6.2f}sec " + ) + ) + + my_loss_for_batch = loss_in_minibatch / number_of_pattern_in_minibatch + + performance_for_batch = ( + 100.0 * correct_in_minibatch / number_of_pattern_in_minibatch + ) + + # ################################################ + # Update the weights and biases + # ################################################ + run_optimizer(network=network, optimizer=optimizer, cfg=cfg) + + # ################################################ + # Save the Weights and Biases + # ################################################ + save_weight_and_bias( + cfg=cfg, network=network, iteration_number=epoch_id + ) + + # ################################################ + # Log the Weights and Biases + # ################################################ + add_weight_and_bias_to_histogram( + network=network, + tb=tb, + iteration_number=epoch_id, + ) + + # ################################################ + # Mark mini batch as done + # ################################################ + number_of_pattern_in_minibatch = 0 + + return ( + my_loss_for_batch, + performance_for_batch, + (full_loss / full_count), + (100.0 * full_correct / full_count), + ) + + +def loop_test( + epoch_id: int, + cfg: Config, + network: torch.nn.modules.container.Sequential, + my_loader_test: torch.utils.data.dataloader.DataLoader, + the_dataset_test, + device: torch.device, + default_dtype: torch.dtype, + logging, + tb: SummaryWriter, +) -> float: + + test_correct = 0 + test_count = 0 + test_complete: int = the_dataset_test.__len__() + + logging.info("") + logging.info("Testing:") + + for h_x, h_x_labels in my_loader_test: + time_0 = time.perf_counter() + + h_collection = forward_pass_test( + input=h_x, + the_dataset_test=the_dataset_test, + cfg=cfg, + network=network, + device=device, + default_dtype=default_dtype, + ) + h_h: torch.Tensor = h_collection[-1].detach().clone().cpu() + + test_correct += (h_h.argmax(dim=1).squeeze() == h_x_labels).sum().numpy() + test_count += h_h.shape[0] + performance = 100.0 * test_correct / test_count + time_1 = time.perf_counter() + time_measure_a = time_1 - time_0 + + logging.info( + ( + f"\t\t{test_count} of {test_complete}" + f" with {performance/100:^6.2%} \t Time used: {time_measure_a:^6.2f}sec" + ) + ) + + logging.info("") + + tb.add_scalar("Test Error", 100.0 - performance, epoch_id) + tb.flush() + + return performance diff --git a/network/save_weight_and_bias.py b/network/save_weight_and_bias.py new file mode 100644 index 0000000..ee42451 --- /dev/null +++ b/network/save_weight_and_bias.py @@ -0,0 +1,71 @@ +import torch + +from network.Parameter import Config + +import numpy as np +from network.SbS import SbS +from network.SplitOnOffLayer import SplitOnOffLayer +from network.Conv2dApproximation import Conv2dApproximation + + +def save_weight_and_bias( + cfg: Config, network: torch.nn.modules.container.Sequential, iteration_number: int +) -> None: + + for id in range(0, len(network)): + + # ################################################ + # Save the SbS Weights + # ################################################ + + if isinstance(network[id], SbS) is True: + if network[id]._w_trainable is True: + + np.save( + 
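# The file name encodes the layer index (L<id>) and the iteration number (S<iteration_number>), matching the glob pattern used when previous weights are reloaded. +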
f"{cfg.weight_path}/Weight_L{id}_S{iteration_number}.npy", + network[id].weights.detach().cpu().numpy(), + ) + + # ################################################ + # Save the Conv2 Weights and Biases + # ################################################ + + if isinstance(network[id], torch.nn.modules.conv.Conv2d) is True: + if network[id]._w_trainable is True: + # Save the new values + np.save( + f"{cfg.weight_path}/Weight_L{id}_S{iteration_number}.npy", + network[id]._parameters["weight"].data.detach().cpu().numpy(), + ) + + # Save the new values + np.save( + f"{cfg.weight_path}/Bias_L{id}_S{iteration_number}.npy", + network[id]._parameters["bias"].data.detach().cpu().numpy(), + ) + + # ################################################ + # Save the Approximate Conv2 Weights and Biases + # ################################################ + + if isinstance(network[id], Conv2dApproximation) is True: + if network[id]._w_trainable is True: + # Save the new values + np.save( + f"{cfg.weight_path}/Weight_L{id}_S{iteration_number}.npy", + network[id].weights.data.detach().cpu().numpy(), + ) + + # Save the new values + if network[id].bias is not None: + np.save( + f"{cfg.weight_path}/Bias_L{id}_S{iteration_number}.npy", + network[id].bias.data.detach().cpu().numpy(), + ) + + if isinstance(network[id], SplitOnOffLayer) is True: + + np.save( + f"{cfg.weight_path}/Mean_L{id}_S{iteration_number}.npy", + network[id].mean.detach().cpu().numpy(), + ) diff --git a/network/test_settings_fashion/dataset.json b/network/test_settings_fashion/dataset.json new file mode 100644 index 0000000..76d5c06 --- /dev/null +++ b/network/test_settings_fashion/dataset.json @@ -0,0 +1,4 @@ +{ + "data_path": "./DATA_FASHION_MNIST/", + "data_mode": "MNIST_FASHION" +} diff --git a/network/test_settings_fashion/def_approx_cnn.json b/network/test_settings_fashion/def_approx_cnn.json new file mode 100644 index 0000000..226c664 --- /dev/null +++ b/network/test_settings_fashion/def_approx_cnn.json @@ -0,0 +1,29 @@ +{ + "epoch_id_max": 200, + "batch_size": 24, + "stage_id": 0, + "simulation_id": 0, + "learning_parameters": { + "loss_mode": 1, // X-Entropy + "number_of_batches_for_one_update": 20, + "learning_rate_gamma_w": 0.001, + "lr_scheduler_patience_w": -1, + "adapt_learning_rate_after_minibatch": false, + "w_trainable": [ + true + ] + }, + "augmentation": {}, + "image_statistics": {}, + "approximation_setting": { + "number_of_frac_bits": [ + 30 + ], // Quantization of the mantissa + "approximation_enable": [ + true + ], // In addition to the quantization more approx + "number_of_trunc_bits": [ + 1 + ] + } +} \ No newline at end of file diff --git a/network/test_settings_fashion/def_cnn.json b/network/test_settings_fashion/def_cnn.json new file mode 100644 index 0000000..f9f0f30 --- /dev/null +++ b/network/test_settings_fashion/def_cnn.json @@ -0,0 +1,19 @@ +{ + "epoch_id_max": 200, + "batch_size": 24, + "stage_id": 0, + "simulation_id": 0, + "learning_parameters": { + "loss_mode": 1, // X-Entropy + "number_of_batches_for_one_update": 20, + "learning_rate_gamma_w": 0.001, + "lr_scheduler_patience_w": -1, + "adapt_learning_rate_after_minibatch": false, + "w_trainable": [ + true + ] + }, + "augmentation": {}, + "image_statistics": {}, + "approximation_setting": {} +} \ No newline at end of file diff --git a/network/test_settings_fashion/network_approx_cnn.json b/network/test_settings_fashion/network_approx_cnn.json new file mode 100644 index 0000000..a5f1091 --- /dev/null +++ 
b/network/test_settings_fashion/network_approx_cnn.json @@ -0,0 +1,97 @@ +{ + "network_structure": { + "number_of_output_neurons": 10, + "layer_type": [ + "APPROX CONV2D", + "RELU", + "MAX POOLING", + "APPROX CONV2D", + "RELU", + "MAX POOLING", + "APPROX CONV2D", + "RELU", + "APPROX CONV2D" + ], + "strides": [ + [ + 1, + 1 + ], // "CONV2D" + [], // "RELU", + [ + 2, + 2 + ], // "MAX POOLING", + [ + 1, + 1 + ], // "CONV2D" + [], // "RELU", + [ + 2, + 2 + ], // "MAX POOLING", + [ + 1, + 1 + ], // "CONV2D" + [], // "RELU", + [ + 1, + 1 + ] // "CONV2D" + ], + "forward_neuron_numbers": [ + [ + 1, + 32 + ], // "CONV2D", + [], // "RELU", + [], // "MAX POOLING", + [ + 32, + 64 + ], // "CONV2D", + [], // "RELU", + [], // "MAX POOLING", + [ + 64, + 96 + ], // "CONV2D", + [], // "RELU", + [ + 96, + 10 + ] // "CONV2D", + ], + "forward_kernel_size": [ + [ + 5, + 5 + ], // "CONV2D", + [], // "RELU", + [ + 2, + 2 + ], // "MAX POOLING", + [ + 5, + 5 + ], // "CONV2D", + [], // "RELU", + [ + 2, + 2 + ], // "MAX POOLING", + [ + 3, + 3 + ], // "CONV2D", + [], // "RELU", + [ + 1, + 1 + ] // "CONV2D", + ] + } +} \ No newline at end of file diff --git a/network/test_settings_fashion/network_cnn.json b/network/test_settings_fashion/network_cnn.json new file mode 100644 index 0000000..25870ac --- /dev/null +++ b/network/test_settings_fashion/network_cnn.json @@ -0,0 +1,97 @@ +{ + "network_structure": { + "number_of_output_neurons": 10, + "layer_type": [ + "CONV2D", + "RELU", + "MAX POOLING", + "CONV2D", + "RELU", + "MAX POOLING", + "CONV2D", + "RELU", + "CONV2D" + ], + "strides": [ + [ + 1, + 1 + ], // "CONV2D" + [], // "RELU", + [ + 2, + 2 + ], // "MAX POOLING", + [ + 1, + 1 + ], // "CONV2D" + [], // "RELU", + [ + 2, + 2 + ], // "MAX POOLING", + [ + 1, + 1 + ], // "CONV2D" + [], // "RELU", + [ + 1, + 1 + ] // "CONV2D" + ], + "forward_neuron_numbers": [ + [ + 1, + 32 + ], // "CONV2D", + [], // "RELU", + [], // "MAX POOLING", + [ + 32, + 64 + ], // "CONV2D", + [], // "RELU", + [], // "MAX POOLING", + [ + 64, + 96 + ], // "CONV2D", + [], // "RELU", + [ + 96, + 10 + ] // "CONV2D", + ], + "forward_kernel_size": [ + [ + 5, + 5 + ], // "CONV2D", + [], // "RELU", + [ + 2, + 2 + ], // "MAX POOLING", + [ + 5, + 5 + ], // "CONV2D", + [], // "RELU", + [ + 2, + 2 + ], // "MAX POOLING", + [ + 3, + 3 + ], // "CONV2D", + [], // "RELU", + [ + 1, + 1 + ] // "CONV2D", + ] + } +} \ No newline at end of file diff --git a/network/unused_code/LinearApproximation.py b/network/unused_code/LinearApproximation.py new file mode 100644 index 0000000..a9b0b3d --- /dev/null +++ b/network/unused_code/LinearApproximation.py @@ -0,0 +1,186 @@ +import torch +import math + +from network.CPP.PyMultiApp import MultiApp + + +class LinearApproximation(torch.nn.Module): + + in_features: int | None = None + out_features: int | None = None + use_bias: bool = False + + approximation_enable: bool = False + number_of_trunc_bits: int = -1 + number_of_frac: int = -1 + + number_of_processes: int = 1 + + weights: torch.nn.parameter.Parameter + bias: torch.nn.parameter.Parameter | None + + device: torch.device + dtype: torch.dtype + + def __init__( + self, + in_features: int, + out_features: int, + bias: bool = True, + approximation_enable: bool = False, + number_of_trunc_bits: int = -1, + number_of_frac: int = -1, + number_of_processes: int = 1, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> None: + super().__init__() + + assert device is not None + self.device = device + + assert dtype is not None + self.dtype = dtype + + 
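# Store the layer geometry and approximation settings; forward() later packs them into parameter_list for the MultiApp extension (network.CPP.PyMultiApp). +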
self.in_features = in_features + self.out_channels = out_features + self.use_bias = bias + + self.approximation_enable = approximation_enable + self.number_of_trunc_bits = number_of_trunc_bits + self.number_of_frac = number_of_frac + + self.number_of_processes = number_of_processes + + if self.use_bias is True: + self.bias: torch.nn.parameter.Parameter | None = ( + torch.nn.parameter.Parameter( + torch.empty( + (out_features), + dtype=self.dtype, + device=self.device, + ) + ) + ) + else: + self.bias = None + + self.weights: torch.nn.parameter.Parameter = torch.nn.parameter.Parameter( + torch.empty( + (out_features, in_features), + dtype=self.dtype, + device=self.device, + ) + ) + + self.functional_multi = FunctionalMultiLinear.apply + + self.reset_parameters() + + def reset_parameters(self) -> None: + # Stolen from original torch conv2 code + torch.nn.init.kaiming_uniform_(self.weights, a=math.sqrt(5)) + if self.bias is not None: + fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weights) + if fan_in != 0: + bound = 1 / math.sqrt(fan_in) + torch.nn.init.uniform_(self.bias, -bound, bound) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + + assert input.dim() == 2 + + parameter_list = torch.tensor( + [ + int(self.approximation_enable), # 0 + int(self.number_of_trunc_bits), # 1 + int(self.number_of_frac), # 2 + int(self.number_of_processes), # 3 + ], + dtype=torch.int64, + ) + + output = self.functional_multi( + input.unsqueeze(-1).unsqueeze(-1), self.weights, parameter_list + ) + + output = output.squeeze(-1).squeeze(-1) + + if self.bias is not None: + output += self.bias.unsqueeze(0) + + return output + + +class FunctionalMultiLinear(torch.autograd.Function): + @staticmethod + def forward( # type: ignore + ctx, + input: torch.Tensor, + weights: torch.Tensor, + parameter_list: torch.Tensor, + ) -> torch.Tensor: + + assert input.ndim == 4 + assert input.dtype is torch.float32 + assert input.is_contiguous() is True + + assert weights.ndim == 2 + assert weights.dtype is torch.float32 + assert weights.is_contiguous() is True + + assert input.shape[1] == weights.shape[1] + + approximation_enable = bool(parameter_list[0]) + number_of_trunc_bits = int(parameter_list[1]) + number_of_frac = int(parameter_list[2]) + number_of_processes = int(parameter_list[3]) + + assert input.device == weights.device + + output = torch.zeros( + (input.shape[0], weights.shape[0], input.shape[2], input.shape[3]), + dtype=weights.dtype, + device=weights.device, + requires_grad=True, + ) + assert output.is_contiguous() is True + + multiplier: MultiApp = MultiApp() + + multiplier.update_with_init_vector_multi_pattern( + input.data_ptr(), + weights.data_ptr(), + output.data_ptr(), + int(output.shape[0]), # pattern + int(output.shape[1]), # feature channel + int(output.shape[2]), # x + int(output.shape[3]), # y + int(input.shape[1]), # input channel + int(number_of_processes), + bool(approximation_enable), + int(number_of_trunc_bits), + int(number_of_frac), + ) + + ctx.save_for_backward( + input.detach(), + weights.detach(), + ) + + return output + + @staticmethod + def backward(ctx, grad_output): + + (input, weights) = ctx.saved_tensors + + grad_input = ( + grad_output.unsqueeze(2) * weights.unsqueeze(0).unsqueeze(-1).unsqueeze(-1) + ).sum(1) + grad_weights = ( + (grad_output.unsqueeze(2) * input.unsqueeze(1)).sum(0).sum(-1).sum(-1) + ) + grad_parameter_list = None + + return (grad_input, grad_weights, grad_parameter_list) diff --git a/network_sbs_L0.json b/network_sbs_L0.json new file mode 100644 
index 0000000..19709d6 --- /dev/null +++ b/network_sbs_L0.json @@ -0,0 +1,39 @@ +{ + "network_structure": { + "number_of_output_neurons": 10, + "layer_type": [ + "SbS", + "SbS" + ], + "strides": [ + [ + 1, + 1 + ], + [ + 1, + 1 + ] + ], + "forward_neuron_numbers": [ + [ + 1, + 32 + ], + [ + 32, + 10 + ] + ], + "forward_kernel_size": [ + [ + 5, + 5 + ], + [ + 20, + 20 + ] + ] + } +} \ No newline at end of file diff --git a/network_sbs_mp.json b/network_sbs_mp.json new file mode 100644 index 0000000..a9411ac --- /dev/null +++ b/network_sbs_mp.json @@ -0,0 +1,91 @@ +{ + "network_structure": { + "number_of_output_neurons": 10, + "layer_type": [ + "SbS", + "MAX POOLING", + "SbS", + "MAX POOLING", + "SbS", + "SbS" + ], + "strides": [ + [ + 1, + 1 + ], // "SbS" + [ + 2, + 2 + ], // POOLING + [ + 1, + 1 + ], // "SbS" + [ + 2, + 2 + ], // POOLING + [ + 1, + 1 + ], // "SbS" + [ + 1, + 1 + ] // "SbS" + ], + "forward_neuron_numbers": [ + [ + 1, + 32 + ], // "SbS" + [ + 32, + 32 + ], // POOLING + [ + 32, + 64 + ], // "SbS" + [ + 64, + 64 + ], // POOLING + [ + 64, + 96 + ], // "SbS" + [ + 96, + 10 + ] // "SbS" + ], + "forward_kernel_size": [ + [ + 5, + 5 + ], // "SbS" + [ + 2, + 2 + ], // POOLING + [ + 5, + 5 + ], // "SbS" + [ + 2, + 2 + ], // POOLING + [ + 3, + 3 + ], // "SbS" + [ + 1, + 1 + ] // "SbS" + ] + } +} \ No newline at end of file diff --git a/run_spikes.sh b/run_spikes.sh new file mode 100644 index 0000000..cbcab3b --- /dev/null +++ b/run_spikes.sh @@ -0,0 +1,19 @@ +python=~/P3.10GPU/bin/python3 + +if [ -f "./def.json" ] +then + echo "json file found" +else + echo "No json file exists" + exit +fi + + +$python learn_it.py def.json +status=$? +if [ $status != "0" ] +then + echo "Problem detected (learn_it.py)!!!" + exit 1 +fi + diff --git a/test_all.sh b/test_all.sh deleted file mode 100644 index 587fbfc..0000000 --- a/test_all.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -for i in $(seq 1 1 999) -do - echo $i - /home/davrot/P3.10/bin/python3 test_it.py mnist.json $i -done \ No newline at end of file diff --git a/test_it.py b/test_it.py deleted file mode 100644 index e4c085b..0000000 --- a/test_it.py +++ /dev/null @@ -1,278 +0,0 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - -# %% -import os - -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" - -import numpy as np -import sys -import torch -import time -import dataconf -import logging -from datetime import datetime -import glob - -from Dataset import ( - DatasetMaster, - DatasetCIFAR, - DatasetMNIST, - DatasetFashionMNIST, -) -from Parameter import Config -from SbS import SbS - - -####################################################################### -# We want to log what is going on into a file and screen # -####################################################################### - -now = datetime.now() -dt_string_filename = now.strftime("%Y_%m_%d_%H_%M_%S") -logging.basicConfig( - filename="log_" + dt_string_filename + ".txt", - filemode="w", - level=logging.INFO, - format="%(asctime)s %(message)s", -) -logging.getLogger().addHandler(logging.StreamHandler()) - -####################################################################### -# Load the config data from the json file # -####################################################################### - -if len(sys.argv) < 2: - raise Exception("Argument: Config file name is missing") - -if len(sys.argv) < 3: - raise Exception("Argument: Weight and epsilon file id is missing") - - -filename: str = sys.argv[1] - -if os.path.exists(filename) is False: - raise Exception(f"Config file not found! {filename}") - -cfg = dataconf.file(filename, Config) -logging.info(f"Using configuration file: {filename}") - -cfg.learning_step = int(sys.argv[2]) -assert cfg.learning_step > 0 - -####################################################################### -# Prepare the test and training data # -####################################################################### - -# Load the input data -the_dataset_test: DatasetMaster -if cfg.data_mode == "CIFAR10": - the_dataset_test = DatasetCIFAR( - train=False, path_pattern=cfg.data_path, path_label=cfg.data_path - ) -elif cfg.data_mode == "MNIST": - the_dataset_test = DatasetMNIST( - train=False, path_pattern=cfg.data_path, path_label=cfg.data_path - ) -elif cfg.data_mode == "MNIST_FASHION": - the_dataset_test = DatasetFashionMNIST( - train=False, path_pattern=cfg.data_path, path_label=cfg.data_path - ) -else: - raise Exception("data_mode unknown") - -cfg.image_statistics.mean = the_dataset_test.mean - -# The basic size -cfg.image_statistics.the_size = [ - the_dataset_test.pattern_storage.shape[2], - the_dataset_test.pattern_storage.shape[3], -] - -# Minus the stuff we cut away in the pattern filter -cfg.image_statistics.the_size[0] -= 2 * cfg.augmentation.crop_width_in_pixel -cfg.image_statistics.the_size[1] -= 2 * cfg.augmentation.crop_width_in_pixel - -my_loader_test: torch.utils.data.DataLoader = torch.utils.data.DataLoader( - the_dataset_test, batch_size=cfg.batch_size, shuffle=False -) - -logging.info("*** Data loaded.") - -####################################################################### -# Build the network # -####################################################################### - -wf: list[np.ndarray] = [] -eps_xy: list[np.ndarray] = [] -network = torch.nn.Sequential() -for id in range(0, len(cfg.network_structure.is_pooling_layer)): - if id == 0: - input_size: list[int] = cfg.image_statistics.the_size - else: - input_size = network[id - 1].output_size.tolist() - - network.append( - SbS( - number_of_input_neurons=cfg.network_structure.forward_neuron_numbers[id][0], - 
number_of_neurons=cfg.network_structure.forward_neuron_numbers[id][1], - input_size=input_size, - forward_kernel_size=cfg.network_structure.forward_kernel_size[id], - number_of_spikes=cfg.number_of_spikes, - epsilon_t=cfg.get_epsilon_t(), - epsilon_xy_intitial=cfg.learning_parameters.eps_xy_intitial, - epsilon_0=cfg.epsilon_0, - weight_noise_amplitude=cfg.learning_parameters.weight_noise_amplitude, - is_pooling_layer=cfg.network_structure.is_pooling_layer[id], - strides=cfg.network_structure.strides[id], - dilation=cfg.network_structure.dilation[id], - padding=cfg.network_structure.padding[id], - alpha_number_of_iterations=cfg.learning_parameters.alpha_number_of_iterations, - number_of_cpu_processes=cfg.number_of_cpu_processes, - ) - ) - - eps_xy.append(network[id].epsilon_xy.detach().clone().numpy()) - wf.append(network[id].weights.detach().clone().numpy()) - -logging.info("*** Network generated.") - - -for id in range(0, len(network)): - - # Are there weights that overwrite the initial weights? - file_to_load = glob.glob( - cfg.learning_parameters.overload_path + "/Weight_L" + str(id) + "*.npy" - ) - - if len(file_to_load) > 1: - raise Exception( - f"Too many previous weights files {cfg.learning_parameters.overload_path}/Weight_L{id}*.npy" - ) - - if len(file_to_load) == 1: - network[id].weights = torch.tensor( - np.load(file_to_load[0]), - dtype=torch.float32, - ) - wf[id] = np.load(file_to_load[0]) - logging.info(f"File used: {file_to_load[0]}") - - # Are there epsinlon xy files that overwrite the initial epsilon xy? - file_to_load = glob.glob( - cfg.learning_parameters.overload_path + "/EpsXY_L" + str(id) + "*.npy" - ) - - if len(file_to_load) > 1: - raise Exception( - f"Too many previous epsilon xy files {cfg.learning_parameters.overload_path}/EpsXY_L{id}*.npy" - ) - - if len(file_to_load) == 1: - network[id].epsilon_xy = torch.tensor( - np.load(file_to_load[0]), - dtype=torch.float32, - ) - eps_xy[id] = np.load(file_to_load[0]) - logging.info(f"File used: {file_to_load[0]}") - -for id in range(0, len(network)): - # Load previous weights and epsilon xy - if cfg.learning_step > 0: - filename = ( - cfg.weight_path - + "/Weight_L" - + str(id) - + "_S" - + str(cfg.learning_step) - + ".npy" - ) - if os.path.exists(filename) is True: - network[id].weights = torch.tensor( - np.load(filename), - dtype=torch.float32, - ) - wf[id] = np.load(filename) - - filename = ( - cfg.eps_xy_path - + "/EpsXY_L" - + str(id) - + "_S" - + str(cfg.learning_step) - + ".npy" - ) - if os.path.exists(filename) is True: - network[id].epsilon_xy = torch.tensor( - np.load(filename), - dtype=torch.float32, - ) - eps_xy[id] = np.load(filename) - - -####################################################################### -# Some variable declarations # -####################################################################### - -test_correct: int = 0 -test_all: int = 0 -test_complete: int = the_dataset_test.__len__() - -logging.info("") - -with torch.no_grad(): - logging.info("Testing:") - - for h_x, h_x_labels in my_loader_test: - time_0 = time.perf_counter() - - h_h: torch.Tensor = network( - the_dataset_test.pattern_filter_test(h_x, cfg).type(dtype=torch.float32) - ) - - test_correct += (h_h.argmax(dim=1).squeeze() == h_x_labels).sum().numpy() - test_all += h_h.shape[0] - performance = 100.0 * test_correct / test_all - time_1 = time.perf_counter() - time_measure_a = time_1 - time_0 - - logging.info( - ( - f"\t\t{test_all} of {test_complete}" - f" with {performance/100:^6.2%} \t Time used: {time_measure_a:^6.2f}sec" - ) 
- ) - np_performance = np.array(performance) - np.save(f"{cfg.results_path}/{cfg.learning_step}.npy", np_performance) - - -# %%