From 982ff849b8e6bd43ef6ac059edb391246698acd9 Mon Sep 17 00:00:00 2001 From: David Rotermund <54365609+davrot@users.noreply.github.com> Date: Sun, 15 Jan 2023 00:54:46 +0100 Subject: [PATCH] Delete network/dataset_collection directory --- .../DATA_CIFAR10/convert.py | 126 -------------- .../DATA_CIFAR10/data_url.txt | 8 - .../DATA_CIFAR10/dataset.json | 4 - .../DATA_FASHION_MNIST/convert.py | 161 ------------------ .../DATA_FASHION_MNIST/data_url.txt | 8 - .../DATA_FASHION_MNIST/dataset.json | 4 - .../dataset_collection/DATA_MNIST/convert.py | 161 ------------------ .../DATA_MNIST/data_url.txt | 8 - .../DATA_MNIST/dataset.json | 4 - 9 files changed, 484 deletions(-) delete mode 100644 network/dataset_collection/DATA_CIFAR10/convert.py delete mode 100644 network/dataset_collection/DATA_CIFAR10/data_url.txt delete mode 100644 network/dataset_collection/DATA_CIFAR10/dataset.json delete mode 100644 network/dataset_collection/DATA_FASHION_MNIST/convert.py delete mode 100644 network/dataset_collection/DATA_FASHION_MNIST/data_url.txt delete mode 100644 network/dataset_collection/DATA_FASHION_MNIST/dataset.json delete mode 100644 network/dataset_collection/DATA_MNIST/convert.py delete mode 100644 network/dataset_collection/DATA_MNIST/data_url.txt delete mode 100644 network/dataset_collection/DATA_MNIST/dataset.json diff --git a/network/dataset_collection/DATA_CIFAR10/convert.py b/network/dataset_collection/DATA_CIFAR10/convert.py deleted file mode 100644 index badbfa0..0000000 --- a/network/dataset_collection/DATA_CIFAR10/convert.py +++ /dev/null @@ -1,126 +0,0 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - -import numpy as np -import pickle - - -def give_filenames(id: int) -> tuple[str, str, int]: - if id == 0: - start_id: int = 0 - prefix: str = "Test" - filename: str = "cifar-10-batches-py/test_batch" - if id == 1: - start_id = 0 - prefix = "Train" - filename = "cifar-10-batches-py/data_batch_1" - if id == 2: - start_id = 10000 - prefix = "Train" - filename = "cifar-10-batches-py/data_batch_2" - if id == 3: - start_id = 20000 - prefix = "Train" - filename = "cifar-10-batches-py/data_batch_3" - if id == 4: - start_id = 30000 - prefix = "Train" - filename = "cifar-10-batches-py/data_batch_4" - if id == 5: - start_id = 40000 - prefix = "Train" - filename = "cifar-10-batches-py/data_batch_5" - return filename, prefix, start_id - - -def load_data(filename: str) -> tuple[np.ndarray, np.ndarray]: - fo = open(filename, "rb") - dict_data = pickle.load(fo, encoding="bytes") - _, labels_temp, data_temp, _ = dict_data.items() - data: np.ndarray = np.array(data_temp[1]) - labels: np.ndarray = np.array(labels_temp[1]) - return data, labels - - -def split_into_three_color_channels( - image: np.ndarray, -) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - channel_r = image[0:1024].astype(np.float32) - channel_r = channel_r.reshape(32, 32) - channel_g = image[1024:2048].astype(np.float32) - channel_g = channel_g.reshape(32, 32) - channel_b = image[2048:3072].astype(np.float32) - channel_b = channel_b.reshape(32, 32) - return channel_r, channel_g, channel_b - - -def process_data_set(test_data_mode: bool) -> None: - - if test_data_mode is True: - filename_out_pattern: str = "TestPatternStorage.npy" - filename_out_label: str = "TestLabelStorage.npy" - number_of_pictures: int = 10000 - start_id: int = 0 - end_id: int = 0 - else: - filename_out_pattern = "TrainPatternStorage.npy" - filename_out_label = "TrainLabelStorage.npy" - number_of_pictures = 50000 - start_id = 1 - end_id = 5 - - np_data: np.ndarray = np.zeros((number_of_pictures, 32, 32, 3), dtype=np.float32) - np_label: np.ndarray = np.zeros((number_of_pictures), dtype=np.uint64) - - for id in range(start_id, end_id + 1): - filename, _, start_id_pattern = give_filenames(id) - pictures, labels = load_data(filename) - - for i in range(0, pictures.shape[0]): - channel_r, channel_g, channel_b = split_into_three_color_channels( - pictures[i, :] - ) - np_data[i + start_id_pattern, :, :, 0] = channel_r - np_data[i + start_id_pattern, :, :, 1] = channel_g - np_data[i + start_id_pattern, :, :, 2] = channel_b - np_label[i + start_id_pattern] = labels[i] - - np_data /= np.max(np_data) - - label_storage: np.ndarray = np_label.astype(dtype=np.uint64) - pattern_storage: np.ndarray = np_data.astype(dtype=np.float32) - - np.save(filename_out_pattern, pattern_storage) - np.save(filename_out_label, label_storage) - - -process_data_set(True) -process_data_set(False) diff --git a/network/dataset_collection/DATA_CIFAR10/data_url.txt b/network/dataset_collection/DATA_CIFAR10/data_url.txt deleted file mode 100644 index bcc1a82..0000000 --- a/network/dataset_collection/DATA_CIFAR10/data_url.txt +++ /dev/null @@ -1,8 +0,0 @@ -https://www.cs.toronto.edu/~kriz/cifar.html - -Download the CIFAR-10 python version -https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz - -Then -tar -xvzf cifar-10-python.tar.gz -python convert.py diff --git a/network/dataset_collection/DATA_CIFAR10/dataset.json b/network/dataset_collection/DATA_CIFAR10/dataset.json deleted file mode 100644 index 01eb008..0000000 --- a/network/dataset_collection/DATA_CIFAR10/dataset.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "data_path": "./DATA_CIFAR10/", - "data_mode": "CIFAR10" -} diff --git a/network/dataset_collection/DATA_FASHION_MNIST/convert.py b/network/dataset_collection/DATA_FASHION_MNIST/convert.py deleted file mode 100644 index dc2e15b..0000000 --- a/network/dataset_collection/DATA_FASHION_MNIST/convert.py +++ /dev/null @@ -1,161 +0,0 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - -import numpy as np - -# [offset] [type] [value] [description] -# 0000 32 bit integer 0x00000801(2049) magic number (MSB first) -# 0004 32 bit integer 60000 number of items -# 0008 unsigned byte ?? label -# 0009 unsigned byte ?? label -# ........ -# xxxx unsigned byte ?? label -# The labels values are 0 to 9. - - -class ReadLabel: - """Class for reading the labels from an MNIST label file""" - - def __init__(self, filename): - self.filename: str = filename - self.data = self.read_from_file(filename) - - def read_from_file(self, filename): - int32_data = np.dtype(np.uint32) - int32_data = int32_data.newbyteorder(">") - file = open(filename, "rb") - - magic_flag = np.frombuffer(file.read(4), int32_data)[0] - - if magic_flag != 2049: - data = np.zeros(0) - number_of_elements = 0 - else: - number_of_elements = np.frombuffer(file.read(4), int32_data)[0] - - if number_of_elements < 1: - data = np.zeros(0) - else: - data = np.frombuffer(file.read(number_of_elements), dtype=np.uint8) - - file.close() - - return data - - -# [offset] [type] [value] [description] -# 0000 32 bit integer 0x00000803(2051) magic number -# 0004 32 bit integer 60000 number of images -# 0008 32 bit integer 28 number of rows -# 0012 32 bit integer 28 number of columns -# 0016 unsigned byte ?? pixel -# 0017 unsigned byte ?? pixel -# ........ -# xxxx unsigned byte ?? pixel -# Pixels are organized row-wise. -# Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black). - - -class ReadPicture: - """Class for reading the images from an MNIST image file""" - - def __init__(self, filename): - self.filename: str = filename - self.data = self.read_from_file(filename) - - def read_from_file(self, filename): - int32_data = np.dtype(np.uint32) - int32_data = int32_data.newbyteorder(">") - file = open(filename, "rb") - - magic_flag = np.frombuffer(file.read(4), int32_data)[0] - - if magic_flag != 2051: - data = np.zeros(0) - number_of_elements = 0 - else: - number_of_elements = np.frombuffer(file.read(4), int32_data)[0] - - if number_of_elements < 1: - data = np.zeros(0) - number_of_rows = 0 - else: - number_of_rows = np.frombuffer(file.read(4), int32_data)[0] - - if number_of_rows != 28: - data = np.zeros(0) - number_of_columns = 0 - else: - number_of_columns = np.frombuffer(file.read(4), int32_data)[0] - - if number_of_columns != 28: - data = np.zeros(0) - else: - data = np.frombuffer( - file.read(number_of_elements * number_of_rows * number_of_columns), - dtype=np.uint8, - ) - data = data.reshape(number_of_elements, number_of_columns, number_of_rows) - - file.close() - - return data - - -def proprocess_data_set(test_mode): - - if test_mode is True: - filename_out_pattern: str = "TestPatternStorage.npy" - filename_out_label: str = "TestLabelStorage.npy" - filename_in_image: str = "t10k-images-idx3-ubyte" - filename_in_label = "t10k-labels-idx1-ubyte" - else: - filename_out_pattern = "TrainPatternStorage.npy" - filename_out_label = "TrainLabelStorage.npy" - filename_in_image = "train-images-idx3-ubyte" - filename_in_label = "train-labels-idx1-ubyte" - - pictures = ReadPicture(filename_in_image) - labels = ReadLabel(filename_in_label) - - # Down to 0 ... 1.0 - max_value = np.max(pictures.data.astype(np.float32)) - d = np.float32(pictures.data.astype(np.float32) / max_value) - - label_storage = np.uint64(labels.data) - pattern_storage = d.astype(np.float32) - - np.save(filename_out_pattern, pattern_storage) - np.save(filename_out_label, label_storage) - - -proprocess_data_set(True) -proprocess_data_set(False) diff --git a/network/dataset_collection/DATA_FASHION_MNIST/data_url.txt b/network/dataset_collection/DATA_FASHION_MNIST/data_url.txt deleted file mode 100644 index 58ff44e..0000000 --- a/network/dataset_collection/DATA_FASHION_MNIST/data_url.txt +++ /dev/null @@ -1,8 +0,0 @@ -https://github.com/zalandoresearch/fashion-mnist - -We need: -t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz - -Then -gzip -d *.gz -python convert.py diff --git a/network/dataset_collection/DATA_FASHION_MNIST/dataset.json b/network/dataset_collection/DATA_FASHION_MNIST/dataset.json deleted file mode 100644 index 76d5c06..0000000 --- a/network/dataset_collection/DATA_FASHION_MNIST/dataset.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "data_path": "./DATA_FASHION_MNIST/", - "data_mode": "MNIST_FASHION" -} diff --git a/network/dataset_collection/DATA_MNIST/convert.py b/network/dataset_collection/DATA_MNIST/convert.py deleted file mode 100644 index dc2e15b..0000000 --- a/network/dataset_collection/DATA_MNIST/convert.py +++ /dev/null @@ -1,161 +0,0 @@ -# MIT License -# Copyright 2022 University of Bremen -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# -# David Rotermund ( davrot@uni-bremen.de ) -# -# -# Release history: -# ================ -# 1.0.0 -- 01.05.2022: first release -# -# - -import numpy as np - -# [offset] [type] [value] [description] -# 0000 32 bit integer 0x00000801(2049) magic number (MSB first) -# 0004 32 bit integer 60000 number of items -# 0008 unsigned byte ?? label -# 0009 unsigned byte ?? label -# ........ -# xxxx unsigned byte ?? label -# The labels values are 0 to 9. - - -class ReadLabel: - """Class for reading the labels from an MNIST label file""" - - def __init__(self, filename): - self.filename: str = filename - self.data = self.read_from_file(filename) - - def read_from_file(self, filename): - int32_data = np.dtype(np.uint32) - int32_data = int32_data.newbyteorder(">") - file = open(filename, "rb") - - magic_flag = np.frombuffer(file.read(4), int32_data)[0] - - if magic_flag != 2049: - data = np.zeros(0) - number_of_elements = 0 - else: - number_of_elements = np.frombuffer(file.read(4), int32_data)[0] - - if number_of_elements < 1: - data = np.zeros(0) - else: - data = np.frombuffer(file.read(number_of_elements), dtype=np.uint8) - - file.close() - - return data - - -# [offset] [type] [value] [description] -# 0000 32 bit integer 0x00000803(2051) magic number -# 0004 32 bit integer 60000 number of images -# 0008 32 bit integer 28 number of rows -# 0012 32 bit integer 28 number of columns -# 0016 unsigned byte ?? pixel -# 0017 unsigned byte ?? pixel -# ........ -# xxxx unsigned byte ?? pixel -# Pixels are organized row-wise. -# Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black). - - -class ReadPicture: - """Class for reading the images from an MNIST image file""" - - def __init__(self, filename): - self.filename: str = filename - self.data = self.read_from_file(filename) - - def read_from_file(self, filename): - int32_data = np.dtype(np.uint32) - int32_data = int32_data.newbyteorder(">") - file = open(filename, "rb") - - magic_flag = np.frombuffer(file.read(4), int32_data)[0] - - if magic_flag != 2051: - data = np.zeros(0) - number_of_elements = 0 - else: - number_of_elements = np.frombuffer(file.read(4), int32_data)[0] - - if number_of_elements < 1: - data = np.zeros(0) - number_of_rows = 0 - else: - number_of_rows = np.frombuffer(file.read(4), int32_data)[0] - - if number_of_rows != 28: - data = np.zeros(0) - number_of_columns = 0 - else: - number_of_columns = np.frombuffer(file.read(4), int32_data)[0] - - if number_of_columns != 28: - data = np.zeros(0) - else: - data = np.frombuffer( - file.read(number_of_elements * number_of_rows * number_of_columns), - dtype=np.uint8, - ) - data = data.reshape(number_of_elements, number_of_columns, number_of_rows) - - file.close() - - return data - - -def proprocess_data_set(test_mode): - - if test_mode is True: - filename_out_pattern: str = "TestPatternStorage.npy" - filename_out_label: str = "TestLabelStorage.npy" - filename_in_image: str = "t10k-images-idx3-ubyte" - filename_in_label = "t10k-labels-idx1-ubyte" - else: - filename_out_pattern = "TrainPatternStorage.npy" - filename_out_label = "TrainLabelStorage.npy" - filename_in_image = "train-images-idx3-ubyte" - filename_in_label = "train-labels-idx1-ubyte" - - pictures = ReadPicture(filename_in_image) - labels = ReadLabel(filename_in_label) - - # Down to 0 ... 1.0 - max_value = np.max(pictures.data.astype(np.float32)) - d = np.float32(pictures.data.astype(np.float32) / max_value) - - label_storage = np.uint64(labels.data) - pattern_storage = d.astype(np.float32) - - np.save(filename_out_pattern, pattern_storage) - np.save(filename_out_label, label_storage) - - -proprocess_data_set(True) -proprocess_data_set(False) diff --git a/network/dataset_collection/DATA_MNIST/data_url.txt b/network/dataset_collection/DATA_MNIST/data_url.txt deleted file mode 100644 index 7a6b872..0000000 --- a/network/dataset_collection/DATA_MNIST/data_url.txt +++ /dev/null @@ -1,8 +0,0 @@ -http://yann.lecun.com/exdb/mnist/ - -We need: -t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz - -Then -gzip -d *.gz -python convert.py diff --git a/network/dataset_collection/DATA_MNIST/dataset.json b/network/dataset_collection/DATA_MNIST/dataset.json deleted file mode 100644 index 7f74d48..0000000 --- a/network/dataset_collection/DATA_MNIST/dataset.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "data_path": "./DATA_MNIST/", - "data_mode": "MNIST" -}