Add files via upload
This commit is contained in:
parent
9647bc9785
commit
6975d84087
9 changed files with 484 additions and 0 deletions
126
dataset_collection/DATA_CIFAR10/convert.py
Normal file
126
dataset_collection/DATA_CIFAR10/convert.py
Normal file
|
@ -0,0 +1,126 @@
|
||||||
|
# MIT License
|
||||||
|
# Copyright 2022 University of Bremen
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
# a copy of this software and associated documentation files (the "Software"),
|
||||||
|
# to deal in the Software without restriction, including without limitation
|
||||||
|
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
# and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
# Software is furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included
|
||||||
|
# in all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||||
|
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# David Rotermund ( davrot@uni-bremen.de )
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Release history:
|
||||||
|
# ================
|
||||||
|
# 1.0.0 -- 01.05.2022: first release
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
|
||||||
|
def give_filenames(id: int) -> tuple[str, str, int]:
    """Return the CIFAR-10 batch file that belongs to a batch index.

    Args:
        id: Batch index. 0 selects the test batch, 1 to 5 select the
            training batches ``data_batch_1`` to ``data_batch_5``.
            (The name shadows the builtin ``id``; it is kept so that
            existing keyword callers continue to work.)

    Returns:
        A tuple ``(filename, prefix, start_id)``: the path of the batch
        file, the string ``"Test"`` or ``"Train"``, and the index at which
        the pictures of this batch start in the combined output array.

    Raises:
        ValueError: If ``id`` is not one of 0, 1, 2, 3, 4, 5.
    """
    batches: dict[int, tuple[str, str, int]] = {
        0: ("cifar-10-batches-py/test_batch", "Test", 0),
        1: ("cifar-10-batches-py/data_batch_1", "Train", 0),
        2: ("cifar-10-batches-py/data_batch_2", "Train", 10000),
        3: ("cifar-10-batches-py/data_batch_3", "Train", 20000),
        4: ("cifar-10-batches-py/data_batch_4", "Train", 30000),
        5: ("cifar-10-batches-py/data_batch_5", "Train", 40000),
    }

    try:
        return batches[id]
    except KeyError:
        # Previously an unknown id fell through all branches and failed
        # with an UnboundLocalError at the return statement.
        raise ValueError(f"Unknown CIFAR-10 batch id: {id!r} (expected 0..5)") from None
|
||||||
|
|
||||||
|
|
||||||
|
def load_data(filename: str) -> tuple[np.ndarray, np.ndarray]:
    """Load the pictures and labels of one pickled CIFAR-10 batch file.

    Args:
        filename: Path of the pickled batch file.

    Returns:
        A tuple ``(data, labels)`` of NumPy arrays built from the
        ``b"data"`` and ``b"labels"`` entries of the unpickled dictionary.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the dictionary has no ``b"data"`` or ``b"labels"`` entry.
    """
    # NOTE: pickle can execute arbitrary code while loading. Only use this
    # on batch files obtained from a trusted source.
    with open(filename, "rb") as fo:
        dict_data = pickle.load(fo, encoding="bytes")

    # Access the entries by key instead of relying on the insertion order
    # of the dictionary (encoding="bytes" turns the keys into bytes).
    data: np.ndarray = np.array(dict_data[b"data"])
    labels: np.ndarray = np.array(dict_data[b"labels"])
    return data, labels
|
||||||
|
|
||||||
|
|
||||||
|
def split_into_three_color_channels(
    image: np.ndarray,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Cut a flat picture vector into three 32x32 float32 color planes.

    Args:
        image: One-dimensional array whose elements 0..1023, 1024..2047
            and 2048..3071 hold the first, second and third color plane.

    Returns:
        The three planes ``(channel_r, channel_g, channel_b)``, each
        converted to ``np.float32`` and reshaped to ``(32, 32)``.
    """
    plane_size = 1024

    planes = [
        image[first : first + plane_size].astype(np.float32).reshape(32, 32)
        for first in (0, plane_size, 2 * plane_size)
    ]

    return planes[0], planes[1], planes[2]
|
||||||
|
|
||||||
|
|
||||||
|
def process_data_set(test_data_mode: bool) -> None:
    """Convert the CIFAR-10 batch files into one pattern and one label file.

    The batch files are looked up with ``give_filenames`` and read with
    ``load_data``. Every picture is rearranged into the layout
    ``(32, 32, 3)``, the whole pattern array is divided by its maximum and
    the result is written with ``np.save`` into the working directory.

    Args:
        test_data_mode: If ``True`` the test batch (batch id 0, 10000
            pictures) is written to ``TestPatternStorage.npy`` and
            ``TestLabelStorage.npy``. Otherwise the training batches
            (batch ids 1..5, 50000 pictures) are written to
            ``TrainPatternStorage.npy`` and ``TrainLabelStorage.npy``.
    """
    if test_data_mode is True:
        filename_out_pattern: str = "TestPatternStorage.npy"
        filename_out_label: str = "TestLabelStorage.npy"
        number_of_pictures: int = 10000
        first_batch_id: int = 0
        last_batch_id: int = 0
    else:
        filename_out_pattern = "TrainPatternStorage.npy"
        filename_out_label = "TrainLabelStorage.npy"
        number_of_pictures = 50000
        first_batch_id = 1
        last_batch_id = 5

    np_data: np.ndarray = np.zeros((number_of_pictures, 32, 32, 3), dtype=np.float32)
    np_label: np.ndarray = np.zeros((number_of_pictures), dtype=np.uint64)

    for batch_id in range(first_batch_id, last_batch_id + 1):
        filename, _, start_id_pattern = give_filenames(batch_id)
        pictures, labels = load_data(filename)

        count: int = pictures.shape[0]
        end_id_pattern: int = start_id_pattern + count

        # Each row holds three consecutive planes of 1024 values
        # (32 rows x 32 columns). Reshape to (picture, channel, y, x) and
        # move the channel axis to the end: (picture, y, x, channel).
        # This replaces the former picture-by-picture Python loop.
        np_data[start_id_pattern:end_id_pattern] = (
            pictures[:, :3072].reshape(count, 3, 32, 32).transpose(0, 2, 3, 1)
        )
        np_label[start_id_pattern:end_id_pattern] = labels[:count]

    # Scale the patterns to a maximum of 1.0
    np_data /= np.max(np_data)

    # Both arrays already have their final dtype (float32 / uint64),
    # hence no additional converting copy is required before saving.
    np.save(filename_out_pattern, np_data)
    np.save(filename_out_label, np_label)
|
||||||
|
|
||||||
|
|
||||||
|
# Convert the test batch first and the training batches afterwards.
process_data_set(test_data_mode=True)
process_data_set(test_data_mode=False)
|
8
dataset_collection/DATA_CIFAR10/data_url.txt
Normal file
8
dataset_collection/DATA_CIFAR10/data_url.txt
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
https://www.cs.toronto.edu/~kriz/cifar.html
|
||||||
|
|
||||||
|
Download the CIFAR-10 python version
|
||||||
|
https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
|
||||||
|
|
||||||
|
Then
|
||||||
|
tar -xvzf cifar-10-python.tar.gz
|
||||||
|
python convert.py
|
4
dataset_collection/DATA_CIFAR10/dataset.json
Normal file
4
dataset_collection/DATA_CIFAR10/dataset.json
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
{
|
||||||
|
"data_path": "./DATA_CIFAR10/",
|
||||||
|
"data_mode": "CIFAR10"
|
||||||
|
}
|
161
dataset_collection/DATA_FASHION_MNIST/convert.py
Normal file
161
dataset_collection/DATA_FASHION_MNIST/convert.py
Normal file
|
@ -0,0 +1,161 @@
|
||||||
|
# MIT License
|
||||||
|
# Copyright 2022 University of Bremen
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
# a copy of this software and associated documentation files (the "Software"),
|
||||||
|
# to deal in the Software without restriction, including without limitation
|
||||||
|
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
# and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
# Software is furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included
|
||||||
|
# in all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||||
|
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# David Rotermund ( davrot@uni-bremen.de )
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Release history:
|
||||||
|
# ================
|
||||||
|
# 1.0.0 -- 01.05.2022: first release
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# [offset] [type] [value] [description]
|
||||||
|
# 0000 32 bit integer 0x00000801(2049) magic number (MSB first)
|
||||||
|
# 0004 32 bit integer 60000 number of items
|
||||||
|
# 0008 unsigned byte ?? label
|
||||||
|
# 0009 unsigned byte ?? label
|
||||||
|
# ........
|
||||||
|
# xxxx unsigned byte ?? label
|
||||||
|
# The label values are 0 to 9.
|
||||||
|
|
||||||
|
|
||||||
|
class ReadLabel:
    """Class for reading the labels from an MNIST label file.

    The file starts with two big-endian 32 bit integers, the magic number
    2049 and the number of labels, followed by one unsigned byte per label.
    """

    def __init__(self, filename):
        # Path of the label file that was read.
        self.filename: str = filename
        # The labels as a uint8 array, or an empty array if the header
        # of the file was not accepted.
        self.data = self.read_from_file(filename)

    def read_from_file(self, filename):
        """Read all labels from ``filename``.

        Args:
            filename: Path of the (uncompressed) label file.

        Returns:
            A one-dimensional ``np.uint8`` array with the labels. If the
            magic number is not 2049 or the number of labels is smaller
            than 1, an empty array (``np.zeros(0)``) is returned instead.

        Raises:
            OSError: If the file cannot be opened.
        """
        # Header fields are stored MSB first (big-endian unsigned 32 bit).
        int32_data = np.dtype(">u4")

        # The context manager closes the file on every path, including
        # the case that parsing the header raises an exception.
        with open(filename, "rb") as file:
            magic_flag = np.frombuffer(file.read(4), int32_data)[0]
            if magic_flag != 2049:
                return np.zeros(0)

            number_of_elements = int(np.frombuffer(file.read(4), int32_data)[0])
            if number_of_elements < 1:
                return np.zeros(0)

            return np.frombuffer(file.read(number_of_elements), dtype=np.uint8)
|
||||||
|
|
||||||
|
|
||||||
|
# [offset] [type] [value] [description]
|
||||||
|
# 0000 32 bit integer 0x00000803(2051) magic number
|
||||||
|
# 0004 32 bit integer 60000 number of images
|
||||||
|
# 0008 32 bit integer 28 number of rows
|
||||||
|
# 0012 32 bit integer 28 number of columns
|
||||||
|
# 0016 unsigned byte ?? pixel
|
||||||
|
# 0017 unsigned byte ?? pixel
|
||||||
|
# ........
|
||||||
|
# xxxx unsigned byte ?? pixel
|
||||||
|
# Pixels are organized row-wise.
|
||||||
|
# Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).
|
||||||
|
|
||||||
|
|
||||||
|
class ReadPicture:
    """Class for reading the images from an MNIST image file.

    The file starts with four big-endian 32 bit integers (magic number
    2051, number of images, number of rows, number of columns) followed
    by one unsigned byte per pixel, organized row-wise.
    """

    def __init__(self, filename):
        # Path of the image file that was read.
        self.filename: str = filename
        # The images as a uint8 array of shape (images, 28, 28), or an
        # empty array if the header of the file was not accepted.
        self.data = self.read_from_file(filename)

    def read_from_file(self, filename):
        """Read all images from ``filename``.

        Args:
            filename: Path of the (uncompressed) image file.

        Returns:
            A ``np.uint8`` array of shape ``(images, rows, columns)``. If
            the magic number is not 2051, the number of images is smaller
            than 1, or the number of rows or columns is not 28, an empty
            array (``np.zeros(0)``) is returned instead.

        Raises:
            OSError: If the file cannot be opened.
        """
        # Header fields are stored MSB first (big-endian unsigned 32 bit).
        int32_data = np.dtype(">u4")

        # The context manager closes the file on every path, including
        # the case that parsing the header raises an exception.
        with open(filename, "rb") as file:
            magic_flag = np.frombuffer(file.read(4), int32_data)[0]
            if magic_flag != 2051:
                return np.zeros(0)

            # Plain Python integers keep the byte count computed below
            # free from 32 bit wrap-around.
            number_of_elements = int(np.frombuffer(file.read(4), int32_data)[0])
            if number_of_elements < 1:
                return np.zeros(0)

            number_of_rows = int(np.frombuffer(file.read(4), int32_data)[0])
            if number_of_rows != 28:
                return np.zeros(0)

            number_of_columns = int(np.frombuffer(file.read(4), int32_data)[0])
            if number_of_columns != 28:
                return np.zeros(0)

            data = np.frombuffer(
                file.read(number_of_elements * number_of_rows * number_of_columns),
                dtype=np.uint8,
            )

        # Pixels are stored row-wise, hence rows come before columns.
        return data.reshape(number_of_elements, number_of_rows, number_of_columns)
|
||||||
|
|
||||||
|
|
||||||
|
def proprocess_data_set(test_mode):
    """Convert a pair of IDX image / label files into two ``.npy`` files.

    The images are read with ``ReadPicture`` and the labels with
    ``ReadLabel``. The images are converted to ``np.float32`` and divided
    by their maximum, the labels are converted to ``np.uint64``. Both are
    written with ``np.save`` into the working directory.

    Args:
        test_mode: If ``True`` the files ``t10k-images-idx3-ubyte`` and
            ``t10k-labels-idx1-ubyte`` are converted into
            ``TestPatternStorage.npy`` and ``TestLabelStorage.npy``.
            Otherwise ``train-images-idx3-ubyte`` and
            ``train-labels-idx1-ubyte`` are converted into
            ``TrainPatternStorage.npy`` and ``TrainLabelStorage.npy``.

    Raises:
        ValueError: If no images could be read from the image file.
    """
    if test_mode is True:
        filename_out_pattern: str = "TestPatternStorage.npy"
        filename_out_label: str = "TestLabelStorage.npy"
        filename_in_image: str = "t10k-images-idx3-ubyte"
        filename_in_label: str = "t10k-labels-idx1-ubyte"
    else:
        filename_out_pattern = "TrainPatternStorage.npy"
        filename_out_label = "TrainLabelStorage.npy"
        filename_in_image = "train-images-idx3-ubyte"
        filename_in_label = "train-labels-idx1-ubyte"

    pictures = ReadPicture(filename_in_image)
    labels = ReadLabel(filename_in_label)

    # A rejected file yields an empty array; report that clearly instead
    # of failing inside np.max with a zero-size reduction error.
    if pictures.data.size == 0:
        raise ValueError(f"Could not read any images from {filename_in_image}")

    # Down to 0 ... 1.0
    # One float32 conversion is enough; the division is done in place.
    pattern_storage = pictures.data.astype(np.float32)
    pattern_storage /= np.max(pattern_storage)

    label_storage = labels.data.astype(np.uint64)

    np.save(filename_out_pattern, pattern_storage)
    np.save(filename_out_label, label_storage)
|
||||||
|
|
||||||
|
|
||||||
|
# Convert the test files first and the training files afterwards.
proprocess_data_set(test_mode=True)
proprocess_data_set(test_mode=False)
|
8
dataset_collection/DATA_FASHION_MNIST/data_url.txt
Normal file
8
dataset_collection/DATA_FASHION_MNIST/data_url.txt
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
https://github.com/zalandoresearch/fashion-mnist
|
||||||
|
|
||||||
|
We need:
|
||||||
|
t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz
|
||||||
|
|
||||||
|
Then
|
||||||
|
gzip -d *.gz
|
||||||
|
python convert.py
|
4
dataset_collection/DATA_FASHION_MNIST/dataset.json
Normal file
4
dataset_collection/DATA_FASHION_MNIST/dataset.json
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
{
|
||||||
|
"data_path": "./DATA_FASHION_MNIST/",
|
||||||
|
"data_mode": "MNIST_FASHION"
|
||||||
|
}
|
161
dataset_collection/DATA_MNIST/convert.py
Normal file
161
dataset_collection/DATA_MNIST/convert.py
Normal file
|
@ -0,0 +1,161 @@
|
||||||
|
# MIT License
|
||||||
|
# Copyright 2022 University of Bremen
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
# a copy of this software and associated documentation files (the "Software"),
|
||||||
|
# to deal in the Software without restriction, including without limitation
|
||||||
|
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
# and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
# Software is furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included
|
||||||
|
# in all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||||
|
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# David Rotermund ( davrot@uni-bremen.de )
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Release history:
|
||||||
|
# ================
|
||||||
|
# 1.0.0 -- 01.05.2022: first release
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# [offset] [type] [value] [description]
|
||||||
|
# 0000 32 bit integer 0x00000801(2049) magic number (MSB first)
|
||||||
|
# 0004 32 bit integer 60000 number of items
|
||||||
|
# 0008 unsigned byte ?? label
|
||||||
|
# 0009 unsigned byte ?? label
|
||||||
|
# ........
|
||||||
|
# xxxx unsigned byte ?? label
|
||||||
|
# The label values are 0 to 9.
|
||||||
|
|
||||||
|
|
||||||
|
class ReadLabel:
    """Class for reading the labels from an MNIST label file.

    The file starts with two big-endian 32 bit integers, the magic number
    2049 and the number of labels, followed by one unsigned byte per label.
    """

    def __init__(self, filename):
        # Path of the label file that was read.
        self.filename: str = filename
        # The labels as a uint8 array, or an empty array if the header
        # of the file was not accepted.
        self.data = self.read_from_file(filename)

    def read_from_file(self, filename):
        """Read all labels from ``filename``.

        Args:
            filename: Path of the (uncompressed) label file.

        Returns:
            A one-dimensional ``np.uint8`` array with the labels. If the
            magic number is not 2049 or the number of labels is smaller
            than 1, an empty array (``np.zeros(0)``) is returned instead.

        Raises:
            OSError: If the file cannot be opened.
        """
        # Header fields are stored MSB first (big-endian unsigned 32 bit).
        int32_data = np.dtype(">u4")

        # The context manager closes the file on every path, including
        # the case that parsing the header raises an exception.
        with open(filename, "rb") as file:
            magic_flag = np.frombuffer(file.read(4), int32_data)[0]
            if magic_flag != 2049:
                return np.zeros(0)

            number_of_elements = int(np.frombuffer(file.read(4), int32_data)[0])
            if number_of_elements < 1:
                return np.zeros(0)

            return np.frombuffer(file.read(number_of_elements), dtype=np.uint8)
|
||||||
|
|
||||||
|
|
||||||
|
# [offset] [type] [value] [description]
|
||||||
|
# 0000 32 bit integer 0x00000803(2051) magic number
|
||||||
|
# 0004 32 bit integer 60000 number of images
|
||||||
|
# 0008 32 bit integer 28 number of rows
|
||||||
|
# 0012 32 bit integer 28 number of columns
|
||||||
|
# 0016 unsigned byte ?? pixel
|
||||||
|
# 0017 unsigned byte ?? pixel
|
||||||
|
# ........
|
||||||
|
# xxxx unsigned byte ?? pixel
|
||||||
|
# Pixels are organized row-wise.
|
||||||
|
# Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).
|
||||||
|
|
||||||
|
|
||||||
|
class ReadPicture:
    """Class for reading the images from an MNIST image file.

    The file starts with four big-endian 32 bit integers (magic number
    2051, number of images, number of rows, number of columns) followed
    by one unsigned byte per pixel, organized row-wise.
    """

    def __init__(self, filename):
        # Path of the image file that was read.
        self.filename: str = filename
        # The images as a uint8 array of shape (images, 28, 28), or an
        # empty array if the header of the file was not accepted.
        self.data = self.read_from_file(filename)

    def read_from_file(self, filename):
        """Read all images from ``filename``.

        Args:
            filename: Path of the (uncompressed) image file.

        Returns:
            A ``np.uint8`` array of shape ``(images, rows, columns)``. If
            the magic number is not 2051, the number of images is smaller
            than 1, or the number of rows or columns is not 28, an empty
            array (``np.zeros(0)``) is returned instead.

        Raises:
            OSError: If the file cannot be opened.
        """
        # Header fields are stored MSB first (big-endian unsigned 32 bit).
        int32_data = np.dtype(">u4")

        # The context manager closes the file on every path, including
        # the case that parsing the header raises an exception.
        with open(filename, "rb") as file:
            magic_flag = np.frombuffer(file.read(4), int32_data)[0]
            if magic_flag != 2051:
                return np.zeros(0)

            # Plain Python integers keep the byte count computed below
            # free from 32 bit wrap-around.
            number_of_elements = int(np.frombuffer(file.read(4), int32_data)[0])
            if number_of_elements < 1:
                return np.zeros(0)

            number_of_rows = int(np.frombuffer(file.read(4), int32_data)[0])
            if number_of_rows != 28:
                return np.zeros(0)

            number_of_columns = int(np.frombuffer(file.read(4), int32_data)[0])
            if number_of_columns != 28:
                return np.zeros(0)

            data = np.frombuffer(
                file.read(number_of_elements * number_of_rows * number_of_columns),
                dtype=np.uint8,
            )

        # Pixels are stored row-wise, hence rows come before columns.
        return data.reshape(number_of_elements, number_of_rows, number_of_columns)
|
||||||
|
|
||||||
|
|
||||||
|
def proprocess_data_set(test_mode):
    """Convert a pair of IDX image / label files into two ``.npy`` files.

    The images are read with ``ReadPicture`` and the labels with
    ``ReadLabel``. The images are converted to ``np.float32`` and divided
    by their maximum, the labels are converted to ``np.uint64``. Both are
    written with ``np.save`` into the working directory.

    Args:
        test_mode: If ``True`` the files ``t10k-images-idx3-ubyte`` and
            ``t10k-labels-idx1-ubyte`` are converted into
            ``TestPatternStorage.npy`` and ``TestLabelStorage.npy``.
            Otherwise ``train-images-idx3-ubyte`` and
            ``train-labels-idx1-ubyte`` are converted into
            ``TrainPatternStorage.npy`` and ``TrainLabelStorage.npy``.

    Raises:
        ValueError: If no images could be read from the image file.
    """
    if test_mode is True:
        filename_out_pattern: str = "TestPatternStorage.npy"
        filename_out_label: str = "TestLabelStorage.npy"
        filename_in_image: str = "t10k-images-idx3-ubyte"
        filename_in_label: str = "t10k-labels-idx1-ubyte"
    else:
        filename_out_pattern = "TrainPatternStorage.npy"
        filename_out_label = "TrainLabelStorage.npy"
        filename_in_image = "train-images-idx3-ubyte"
        filename_in_label = "train-labels-idx1-ubyte"

    pictures = ReadPicture(filename_in_image)
    labels = ReadLabel(filename_in_label)

    # A rejected file yields an empty array; report that clearly instead
    # of failing inside np.max with a zero-size reduction error.
    if pictures.data.size == 0:
        raise ValueError(f"Could not read any images from {filename_in_image}")

    # Down to 0 ... 1.0
    # One float32 conversion is enough; the division is done in place.
    pattern_storage = pictures.data.astype(np.float32)
    pattern_storage /= np.max(pattern_storage)

    label_storage = labels.data.astype(np.uint64)

    np.save(filename_out_pattern, pattern_storage)
    np.save(filename_out_label, label_storage)
|
||||||
|
|
||||||
|
|
||||||
|
# Convert the test files first and the training files afterwards.
proprocess_data_set(test_mode=True)
proprocess_data_set(test_mode=False)
|
8
dataset_collection/DATA_MNIST/data_url.txt
Normal file
8
dataset_collection/DATA_MNIST/data_url.txt
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
http://yann.lecun.com/exdb/mnist/
|
||||||
|
|
||||||
|
We need:
|
||||||
|
t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz
|
||||||
|
|
||||||
|
Then
|
||||||
|
gzip -d *.gz
|
||||||
|
python convert.py
|
4
dataset_collection/DATA_MNIST/dataset.json
Normal file
4
dataset_collection/DATA_MNIST/dataset.json
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
{
|
||||||
|
"data_path": "./DATA_MNIST/",
|
||||||
|
"data_mode": "MNIST"
|
||||||
|
}
|
Loading…
Reference in a new issue