# Tensorflow / Keras -- A fast non-introduction
{:.no_toc}

## Top

This is a fast overview of how to get an MNIST example running under TF Keras.

If you are just starting with Tensorflow / Keras (especially for a scientific project), you may want to reconsider using Keras. In that case, please check (& use) PyTorch.

Questions to [David Rotermund](mailto:davrot@uni-bremen.de)

## Data loader / Data generator

|||
|---|---|
| [keras.utils.Sequence](https://www.tensorflow.org/api_docs/python/tf/keras/utils/Sequence) | "Base object for fitting to a sequence of data, such as a dataset."|
| [tf.keras.utils.to_categorical](https://www.tensorflow.org/api_docs/python/tf/keras/utils/to_categorical) | "Converts a class vector (integers) to binary class matrix."|

## Basic

```python
from tensorflow import keras
import numpy as np


class DataGenerator(keras.utils.Sequence):
    def __init__(
        self,
        train: bool = True,
        size_of_batch: int = 32,
        number_of_classes: int = 10,
        do_shuffle: bool = True,
    ) -> None:
        super().__init__()

        if train is True:
            self.pattern_storage: np.ndarray = np.load("./train_pattern_storage.npy")
            self.label_storage: np.ndarray = np.load("./train_label_storage.npy")
        else:
            self.pattern_storage = np.load("./test_pattern_storage.npy")
            self.label_storage = np.load("./test_label_storage.npy")

        self.pattern_storage = self.pattern_storage.astype(np.float32)
        self.pattern_storage /= np.max(self.pattern_storage)

        self.dimensions: tuple[int, int] = (
            self.pattern_storage.shape[1],
            self.pattern_storage.shape[2],
        )

        # How many patterns are there?
        self.number_of_pattern: int = self.label_storage.shape[0]

        self.size_of_batch: int = size_of_batch
        self.number_of_classes: int = number_of_classes
        self.do_shuffle: bool = do_shuffle

        if self.pattern_storage.ndim == 3:
            self.number_of_channel: int = 1
        else:
            self.number_of_channel = self.pattern_storage.shape[3]

        self.available_indices: np.ndarray = np.arange(self.number_of_pattern)

        self.on_epoch_end()

    def on_epoch_end(self) -> None:
        self.available_indices = np.arange(self.number_of_pattern)
        if self.do_shuffle is True:
            np.random.shuffle(self.available_indices)

    def __getitem__(self, index: int) -> tuple[np.ndarray, np.ndarray]:
        selected_indices: np.ndarray = self.available_indices[
            index * self.size_of_batch : (index + 1) * self.size_of_batch
        ]
        image, target = self.__data_generation(selected_indices)
        return image, target

    def __data_generation(
        self, list_of_indice: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        image = np.empty(
            (self.size_of_batch, *self.dimensions, self.number_of_channel),
            dtype=np.float32,
        )
        target = np.empty((self.size_of_batch), dtype=int)

        # list_of_indice already contains the (shuffled) pattern indices;
        # use them directly instead of going through
        # self.available_indices a second time.
        for i in range(0, len(list_of_indice)):
            if self.pattern_storage.ndim == 3:
                image[i, :, :, 0] = self.pattern_storage[list_of_indice[i], :, :]
            else:
                image[i, :, :, :] = self.pattern_storage[list_of_indice[i], :, :, :]
            target[i] = self.label_storage[list_of_indice[i]]

        return image, keras.utils.to_categorical(
            target, num_classes=self.number_of_classes
        )

    def __len__(self):
        return int(np.floor(self.number_of_pattern / self.size_of_batch))


if __name__ == "__main__":
    pass
```
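The generator expects the MNIST data as four `.npy` files in the current directory. These file names are just the convention used here; a minimal sketch for producing such files from `keras.datasets.mnist` (which ships with Tensorflow and downloads MNIST on first use) could look like this:

```python
# Sketch: create the four .npy files the DataGenerator expects.
from tensorflow import keras
import numpy as np

(train_x, train_y), (test_x, test_y) = keras.datasets.mnist.load_data()

np.save("./train_pattern_storage.npy", train_x)  # shape: (60000, 28, 28)
np.save("./train_label_storage.npy", train_y)  # shape: (60000,)
np.save("./test_pattern_storage.npy", test_x)  # shape: (10000, 28, 28)
np.save("./test_label_storage.npy", test_y)  # shape: (10000,)
```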
## With data augmentation

You can add further preprocessing layers to the pre-processing chain **self.data_augmentation**; they are then applied to the input before it is passed to the network (an example follows after the code).

```python
from tensorflow import keras
import numpy as np


class DataGenerator(keras.utils.Sequence):
    def __init__(
        self,
        train: bool = True,
        size_of_batch: int = 32,
        number_of_classes: int = 10,
        do_shuffle: bool = True,
    ) -> None:
        super().__init__()

        if train is True:
            self.pattern_storage: np.ndarray = np.load("./train_pattern_storage.npy")
            self.label_storage: np.ndarray = np.load("./train_label_storage.npy")
        else:
            self.pattern_storage = np.load("./test_pattern_storage.npy")
            self.label_storage = np.load("./test_label_storage.npy")

        self.pattern_storage = self.pattern_storage.astype(np.float32)
        self.pattern_storage /= np.max(self.pattern_storage)

        self.dimensions: tuple[int, int] = (
            self.pattern_storage.shape[1],
            self.pattern_storage.shape[2],
        )

        reduction: tuple[int, int] = (4, 4)

        # Random crops during training, deterministic center crops for testing:
        if train is True:
            self.data_augmentation = keras.Sequential(
                [
                    keras.layers.RandomCrop(
                        height=self.dimensions[0] - reduction[0],
                        width=self.dimensions[1] - reduction[1],
                    ),
                ]
            )
        else:
            self.data_augmentation = keras.Sequential(
                [
                    keras.layers.CenterCrop(
                        height=self.dimensions[0] - reduction[0],
                        width=self.dimensions[1] - reduction[1],
                    ),
                ]
            )

        # How many patterns are there?
        self.number_of_pattern: int = self.label_storage.shape[0]

        self.size_of_batch: int = size_of_batch
        self.number_of_classes: int = number_of_classes
        self.do_shuffle: bool = do_shuffle

        if self.pattern_storage.ndim == 3:
            self.number_of_channel: int = 1
        else:
            self.number_of_channel = self.pattern_storage.shape[3]

        self.available_indices: np.ndarray = np.arange(self.number_of_pattern)

        self.on_epoch_end()

    def on_epoch_end(self) -> None:
        self.available_indices = np.arange(self.number_of_pattern)
        if self.do_shuffle is True:
            np.random.shuffle(self.available_indices)

    def __getitem__(self, index: int) -> tuple[np.ndarray, np.ndarray]:
        selected_indices: np.ndarray = self.available_indices[
            index * self.size_of_batch : (index + 1) * self.size_of_batch
        ]
        image, target = self.__data_generation(selected_indices)
        return image, target

    def __data_generation(
        self, list_of_indice: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        image = np.empty(
            (self.size_of_batch, *self.dimensions, self.number_of_channel),
            dtype=np.float32,
        )
        target = np.empty((self.size_of_batch), dtype=int)

        # list_of_indice already contains the (shuffled) pattern indices;
        # use them directly instead of going through
        # self.available_indices a second time.
        for i in range(0, len(list_of_indice)):
            if self.pattern_storage.ndim == 3:
                image[i, :, :, 0] = self.pattern_storage[list_of_indice[i], :, :]
            else:
                image[i, :, :, :] = self.pattern_storage[list_of_indice[i], :, :, :]
            target[i] = self.label_storage[list_of_indice[i]]

        # The augmentation pipeline returns a tensor; convert it back to
        # numpy to match the annotated return type.
        image = self.data_augmentation(image).numpy()

        return image, keras.utils.to_categorical(
            target, num_classes=self.number_of_classes
        )

    def __len__(self):
        return int(np.floor(self.number_of_pattern / self.size_of_batch))


if __name__ == "__main__":
    pass
```
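As mentioned above, further layers can simply be appended to the `keras.Sequential` chain inside `__init__`. A sketch of an extended training branch (the layer choice and parameters are purely illustrative; `RandomRotation` and `RandomTranslation` are standard Keras preprocessing layers):

```python
# Sketch: training-time augmentation chain with additional layers.
self.data_augmentation = keras.Sequential(
    [
        keras.layers.RandomCrop(
            height=self.dimensions[0] - reduction[0],
            width=self.dimensions[1] - reduction[1],
        ),
        keras.layers.RandomRotation(factor=0.05),  # up to +/- 5% of a full turn
        keras.layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
    ]
)
```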
## Train an example MNIST network

|||
|---|---|
|[tf.keras.backend.clear_session](https://www.tensorflow.org/api_docs/python/tf/keras/backend/clear_session) | "Resets all state generated by Keras."|
|[tf.keras.Sequential](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential) | "Sequential groups a linear stack of layers into a tf.keras.Model."|
|[network.add()](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential#add) | "Adds a layer instance on top of the layer stack."|
|[tf.keras.layers.Conv2D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D) | "2D convolution layer (e.g. spatial convolution over images)."|
|[tf.keras.layers.MaxPool2D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/MaxPooling2D) | "Max pooling operation for 2D spatial data."|
|[tf.keras.layers.Flatten](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Flatten) | "Flattens the input. Does not affect the batch size."|
|[tf.keras.layers.Dense](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense) | "Just your regular densely-connected NN layer."|
|[network.compile()](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential#compile) | "Configures the model for training."|
|[tf.keras.metrics.categorical_crossentropy](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/categorical_crossentropy) | "Computes the categorical crossentropy loss."|
|[tf.keras.optimizers.Adam](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam) | "Optimizer that implements the Adam algorithm."|
|[network.fit()](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential#fit) | "Trains the model for a fixed number of epochs (iterations on a dataset)."|
|[network.summary()](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential#summary) | "Prints a string summary of the network."|
|[network.save()](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential#save) | "Saves the model to Tensorflow SavedModel or a single HDF5 file."|

Parameters for the layers:

|||
|---|---|
|[padding](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv1D) | "One of "valid", "same" or "causal" (case-insensitive). "valid" means no padding. "same" results in padding with zeros evenly to the left/right or up/down of the input such that output has the same height/width dimension as the input. "causal" results in causal (dilated) convolutions, e.g. output[t] does not depend on input[t+1:]."|
|[use_bias](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv1D)| "Boolean, whether the layer uses a bias vector."|
|[activation](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv1D)| "Activation function to use. If you don't specify anything, no activation is applied (see [keras.activations](https://www.tensorflow.org/api_docs/python/tf/keras/activations))."|
|[data_format](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv1D)| "A string, one of channels_last (default) or channels_first."|

```python
from tensorflow import keras
from DataGenerator import DataGenerator

epoch_max: int = 50
number_of_classes: int = 10
size_of_batch_train: int = 100

train_data = DataGenerator(
    train=True,
    size_of_batch=size_of_batch_train,
    number_of_classes=number_of_classes,
    do_shuffle=True,
)

number_of_channels: int = train_data.number_of_channel
input_dimensions = train_data.dimensions
number_of_pattern_train = train_data.number_of_pattern

number_of_output_channels_conv1: int = 32
number_of_output_channels_conv2: int = 64
number_of_neurons_flatten1: int = 1024

kernel_size_conv1: tuple[int, int] = (5, 5)
kernel_size_pool1: tuple[int, int] = (2, 2)
kernel_size_conv2: tuple[int, int] = (5, 5)
kernel_size_pool2: tuple[int, int] = (2, 2)

stride_conv1: tuple[int, int] = (1, 1)
stride_pool1: tuple[int, int] = (2, 2)
stride_conv2: tuple[int, int] = (1, 1)
stride_pool2: tuple[int, int] = (2, 2)

keras.backend.clear_session()

network = keras.Sequential()

# Conv 1
network.add(
    keras.layers.Conv2D(
        number_of_output_channels_conv1,
        kernel_size=kernel_size_conv1,
        activation="relu",
        input_shape=(input_dimensions[0], input_dimensions[1], number_of_channels),
        padding="valid",
        strides=stride_conv1,
        data_format="channels_last",
        use_bias=True,
    )
)

# Pool 1
network.add(
    keras.layers.MaxPooling2D(
        pool_size=kernel_size_pool1,
        padding="valid",
        strides=stride_pool1,
        data_format="channels_last",
    )
)

# Conv 2
network.add(
    keras.layers.Conv2D(
        number_of_output_channels_conv2,
        kernel_size=kernel_size_conv2,
        activation="relu",
        padding="valid",
        strides=stride_conv2,
        data_format="channels_last",
        use_bias=True,
    )
)

# Pool 2
network.add(
    keras.layers.MaxPooling2D(
        pool_size=kernel_size_pool2,
        padding="valid",
        strides=stride_pool2,
        data_format="channels_last",
    )
)

# Flatten
network.add(keras.layers.Flatten(data_format="channels_last"))

# Full layer
network.add(
    keras.layers.Dense(number_of_neurons_flatten1, activation="relu", use_bias=True)
)

# Output layer
network.add(keras.layers.Dense(number_of_classes, activation="softmax"))

network.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)

for epoch_id in range(0, epoch_max):
    print(f"Epoch: {epoch_id} of {epoch_max - 1}")
    network.fit(x=train_data)
    network.summary()
    network.save("Model_" + str(epoch_id) + ".h5")
```
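The manual epoch loop above makes the per-epoch checkpointing explicit. As a sketch of an alternative (`keras.callbacks.ModelCheckpoint` is a standard Keras callback; the file-name pattern here is illustrative, and `{epoch}` is filled in by Keras starting at 1), a single `fit()` call can do the same:

```python
# Sketch: let fit() run all epochs and save a model after each one.
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="Model_{epoch}.h5",  # note: {epoch} starts counting at 1
    save_freq="epoch",
)
network.fit(x=train_data, epochs=epoch_max, callbacks=[checkpoint])
```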
## Test the example network performance

|||
|---|---|
|[tf.keras.models.load_model](https://www.tensorflow.org/api_docs/python/tf/keras/saving/load_model) | "Loads a model saved via model.save()."|
|[network.evaluate()](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential#evaluate) | "Returns the loss value & metrics values for the model in test mode."|

```python
from tensorflow import keras
from DataGenerator import DataGenerator

number_of_classes: int = 10
size_of_batch_test: int = 100
model_id: int = 49

test_data = DataGenerator(
    train=False,
    size_of_batch=size_of_batch_test,
    number_of_classes=number_of_classes,
    do_shuffle=False,
)

keras.backend.clear_session()

network = keras.models.load_model("./Model_" + str(model_id) + ".h5")

test_loss, test_acc = network.evaluate(x=test_data)
print(f"Correct: {test_acc * 100.0:.2f}%")
```
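Beyond the aggregate accuracy from `evaluate()`, per-class behavior can be inspected with [network.predict()](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential#predict). A minimal sketch building a confusion matrix (the bookkeeping below is illustrative and relies on `do_shuffle=False`, so the generator delivers the patterns in storage order):

```python
import numpy as np

# Sketch: confusion matrix over the test set.
prediction = network.predict(x=test_data)  # shape: (number_of_patterns, classes)
estimated_class = np.argmax(prediction, axis=1)

# With do_shuffle=False the labels can be read off the generator in order:
true_class = test_data.label_storage[: estimated_class.shape[0]]

confusion = np.zeros((number_of_classes, number_of_classes), dtype=np.int64)
for t, e in zip(true_class, estimated_class):
    confusion[t, e] += 1  # rows: true class, columns: estimated class
print(confusion)
```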
## How to extract the activities from the network

```python
from tensorflow import keras
from DataGenerator import DataGenerator
import numpy as np

number_of_classes: int = 10
size_of_batch_test: int = 100
model_id: int = 49
pattern_batch_id: int = 0
pattern_id: int = 42

test_data = DataGenerator(
    train=False,
    size_of_batch=size_of_batch_test,
    number_of_classes=number_of_classes,
    do_shuffle=False,
)

keras.backend.clear_session()

network = keras.models.load_model("./Model_" + str(model_id) + ".h5")

image, target = test_data[pattern_batch_id]
the_target = target[pattern_id]

layer_names: list[str] = [
    "Conv1",
    "Pool1",
    "Conv2",
    "Pool2",
    "Flatten",
    "Full",
    "Output",
]

# Push a single pattern through the network, layer by layer:
tensor = image[pattern_id : pattern_id + 1, :, :, :]
for layer_id, layer in enumerate(network.layers):
    output = layer(tensor)
    print(f"Layer {layer_id + 1} ({layer_names[layer_id]})")
    print("Input Shape:")
    print(tensor.shape)
    print("Output Shape:")
    print(output.numpy().shape)
    print("")
    tensor = output

print("\nEstimation")
print(np.round(tensor.numpy(), 4))
print("Strongest response is at " + str(np.argmax(tensor.numpy())))
print("Correct output is " + str(np.argmax(the_target)))
```
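Calling the layers one after the other, as above, works because the model is sequential. As an alternative sketch (standard functional-API usage; it reuses `image` and `pattern_id` from the script above), a `keras.Model` can return all intermediate activations in one call:

```python
# Sketch: a model mapping the network input to every layer output.
activation_model = keras.Model(
    inputs=network.input,
    outputs=[layer.output for layer in network.layers],
)

all_activities = activation_model(image[pattern_id : pattern_id + 1, :, :, :])
for layer_id, activity in enumerate(all_activities):
    print(layer_id, activity.numpy().shape)
```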
## Extracting weight and bias

Here is one way to extract the weights and biases of the whole network. Alternatively, you can use [get_weights](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#get_weights) from [tf.keras.layers.Layer](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer) in combination with [get_layer](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential#get_layer) of [tf.keras.Sequential](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential); a sketch of that variant follows after the code.

```python
from tensorflow import keras

model_id: int = 49

keras.backend.clear_session()

network = keras.models.load_model("./Model_" + str(model_id) + ".h5")

# get_weights() returns a flat list; for this architecture (every
# parameterized layer uses a bias) the weight matrices / kernels sit at
# the even positions and the corresponding bias vectors at the odd ones.
weights_bias = network.get_weights()

counter_layer: int = 0
for i in range(0, len(weights_bias), 2):
    print("Layer " + str(counter_layer) + " weights_bias position: " + str(i) + " =>")
    print(weights_bias[i].shape)
    counter_layer += 1

print("")

counter_layer = 0
for i in range(1, len(weights_bias), 2):
    print("Bias " + str(counter_layer) + " weights_bias position: " + str(i) + " =>")
    print(weights_bias[i].shape)
    counter_layer += 1
```
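The per-layer alternative mentioned above can look like this; a minimal sketch (layer indices follow the architecture defined earlier, and layers without parameters simply return an empty list):

```python
# Sketch: per-layer access via get_layer() and get_weights().
for layer_id in range(0, len(network.layers)):
    layer = network.get_layer(index=layer_id)
    parameter = layer.get_weights()
    if len(parameter) == 2:
        weights, bias = parameter
        print(layer_id, layer.name, "weights:", weights.shape, "bias:", bias.shape)
    else:
        print(layer_id, layer.name, "-> no trainable parameters")
```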
## Type of layers

Reduced list with the most relevant network layers

|||
|---|---|
|[Activation](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Activation)| Applies an activation function to an output.|
|[AveragePooling1D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/AveragePooling1D)| Average pooling for temporal data.|
|[AveragePooling2D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/AveragePooling2D)| Average pooling operation for spatial data.|
|[AveragePooling3D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/AveragePooling3D)| Average pooling operation for 3D data (spatial or spatio-temporal).|
|[BatchNormalization](https://www.tensorflow.org/api_docs/python/tf/keras/layers/BatchNormalization)| Layer that normalizes its inputs.|
|[Conv1D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv1D)| 1D convolution layer (e.g. temporal convolution).|
|[Conv2D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D)| 2D convolution layer (e.g. spatial convolution over images).|
|[Conv3D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv3D)| 3D convolution layer (e.g. spatial convolution over volumes).|
|[Dense](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense)| Just your regular densely-connected NN layer.|
|[Dropout](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dropout)| Applies Dropout to the input.|
|[Flatten](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Flatten)| Flattens the input. Does not affect the batch size.|
|[MaxPooling1D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/MaxPooling1D)| Max pooling operation for 1D temporal data.|
|[MaxPooling2D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/MaxPooling2D)| Max pooling operation for 2D spatial data.|
|[MaxPooling3D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/MaxPooling3D)| Max pooling operation for 3D data (spatial or spatio-temporal).|
|[SpatialDropout1D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/SpatialDropout1D)| Spatial 1D version of Dropout.|
|[SpatialDropout2D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/SpatialDropout2D)| Spatial 2D version of Dropout.|
|[SpatialDropout3D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/SpatialDropout3D)| Spatial 3D version of Dropout.|
|[ZeroPadding1D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/ZeroPadding1D)| Zero-padding layer for 1D input (e.g. temporal sequence).|
|[ZeroPadding2D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/ZeroPadding2D)| Zero-padding layer for 2D input (e.g. picture).|
|[ZeroPadding3D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/ZeroPadding3D)| Zero-padding layer for 3D data (spatial or spatio-temporal).|

## Preprocessing layers

Reduced list with the most relevant preprocessing layers

|||
|---|---|
|[CenterCrop](https://www.tensorflow.org/api_docs/python/tf/keras/layers/CenterCrop)| A preprocessing layer which crops images.|
|[RandomContrast](https://www.tensorflow.org/api_docs/python/tf/keras/layers/RandomContrast)| A preprocessing layer which randomly adjusts contrast during training.|
|[RandomCrop](https://www.tensorflow.org/api_docs/python/tf/keras/layers/RandomCrop)| A preprocessing layer which randomly crops images during training.|
|[RandomFlip](https://www.tensorflow.org/api_docs/python/tf/keras/layers/RandomFlip)| A preprocessing layer which randomly flips images during training.|
|[RandomHeight](https://www.tensorflow.org/api_docs/python/tf/keras/layers/RandomHeight)| A preprocessing layer which randomly varies image height during training.|
|[RandomRotation](https://www.tensorflow.org/api_docs/python/tf/keras/layers/RandomRotation)| A preprocessing layer which randomly rotates images during training.|
|[RandomTranslation](https://www.tensorflow.org/api_docs/python/tf/keras/layers/RandomTranslation)| A preprocessing layer which randomly translates images during training.|
|[RandomWidth](https://www.tensorflow.org/api_docs/python/tf/keras/layers/RandomWidth)| A preprocessing layer which randomly varies image width during training.|
|[RandomZoom](https://www.tensorflow.org/api_docs/python/tf/keras/layers/RandomZoom)| A preprocessing layer which randomly zooms images during training.|
|[Rescaling](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Rescaling)| A preprocessing layer which rescales input values to a new range.|
|[Resizing](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Resizing)| A preprocessing layer which resizes images.|

## Activation functions

Reduced list with the most relevant activation functions

|||
|---|---|
|[hard_sigmoid(...)](https://www.tensorflow.org/api_docs/python/tf/keras/activations/hard_sigmoid)| Hard sigmoid activation function.|
|[relu(...)](https://www.tensorflow.org/api_docs/python/tf/keras/activations/relu)| Applies the rectified linear unit activation function.|
|[sigmoid(...)](https://www.tensorflow.org/api_docs/python/tf/keras/activations/sigmoid)| Sigmoid activation function, sigmoid(x) = 1 / (1 + exp(-x)).|
|[softmax(...)](https://www.tensorflow.org/api_docs/python/tf/keras/activations/softmax)| Softmax converts a vector of values to a probability distribution.|
|[softplus(...)](https://www.tensorflow.org/api_docs/python/tf/keras/activations/softplus)| Softplus activation function, softplus(x) = log(exp(x) + 1).|
|[softsign(...)](https://www.tensorflow.org/api_docs/python/tf/keras/activations/softsign)| Softsign activation function, softsign(x) = x / (abs(x) + 1).|
|[tanh(...)](https://www.tensorflow.org/api_docs/python/tf/keras/activations/tanh)| Hyperbolic tangent activation function.|

## Loss functions

Reduced list with the most relevant loss functions

|||
|---|---|
|[BinaryCrossentropy](https://www.tensorflow.org/api_docs/python/tf/keras/losses/BinaryCrossentropy)| Computes the cross-entropy loss between true labels and predicted labels.|
|[CategoricalCrossentropy](https://www.tensorflow.org/api_docs/python/tf/keras/losses/CategoricalCrossentropy)| Computes the crossentropy loss between the labels and predictions.|
|[KLDivergence](https://www.tensorflow.org/api_docs/python/tf/keras/losses/KLDivergence)| Computes Kullback-Leibler divergence loss between y_true and y_pred.|
|[MeanAbsoluteError](https://www.tensorflow.org/api_docs/python/tf/keras/losses/MeanAbsoluteError)| Computes the mean of absolute difference between labels and predictions.|
|[MeanSquaredError](https://www.tensorflow.org/api_docs/python/tf/keras/losses/MeanSquaredError)| Computes the mean of squares of errors between labels and predictions.|
|[Poisson](https://www.tensorflow.org/api_docs/python/tf/keras/losses/Poisson)| Computes the Poisson loss between y_true and y_pred.|
|[SparseCategoricalCrossentropy](https://www.tensorflow.org/api_docs/python/tf/keras/losses/SparseCategoricalCrossentropy)| Computes the crossentropy loss between the labels and predictions.|

## Optimizer

Reduced list with the most relevant optimizer

|||
|---|---|
|[Adagrad](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adagrad)| Optimizer that implements the Adagrad algorithm.|
|[Adam](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam)| Optimizer that implements the Adam algorithm.|
|[RMSprop](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/RMSprop)| Optimizer that implements the RMSprop algorithm.|
|[SGD](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/SGD)| Gradient descent (with momentum) optimizer.|

## Metrics

A very reduced list with the most relevant metrics

|||
|---|---|
|[Accuracy](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/Accuracy)| Calculates how often predictions equal labels.|
|[BinaryAccuracy](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/BinaryAccuracy)| Calculates how often predictions match binary labels.|
|[BinaryCrossentropy](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/BinaryCrossentropy)| Computes the crossentropy metric between the labels and predictions.|
|[CategoricalAccuracy](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/CategoricalAccuracy)| Calculates how often predictions match one-hot labels.|
|[CategoricalCrossentropy](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/CategoricalCrossentropy)| Computes the crossentropy metric between the labels and predictions.|
|[KLDivergence](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/KLDivergence)| Computes Kullback-Leibler divergence metric between y_true and y_pred.|
|[Mean](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/Mean)| Computes the (weighted) mean of the given values.|
|[MeanAbsoluteError](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/MeanAbsoluteError)| Computes the mean absolute error between the labels and predictions.|
|[MeanSquaredError](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/MeanSquaredError)| Computes the mean squared error between y_true and y_pred.|
|[Poisson](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/Poisson)| Computes the Poisson metric between y_true and y_pred.|
|[Precision](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/Precision)| Computes the precision of the predictions with respect to the labels.|
|[RootMeanSquaredError](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/RootMeanSquaredError)| Computes root mean squared error metric between y_true and y_pred.|
|[SparseCategoricalAccuracy](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/SparseCategoricalAccuracy)| Calculates how often predictions match integer labels.|
|[SparseCategoricalCrossentropy](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/SparseCategoricalCrossentropy)| Computes the crossentropy metric between the labels and predictions.|
|[SparseTopKCategoricalAccuracy](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/SparseTopKCategoricalAccuracy)| Computes how often integer targets are in the top K predictions.|
|[Sum](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/Sum)| Computes the (weighted) sum of the given values.|
|[TopKCategoricalAccuracy](https://www.tensorflow.org/api_docs/python/tf/keras/metrics/TopKCategoricalAccuracy)| Computes how often targets are in the top K predictions.|
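As a closing sketch of how these building blocks plug into `compile()` (the concrete choices below are illustrative; with integer class labels, the `Sparse*` variants make the one-hot conversion via `keras.utils.to_categorical` unnecessary):

```python
# Sketch: wiring a loss, an optimizer and a metric from the lists above.
network.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.SGD(learning_rate=0.01, momentum=0.9),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)
```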