Add files via upload

2024-08-16 17:23:13 +02:00 · 2024-08-16 17:23:13 +02:00 · 8df0d910ea
commit 8df0d910ea
parent 544a358be1
2 changed files with 563 additions and 0 deletions
--- a/offline_encoder/config.json
+++ b/offline_encoder/config.json
@ -0,0 +1,100 @@
+{
+    // Define parameters
+    // ========================================================
+    // Unit abbreviations:
+    //   dva: degrees of visual angle
+    //   pix: pixels
+    "verbose": true,
+
+    // display: Defines geometry of target display
+    // ========================================================
+    // The encoded image will be scaled such that it optimally uses
+    // the max space available. If the orignal image has a different aspect
+    // ratio than the display region, it will only use one spatial
+    // dimension (horizontal or vertical) to its full extent
+    //
+    // If one dva corresponds to different pix_per_dva on the display,
+    // (i.e. varying distance observers from screen), it should be set
+    // larger than the largest pix_per_dva required, for avoiding
+    // extrapolation artefacts or blur.
+    //
+    "display": {
+        "size_max_x_dva": 10.0,  // maximum x size of encoded image
+        "size_max_y_dva": 10.0,  // minimum y size of encoded image
+        "pix_per_dva": 40.0,  // scaling factor pixels to dva
+        "scale": "same_range"  // "same_luminance" or "same_range"
+    },
+
+    // gabor: Defines paras of Gabor filters for contour extraction
+    // ==============================================================
+    "gabor": {
+        "sigma_kernel_dva": 0.06,
+        "lambda_kernel_dva": 0.12,
+        "n_orientations": 8
+    },
+
+    // encoding: Defines parameters of sparse encoding process
+    // ========================================================
+    // Roughly speaking, after contour extraction dictionary elements
+    // will be placed starting from the position with the highest
+    // overlap with the contour. Elements placed can be surrounded
+    // by a dead or inhibitory zone to prevent placing further elements
+    // too closely. The procedure will map 'n_patches_compute' elements
+    // and then stop. For each element one obtains an overlap with the
+    // contour image.
+    //
+    // After placement, the overlaps found are normalized to the max
+    // overlap found, and then all elements with a larger normalized overlap
+    // than 'overlap_threshold' will be selected. These remaining
+    // elements will comprise a 'full' encoding of the contour.
+    //
+    // To generate even sparser representations, the full encoding can
+    // be reduced to a certain percentage of elements in the full encoding
+    // by setting the variable 'percentages'
+    //
+    // Example: n_patches_compute: 100 reduced by overlap_threshold: 0.1
+    // to 80 elements. Requesting a percentage of 30% yields representation
+    // with 24 elements.
+    //
+    "encoding": {
+        "n_patches_compute": 100,  // this amount of patches will be placed
+        "use_exp_deadzone": true,  // parameters of Gaussian deadzone
+        "size_exp_deadzone_dva": 1.20,  // PREVIOUSLY 1.4283
+        "use_cutout_deadzone": true,  // parameters of cutout deadzone
+        "size_cutout_deadzone_dva": 0.65,  // PREVIOUSLY 0.7575
+        "overlap_threshold": 0.1,  // relative overlap threshold
+        "percentages": 100
+    },
+
+    "number_of_patches": 100, // TODO: Repeated from encoding
+
+    // dictionary: Defines parameters of dictionary
+    // ========================================================
+    "dictionary": {
+        "size_dva": 1.0,  // PREVIOUSLY 1.25,
+        "clocks": {
+            "n_dir": 8,  // number of directions for clock pointer segments
+            "n_open": 4,  // number of opening angles between two clock pointer segments
+            "pointer_width": 0.07,  // PREVIOUSLY 0.05,  // relative width and size of tip extension of clock pointer
+            "pointer_length": 0.18  // PREVIOUSLY 0.15,  // relative length of clock pointer
+        },
+        "phosphene": {
+            "sigma_width": 0.18  // DEFAULT 0.15,  // half-width of Gaussian
+        }
+    },
+
+
+    // control: For controlling plotting options and flow of script
+    // ========================================================
+    "control": {
+        "force_torch_use_cpu": false,  // force using CPU even if GPU available
+        // "show_capture": true,  // shows captured image
+        // "show_object": true,  // shows detected object
+        "show_mode": "cv2", // "pyplot" or "cv2"
+        "show_image": true, // shows input image
+        "show_contours": true,  // shows extracted contours
+        "show_percept": true  // shows percept
+}
+
+
+}
--- a/offline_encoder/offline_encoding.py
+++ b/offline_encoder/offline_encoding.py
@ -0,0 +1,463 @@
+# %%
+#
+# offline_encoding.py
+# ========================================================
+# encode visual scenes into sparse representations using
+# different kinds of dictionaries
+#
+# -> derived from OnlineEncoding.py
+#
+# Version 1.0, 16.04.2024:
+#
+
+
+# Import Python modules
+# ========================================================
+# import csv
+# import time
+# import os
+# import glob
+import matplotlib.pyplot as plt
+import torch
+import torchvision as tv  # type:ignore
+# from PIL import Image
+import cv2
+import numpy as np
+import json
+from jsmin import jsmin  # type:ignore
+
+
+# Import our modules
+# ========================================================
+from processing_chain.ContourExtract import ContourExtract
+from processing_chain.PatchGenerator import PatchGenerator
+from processing_chain.Sparsifier import Sparsifier
+# from processing_chain.DiscardElements import discard_elements_simple
+from processing_chain.BuildImage import BuildImage
+# from processing_chain.WebCam import WebCam
+# from processing_chain.Yolo5Segmentation import Yolo5Segmentation
+
+
+class OfflineEncoding:
+
+    # INPUT PARAMETERS
+    config: dict
+
+    # DERIVED PARAMETERS
+    default_dtype: torch.dtype
+    torch_device: str
+    display_size_max_x_pix: float
+    display_size_max_y_pix: float
+    # padding_fill: float
+    # DEFINED PREVIOUSLY IN "apply_parameter_changes":
+    padding_pix: int
+    sigma_kernel_pix: float
+    lambda_kernel_pix: float
+    out_x: int
+    out_y: int
+    clocks: torch.Tensor
+    phosphene: torch.Tensor
+    clocks_filter: torch.Tensor
+
+    # DELIVERED BY ENCODING
+    position_found: None | torch.Tensor
+    canvas_size: None | torch.Tensor
+
+    def __init__(self, config="config.json"):
+
+        # Define parameters
+        # ========================================================
+        print("OffE-Init: Loading configuration parameters...")
+        with open(config, "r") as file:
+            config = json.loads(jsmin(file.read()))
+
+        # store in class
+        self.config = config
+        self.position_found = None
+        self.canvas_size = None
+
+        # get sub-dicts for easier access
+        display = self.config["display"]
+        dictionary = self.config["dictionary"]
+        gabor = self.config["gabor"]
+
+        # print(
+        #     "OE-Init: Defining paths, creating dirs, setting default device and datatype"
+        # )
+        # self.path = {"output": "test/output/level1/", "input": "test/images_test/"}
+        # Make output directories, if necessary: the place were we dump the new images to...
+        # os.makedirs(self.path["output"], mode=0o777, exist_ok=True)
+
+        # Check if GPU is available and use it, if possible
+        # =================================================
+        self.default_dtype = torch.float32
+        torch.set_default_dtype(self.default_dtype)
+        if self.config["control"]["force_torch_use_cpu"]:
+            torch_device = "cpu"
+        else:
+            torch_device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(f"Using {torch_device} as TORCH device...")
+        self.torch_device = torch_device
+
+        print("OffE-Init: Compute display scaling factors and padding RGB values")
+
+        # global scaling factors for all pixel-related length scales
+        self.display_size_max_x_pix = (
+            display["size_max_x_dva"] * display["pix_per_dva"]
+        )
+        self.display_size_max_y_pix = (
+            display["size_max_y_dva"] * display["pix_per_dva"]
+        )
+
+        # determine padding fill value
+        tmp = tv.transforms.Grayscale(num_output_channels=1)
+        tmp_value = torch.full((3, 1, 1), 254.0/255)
+        self.padding_fill = int(tmp(tmp_value).squeeze())
+
+        # PREVIOUSLY, A SEPARATE ROUTINE APPLIED PARAMETER CHANGES
+        # WE DISCARD THIS HERE BUT KEEP THE CODE AS EXAMPLE
+        #
+        # self.apply_parameter_changes()
+        # return
+    #
+    # def apply_parameter_changes(self):
+        #
+        # GET NEW PARAMETERS
+        print("OffE-Init: Computing image/patch sizes from parameters")
+
+        # BLOCK: dictionary ----------------
+        # set patch size for both dictionaries, make sure it is odd number
+        dictionary_size_pix = (
+            1
+            + (int(dictionary["size_dva"] *
+                   display["pix_per_dva"]) // 2) * 2
+        )
+
+        # BLOCK: gabor ---------------------
+        # convert contour-related parameters to pixel units
+        self.sigma_kernel_pix = (
+            gabor["sigma_kernel_dva"] *
+            display["pix_per_dva"]
+        )
+        self.lambda_kernel_pix = (
+            gabor["lambda_kernel_dva"] *
+            display["pix_per_dva"]
+        )
+
+        # BLOCK: gabor & dictionary ------------------
+        # Padding
+        # -------
+        self.padding_pix = int(
+            max(3.0 * self.sigma_kernel_pix, 1.1 * dictionary_size_pix)
+        )
+
+        # define target video/representation width/height
+        multiple_of = 4
+        out_x = self.display_size_max_x_pix + 2 * self.padding_pix
+        out_y = self.display_size_max_y_pix + 2 * self.padding_pix
+        out_x += (multiple_of - (out_x % multiple_of)) % multiple_of
+        out_y += (multiple_of - (out_y % multiple_of)) % multiple_of
+        self.out_x = int(out_x)
+        self.out_y = int(out_y)
+
+        # generate dictionaries
+        # ---------------------
+        # BLOCK: dictionary --------------------------
+        print("OffE-Init: Generating dictionaries...")
+        patch_generator = PatchGenerator(torch_device=self.torch_device)
+        self.phosphene = patch_generator.alphabet_phosphene(
+            patch_size=dictionary_size_pix,
+            sigma_width=dictionary["phosphene"]["sigma_width"]
+            * dictionary_size_pix,
+        )
+        # BLOCK: dictionary & gabor --------------------------
+        self.clocks_filter, self.clocks, segments = patch_generator.alphabet_clocks(
+            patch_size=dictionary_size_pix,
+            n_dir=dictionary["clocks"]["n_dir"],
+            n_filter=gabor["n_orientations"],
+            segment_width=dictionary["clocks"]["pointer_width"]
+            * dictionary_size_pix,
+            segment_length=dictionary["clocks"]["pointer_length"]
+            * dictionary_size_pix,
+        )
+
+        return
+
+    # TODO image supposed to be torch.Tensor(3, Y, X) within 0...1
+    def encode(self, image: torch.Tensor, number_of_patches: int = 42, border_pixel_value: float = 254.0 / 255) -> dict:
+
+        assert len(image.shape) == 3, "Input image must be RGB (3 dimensions)!"
+        assert image.shape[0] == 3, "Input image format must be (3, HEIGHT, WIDTH)!"
+        control = self.config["control"]
+
+        
+        # determine padding fill value
+        tmp = tv.transforms.Grayscale(num_output_channels=1)
+        tmp_value = torch.full((3, 1, 1), border_pixel_value)
+        padding_fill = float(tmp(tmp_value).squeeze())
+
+        # show input image, if desired...
+        if control["show_image"]:
+            self.__show_torch_frame(
+                image,
+                title="Encode: Input Image",
+                target=control["show_mode"]
+            )
+
+        # some constants for addressing specific components of output arrays
+        image_id_const: int = 0
+        overlap_index_const: int = 1
+
+        # Determine target size of image
+        # image: [RGB, Height, Width], dtype= tensor.torch.uint8
+        print("OffE-Encode: Computing downsampling factor image -> display")
+        f_x: float = self.display_size_max_x_pix / image.shape[-1]
+        f_y: float = self.display_size_max_y_pix / image.shape[-2]
+        f_xy_min: float = min(f_x, f_y)
+        downsampling_x: int = int(f_xy_min * image.shape[-1])
+        downsampling_y: int = int(f_xy_min * image.shape[-2])
+
+        # CURRENTLY we do not crop in the end...
+        # Image size for removing the fft crop later
+        # center_crop_x: int = downsampling_x
+        # center_crop_y: int = downsampling_y
+
+        # define contour extraction processing chain
+        # ------------------------------------------
+        print("OffE-Encode: Extracting contours")
+        train_processing_chain = tv.transforms.Compose(
+            transforms=[
+                tv.transforms.Grayscale(num_output_channels=1),  # RGB to grayscale
+                tv.transforms.Resize(
+                    size=(downsampling_y, downsampling_x)
+                ),  # downsampling
+                tv.transforms.Pad(  # extra white padding around the picture
+                    padding=(self.padding_pix, self.padding_pix),
+                    fill=padding_fill,
+                ),
+                ContourExtract(  # contour extraction
+                    n_orientations=self.config["gabor"]["n_orientations"],
+                    sigma_kernel=self.sigma_kernel_pix,
+                    lambda_kernel=self.lambda_kernel_pix,
+                    torch_device=self.torch_device,
+                ),
+                # CURRENTLY we do not crop in the end!
+                # tv.transforms.CenterCrop(  # Remove the padding
+                #     size=(center_crop_x, center_crop_y)
+                # ),
+            ],
+        )
+        # ...with and without orientation channels
+        contour = train_processing_chain(image.unsqueeze(0))
+        contour_collapse = train_processing_chain.transforms[-1].create_collapse(
+            contour
+        )
+
+        if control["show_contours"]:
+            self.__show_torch_frame(
+                contour_collapse,
+                title="Encode: Contours Extracted",
+                cmap="gray",
+                target=control["show_mode"],
+            )
+
+        # generate a prior for mapping the contour to the dictionary
+        # CURRENTLY we use an uniform prior...
+        # ----------------------------------------------------------
+        dictionary_prior = torch.ones(
+            (self.clocks_filter.shape[0]),
+            dtype=self.default_dtype,
+            device=torch.device(self.torch_device),
+        )
+
+        # instantiate and execute sparsifier
+        # ----------------------------------
+        print("OffE-Encode: Performing sparsification")
+        encoding = self.config["encoding"]
+        display = self.config["display"]
+        sparsifier = Sparsifier(
+            dictionary_filter=self.clocks_filter,
+            dictionary=self.clocks,
+            dictionary_prior=dictionary_prior,
+            number_of_patches=encoding["n_patches_compute"],
+            size_exp_deadzone=encoding["size_exp_deadzone_dva"]
+            * display["pix_per_dva"],
+            plot_use_map=False,  # self.control["plot_deadzone"],
+            deadzone_exp=encoding["use_exp_deadzone"],
+            deadzone_hard_cutout=encoding["use_cutout_deadzone"],
+            deadzone_hard_cutout_size=encoding["size_cutout_deadzone_dva"]
+            * display["pix_per_dva"],
+            padding_deadzone_size_x=self.padding_pix,
+            padding_deadzone_size_y=self.padding_pix,
+            torch_device=self.torch_device,
+        )
+        sparsifier(contour)
+        assert sparsifier.position_found is not None
+
+        # extract and normalize the overlap found
+        overlap_found = sparsifier.overlap_found[
+            image_id_const, :, overlap_index_const
+        ]
+        overlap_found = overlap_found / overlap_found.max()
+
+        # get overlap above certain threshold, extract corresponding elements
+        overlap_idcs_valid = torch.where(
+            overlap_found >= encoding["overlap_threshold"]
+        )[0]
+        position_selection = sparsifier.position_found[
+            image_id_const : image_id_const + 1, overlap_idcs_valid, :
+        ]
+        n_elements = len(overlap_idcs_valid)
+        print(f"OffE-Encode: {n_elements} elements positioned!")
+
+        contour_shape = contour.shape
+
+        n_cut = min(position_selection.shape[-2], number_of_patches)
+
+        data_out = {
+            "position_found": position_selection[:, :n_cut, :],
+            "canvas_size": contour_shape,
+        }
+
+        self.position_found = data_out["position_found"]
+        self.canvas_size = data_out["canvas_size"]
+
+        return data_out
+
+    def render(self):
+
+        assert self.position_found is not None, "Use ""encode"" before rendering!"
+        assert self.canvas_size is not None, "Use ""encode"" before rendering!"
+
+        control = self.config["control"]
+
+        # build the full image!
+        image_clocks = BuildImage(
+            canvas_size=self.canvas_size,
+            dictionary=self.clocks,
+            position_found=self.position_found,
+            default_dtype=self.default_dtype,
+            torch_device=self.torch_device,
+        )
+
+        # normalize to range [0...1]
+        m = image_clocks[0].max()
+        if m == 0:
+            m = 1
+        image_clocks_normalized = image_clocks[0] / m
+
+        # embed into frame of desired output size
+        out_torch = self.__embed_image(
+            image_clocks_normalized, out_height=self.out_y, out_width=self.out_x
+        )
+
+        # show, if desired...    
+        if control["show_percept"]:
+            self.__show_torch_frame(
+                out_torch, title="Percept",
+                cmap="gray", target=control["show_mode"]
+            )
+
+        return
+
+    def __show_torch_frame(self,
+        frame_torch: torch.Tensor,
+        title: str = "default",
+        cmap: str = "viridis",
+        target: str = "pyplot",
+    ):
+        frame_numpy = (
+            (frame_torch.movedim(0, -1) * 255).type(dtype=torch.uint8).cpu().numpy()
+        )
+        if target == "pyplot":
+            plt.imshow(frame_numpy, cmap=cmap)
+            plt.title(title)
+            plt.show()
+        if target == "cv2":
+            if frame_numpy.ndim == 3:
+                if frame_numpy.shape[-1] == 1:
+                    frame_numpy = np.tile(frame_numpy, [1, 1, 3])
+                    frame_numpy = (frame_numpy - frame_numpy.min()) / (
+                        frame_numpy.max() - frame_numpy.min()
+                    )
+            # print(frame_numpy.shape, frame_numpy.max(), frame_numpy.min())
+            cv2.namedWindow(title, cv2.WINDOW_NORMAL)
+            cv2.imshow(title, frame_numpy[:, :, (2, 1, 0)])
+            cv2.waitKey(1)
+
+        return
+
+    def __embed_image(self, frame_torch, out_height, out_width, init_value=0):
+
+        out_shape = torch.tensor(frame_torch.shape)
+
+        frame_width = frame_torch.shape[-1]
+        frame_height = frame_torch.shape[-2]
+
+        frame_width_idx0 = max([0, (frame_width - out_width) // 2])
+        frame_height_idx0 = max([0, (frame_height - out_height) // 2])
+
+        select_width = min([frame_width, out_width])
+        select_height = min([frame_height, out_height])
+
+        out_shape[-1] = out_width
+        out_shape[-2] = out_height
+
+        out_torch = init_value * torch.ones(tuple(out_shape))
+
+        out_width_idx0 = max([0, (out_width - frame_width) // 2])
+        out_height_idx0 = max([0, (out_height - frame_height) // 2])
+
+        out_torch[
+            ...,
+            out_height_idx0: (out_height_idx0 + select_height),
+            out_width_idx0: (out_width_idx0 + select_width),
+        ] = frame_torch[
+            ...,
+            frame_height_idx0: (frame_height_idx0 + select_height),
+            frame_width_idx0: (frame_width_idx0 + select_width),
+        ]
+
+        return out_torch
+
+    def __del__(self):
+
+        print("OffE-Delete: exiting gracefully!")
+        # TODO ...only do it when necessary
+        cv2.destroyAllWindows()
+
+        return
+
+
+if __name__ == "__main__":
+
+    source = 'bernd.jpg'
+    img_cv2 = cv2.imread(source)
+    img_torch = torch.Tensor(img_cv2[:, :, (2, 1, 0)]).movedim(-1, 0) / 255
+    # show_torch_frame(img_torch, target="cv2", title=source)
+    print(f"CV2 Shape: {img_cv2.shape}")
+    print(f"Torch Shape: {img_torch.shape}")
+
+    img = img_torch
+    frame_width = img.shape[-1]
+    frame_height = img.shape[-2]
+    print(
+        f"OffE-Test: Processing image {source} of {frame_width} x {frame_height}."
+    )
+
+    # TEST  tfg = tv.transforms.Grayscale(num_output_channels=1)
+    # TEST  pixel_fill = torch.full((3, 1, 1), 254.0 / 255)
+    # TEST  value_fill = float(tfg(pixel_fill).squeeze())
+    # TEST  tfp = tv.transforms.Pad(padding=(1, 1), fill=value_fill)
+
+    # TEST  img_gray = tfg(img[:, :3, :3])
+    # TEST  img_pad = tfp(img_gray)
+
+    oe = OfflineEncoding()
+    encoding = oe.encode(img)
+    stimulus = oe.render()
+    if oe.config["control"]["show_mode"] == "cv2":
+        cv2.waitKey(5000)
+    del oe
+
+# %%