asyml · hepengfe · Jun 7, 2022 · Jun 7, 2022 · Jun 7, 2022 · Jun 7, 2022
diff --git a/forte/data/data_pack.py b/forte/data/data_pack.py
@@ -32,7 +32,6 @@
 
 import numpy as np
 from sortedcontainers import SortedList
-
 from forte.common.exception import (
     ProcessExecutionException,
     UnknownOntologyClassException,
@@ -53,7 +52,6 @@
     Generics,
     AudioAnnotation,
     ImageAnnotation,
-    Grids,
     Payload,
 )
 
@@ -171,7 +169,6 @@ def __init__(self, pack_name: Optional[str] = None):
         self._data_store: DataStore = DataStore()
         self._entry_converter: EntryConverter = EntryConverter()
         self.image_annotations: List[ImageAnnotation] = []
-        self.grids: List[Grids] = []
 
         self.text_payloads: List[Payload] = []
         self.audio_payloads: List[Payload] = []
@@ -244,7 +241,7 @@ def text(self) -> str:
     @property
     def audio(self) -> Optional[np.ndarray]:
         r"""Return the audio of the data pack"""
-        return self.get_payload_data_at(Modality.Audio, 0)
+        return cast(np.ndarray, self.get_payload_data_at(Modality.Audio, 0))
 
     @property
     def all_annotations(self) -> Iterator[Annotation]:

diff --git a/forte/data/data_store.py b/forte/data/data_store.py
@@ -28,7 +28,6 @@
 from forte.data.ontology.top import (
     Annotation,
     AudioAnnotation,
-    Grids,
     Group,
     ImageAnnotation,
     Link,
@@ -775,7 +774,6 @@ def _add_entry_raw(
             Group,
             Generics,
             ImageAnnotation,
-            Grids,
             Payload,
             MultiPackLink,
             MultiPackGroup,

diff --git a/forte/data/entry_converter.py b/forte/data/entry_converter.py
@@ -24,7 +24,6 @@
     Generics,
     AudioAnnotation,
     ImageAnnotation,
-    Grids,
     MultiPackGeneric,
     MultiPackGroup,
     MultiPackLink,
@@ -124,15 +123,6 @@ def save_entry_object(
                 tid=entry.tid,
                 allow_duplicate=allow_duplicate,
             )
-        elif data_store_ref._is_subclass(entry.entry_type(), Grids):
-            # Will be deprecated in future
-            data_store_ref.add_entry_raw(
-                type_name=entry.entry_type(),
-                attribute_data=[entry.image_payload_idx, None],
-                base_class=Grids,
-                tid=entry.tid,
-                allow_duplicate=allow_duplicate,
-            )
         elif data_store_ref._is_subclass(entry.entry_type(), MultiPackLink):
             data_store_ref.add_entry_raw(
                 type_name=entry.entry_type(),

diff --git a/forte/data/ontology/core.py b/forte/data/ontology/core.py
@@ -23,6 +23,7 @@
 from typing import (
     Iterable,
     Optional,
+    Tuple,
     Type,
     Hashable,
     TypeVar,
@@ -33,7 +34,7 @@
     overload,
     List,
 )
-
+import math
 import numpy as np
 
 from forte.data.container import ContainerType
@@ -49,6 +50,7 @@
     "FList",
     "FNdArray",
     "MultiEntry",
+    "Grid",
 ]
 
 default_entry_fields = [
@@ -635,5 +637,223 @@ def index_key(self) -> int:
         return self.tid
 
 
+class Grid:
+    """
+    Regular grid with a grid configuration dependent on the image size.
+    It is a data structure used to retrieve grid-related objects such as grid
+    cells from the image. Grid itself doesn't store any data.
+
+    Based the image size and the grid shape,
+    we compute the height and the width of grid cells.
+    For example, if the image size (image_height,image_width) is (640, 480)
+    and the grid shape (height, width) is (2, 3)
+    the size of grid cells (self.c_h, self.c_w) will be (320, 160).
+
+    However, when the image size is not divisible by the grid shape, we round
+    up the resulting size(floating number) to an integer.
+    In this way, as each grid
+    cell taking one more pixel, we make the last grid cell per column and row
+    size(height and width) to be the remainder of the image size divided by the
+    grid cell size which is smaller than other grid cell.
+    For example, if the image
+    size is (128, 128) and the grid shape is (13, 13), the first 11 grid cells
+    per column and row will have a size of (10, 10) since 128/13=9.85, so we
+    round up to 10. The last grid cell per column and row will have a size of
+    (8, 8) since 128%10=8.
+
+
+    We require each grid to be bounded/intialized with one image size since
+    the number of different image shapes are limited per computer vision task.
+    For example, we can only have one image size (640, 480) from a CV dataset,
+    and we could augment the dataset with few other image sizes
+    (320, 240), (480, 640). Then there are only three image sizes.
+    Therefore, it won't be troublesome to
+    have a grid for each image size, and we can check the image size during the
+    initialization of the grid.
+    By contrast, if the grid is totally "free-form"
+    that we don't initialize it with any
+    image size and pass the image size directly into the method/operation on
+    the fly, the API would be more complex and image size check would be
+    repeated everytime the method is called.
+
+    Args:
+        height: the number of grid cell per column, the unit is one grid cell.
+        width: the number of grid cell per row, the unit is one grid cell.
+        image_height: the number of pixels per column in the image.
+        image_width: the number of pixels per row in the image.
+    """
+
+    def __init__(
+        self,
+        height: int,
+        width: int,
+        image_height: int,
+        image_width: int,
+    ):
+        if image_height <= 0 or image_width <= 0:
+            raise ValueError(
+                "both image height and width must be positive"
+                f"but the image shape is {(image_height, image_width)}"
+                "please input a valid image shape"
+            )
+        if height <= 0 or width <= 0:
+            raise ValueError(
+                f"height({height}) and "
+                f"width({width}) both must be larger than 0"
+            )
+        if height >= image_height or width >= image_width:
+            raise ValueError(
+                "Grid height and width must be smaller than image height and width"
+            )
+
+        self._height = height
+        self._width = width
+
+        self._image_height = image_height
+        self._image_width = image_width
+
+        # if the resulting size of grid is not an integer, we round it up.
+        # The last grid cell per row and column might be out of the image size
+        # since we constrain the maximum pixel locations by the image size
+        self.c_h, self.c_w = (
+            math.ceil(image_height / self._height),
+            math.ceil(image_width / self._width),
+        )
+
+        if self.c_h <= 0 or self.c_w <= 0:
+            raise ValueError(
+                "cell height and width must be positive"
+                f"but the cell shape is {(self.c_h, self.c_w)}"
+                "please adjust image shape or grid shape accordingly"
+            )
+
+    def get_grid_cell(self, img_arr: np.ndarray, h_idx: int, w_idx: int):
+        """
+        Get the array data of a grid cell from image of the image payload index.
+        The array is a masked version of the original image, and it has
+        the same size as the original image. The array entries that are not
+        within the grid cell will masked as zeros. The image array entries that
+        are within the grid cell will kept.
+
+        Note: all indices are zero-based and counted from top left corner of
+        the image.
+
+        Args:
+            img_arr: image data represented as a numpy array.
+            h_idx: the zero-based height(row) index of the grid cell in the
+                grid, the unit is one grid cell.
+            w_idx: the zero-based width(column) index of the grid cell in the
+                grid, the unit is one grid cell.
+
+        Raises:
+            ValueError: ``h_idx`` is out of the range specified by ``height``.
+            ValueError: ``w_idx`` is out of the range specified by ``width``.
+
+        Returns:
+            numpy array that represents the grid cell.
+        """
+        if not 0 <= h_idx < self._height:
+            raise ValueError(
+                f"input parameter h_idx ({h_idx}) is"
+                "out of scope of h_idx range"
+                f" {(0, self._height)}"
+            )
+        if not 0 <= w_idx < self._width:
+            raise ValueError(
+                f"input parameter w_idx ({w_idx}) is"
+                "out of scope of w_idx range"
+                f" {(0, self._width)}"
+            )
+        # initialize a numpy zeros array
+        array = np.zeros((self._image_height, self._image_width))
+        # set grid cell entry values to the values of the original image array
+        # (entry values outside of grid cell remain zeros)
+        # An example of computing grid height index range is
+        # index * cell height : min((index + 1) * cell height, image_height).
+        # It's similar for computing cell width index range
+        # Plus, we constrain the maximum pixel locations by the image size as
+        # the last grid cell per row and column might be out of the image size
+        array[
+            h_idx * self.c_h : min((h_idx + 1) * self.c_h, self._image_height),
+            w_idx * self.c_w : min((w_idx + 1) * self.c_w, self._image_width),
+        ] = img_arr[
+            h_idx * self.c_h : min((h_idx + 1) * self.c_h, self._image_height),
+            w_idx * self.c_w : min((w_idx + 1) * self.c_w, self._image_width),
+        ]
+        return array
+
+    def get_grid_cell_center(self, h_idx: int, w_idx: int) -> Tuple[int, int]:
+        """
+        Get the center pixel position of the grid cell at the specific height
+        index and width index in the ``Grid``.
+        The computation of the center position of the grid cell is
+        dividing the grid cell height range (unit: pixel) and
+        width range (unit: pixel) by 2 (round down)
+
+        Suppose an extreme case that a grid cell has a height range
+        (unit: pixel) of (0, 3)
+        and a width range (unit: pixel) of (0, 3) the grid cell center
+        would be (1, 1).
+        Since the grid cell size is usually very large,
+        the offset of the grid cell center is minor.
+
+        Note: all indices are zero-based and counted from top left corner of
+        the grid.
+
+        Args:
+            h_idx: the height(row) index of the grid cell in the grid,
+                the unit is one grid cell.
+            w_idx: the width(column) index of the grid cell in the
+                grid, the unit is one grid cell.
+
+        Returns:
+            A tuple of (y index, x index)
+        """
+
+        return (
+            (h_idx * self.c_h + min((h_idx + 1) * self.c_h, self._image_height))
+            // 2,
+            (w_idx * self.c_w + min((w_idx + 1) * self.c_w, self._image_width))
+            // 2,
+        )
+
+    @property
+    def num_grid_cells(self):
+        return self._height * self._width
+
+    @property
+    def height(self):
+        return self._height
+
+    @property
+    def width(self):
+        return self._width
+
+    def __repr__(self):
+        return str(
+            (self._height, self._width, self._image_height, self._image_width)
+        )
+
+    def __eq__(self, other):
+        if other is None:
+            return False
+        return (
+            self._height,
+            self._width,
+            self._image_height,
+            self._image_width,
+        ) == (
+            other._height,
+            other._width,
+            other.image_height,
+            other.image_width,
+        )
+
+    def __hash__(self):
+        return hash(
+            (self._height, self._width, self._image_height, self._image_width)
+        )
+
+
 GroupType = TypeVar("GroupType", bound=BaseGroup)
 LinkType = TypeVar("LinkType", bound=BaseLink)