Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
a27d779
Grids -> Grid
hepengfe Jun 7, 2022
a195f51
initialize box center as image center when no indices are given (rela…
hepengfe Jun 7, 2022
1e494c8
Merge branch 'master' into grid_improve
hepengfe Jun 7, 2022
2547fbb
Merge branch 'grid_improve' of https://github.com/feipenghe/forte int…
hepengfe Jun 7, 2022
b366978
fix variable name
hepengfe Jun 7, 2022
b901387
self._check_default_box_center -> self._check_box_center_status
hepengfe Jun 7, 2022
ab63ad8
fix pylint errors
hepengfe Jun 7, 2022
db52114
Merge branch 'master' into grid_improve
hepengfe Jun 8, 2022
854c271
move grid to from top.py to core.py
hepengfe Jun 20, 2022
d7dbbc8
Merge branch 'asyml:master' into grid_improve
hepengfe Jun 22, 2022
e40e396
move grid from top.py to core.py as a data structure and remove grids…
hepengfe Jun 22, 2022
e238385
update grids test
hepengfe Jun 22, 2022
fb8cf49
Merge branch 'grid_improve' of https://github.com/feipenghe/forte int…
hepengfe Jun 22, 2022
3bcc8ab
adjust grid tests
hepengfe Jun 22, 2022
e759466
grid integration in DataStore
hepengfe Jun 22, 2022
210c52c
pylint
hepengfe Jun 22, 2022
222968b
remove grid store in DataStore
hepengfe Jun 27, 2022
4c34bf4
add more docstring and round up grid cell shape computation
hepengfe Jun 27, 2022
2f4b36c
adjust grid test
hepengfe Jun 27, 2022
6b5fff3
improve docstring of grid
hepengfe Jun 28, 2022
ae2f871
add more test cases
hepengfe Jun 28, 2022
79ac80e
fix docstring
hepengfe Jun 28, 2022
a2318a3
Merge branch 'master' into grid_improve
hepengfe Jun 28, 2022
3035cd4
docstring
hepengfe Jun 28, 2022
091e6cc
Merge branch 'grid_improve' of https://github.com/feipenghe/forte int…
hepengfe Jun 28, 2022
803f960
add more property and methods for Box based on three uses cases
hepengfe Jun 29, 2022
e222677
remove code adding grid into DataStore
hepengfe Jun 29, 2022
04323d1
add more tests for Box
hepengfe Jun 29, 2022
5babd23
add more check on grid shape
hepengfe Jun 29, 2022
2881a7e
Merge branch 'master' into grid_improve
hepengfe Jun 29, 2022
0037402
pylint: long line and too many public methods
hepengfe Jun 29, 2022
acdefdb
Fix mypy
hepengfe Jun 29, 2022
fc1d537
Merge branch 'master' into grid_improve
hepengfe Jun 30, 2022
7276830
resolve the merge conflict
hepengfe Jun 30, 2022
0007f78
add test cases for box out of the image
hepengfe Jun 30, 2022
9588ac7
Grids -> Grid
hepengfe Jun 30, 2022
c3b2ba8
rm Grids
hepengfe Jun 30, 2022
c0b3d8c
rm Grids in data store
hepengfe Jun 30, 2022
7203b23
add offset setter, getter and checker
hepengfe Jun 30, 2022
5fd59ca
change ImagePayload adding operation
hepengfe Jun 30, 2022
83083fc
pylint fix
hepengfe Jun 30, 2022
3dacf0d
fix mypy and check conditions only in most basic box property (Box.cx…
hepengfe Jun 30, 2022
deb2a30
pylint
hepengfe Jun 30, 2022
bfed96f
remove unused DataPack attributes
hepengfe Jul 1, 2022
4023891
fix docstring and add upper bound in get_grid_cell_center
hepengfe Jul 1, 2022
5e8c483
Fix docstring
hepengfe Jul 1, 2022
618a974
add more examples for box
hepengfe Jul 1, 2022
e7e0cc4
pylint
hepengfe Jul 1, 2022
7d7d04a
add more docstring
hepengfe Jul 7, 2022
416cbbc
remove unsued variables
hepengfe Jul 7, 2022
90612b7
add docstring for grid
hepengfe Jul 7, 2022
d2b10c0
add specifications for units
hepengfe Jul 7, 2022
d49a319
pylint fix
hepengfe Jul 7, 2022
286428f
Merge branch 'master' into grid_improve
hepengfe Jul 7, 2022
8deb0c9
adjust image payload adding
hepengfe Jul 7, 2022
884cb6f
mypy error
hepengfe Jul 7, 2022
d48b693
add more units
hepengfe Jul 8, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions forte/data/data_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@

import numpy as np
from sortedcontainers import SortedList

from forte.common.exception import (
ProcessExecutionException,
UnknownOntologyClassException,
Expand All @@ -53,7 +52,6 @@
Generics,
AudioAnnotation,
ImageAnnotation,
Grids,
Payload,
)

Expand Down Expand Up @@ -171,7 +169,6 @@ def __init__(self, pack_name: Optional[str] = None):
self._data_store: DataStore = DataStore()
self._entry_converter: EntryConverter = EntryConverter()
self.image_annotations: List[ImageAnnotation] = []
self.grids: List[Grids] = []

self.text_payloads: List[Payload] = []
self.audio_payloads: List[Payload] = []
Expand Down Expand Up @@ -244,7 +241,7 @@ def text(self) -> str:
@property
def audio(self) -> Optional[np.ndarray]:
r"""Return the audio of the data pack"""
return self.get_payload_data_at(Modality.Audio, 0)
return cast(np.ndarray, self.get_payload_data_at(Modality.Audio, 0))

@property
def all_annotations(self) -> Iterator[Annotation]:
Expand Down
2 changes: 0 additions & 2 deletions forte/data/data_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
from forte.data.ontology.top import (
Annotation,
AudioAnnotation,
Grids,
Group,
ImageAnnotation,
Link,
Expand Down Expand Up @@ -775,7 +774,6 @@ def _add_entry_raw(
Group,
Generics,
ImageAnnotation,
Grids,
Payload,
MultiPackLink,
MultiPackGroup,
Expand Down
10 changes: 0 additions & 10 deletions forte/data/entry_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
Generics,
AudioAnnotation,
ImageAnnotation,
Grids,
MultiPackGeneric,
MultiPackGroup,
MultiPackLink,
Expand Down Expand Up @@ -124,15 +123,6 @@ def save_entry_object(
tid=entry.tid,
allow_duplicate=allow_duplicate,
)
elif data_store_ref._is_subclass(entry.entry_type(), Grids):
# Will be deprecated in future
data_store_ref.add_entry_raw(
type_name=entry.entry_type(),
attribute_data=[entry.image_payload_idx, None],
base_class=Grids,
tid=entry.tid,
allow_duplicate=allow_duplicate,
)
elif data_store_ref._is_subclass(entry.entry_type(), MultiPackLink):
data_store_ref.add_entry_raw(
type_name=entry.entry_type(),
Expand Down
222 changes: 221 additions & 1 deletion forte/data/ontology/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from typing import (
Iterable,
Optional,
Tuple,
Type,
Hashable,
TypeVar,
Expand All @@ -33,7 +34,7 @@
overload,
List,
)

import math
import numpy as np

from forte.data.container import ContainerType
Expand All @@ -49,6 +50,7 @@
"FList",
"FNdArray",
"MultiEntry",
"Grid",
]

default_entry_fields = [
Expand Down Expand Up @@ -635,5 +637,223 @@ def index_key(self) -> int:
return self.tid


class Grid:
    """
    Regular grid laid over an image of a fixed size.

    A ``Grid`` is an indexing helper: it partitions the pixel area of an
    image into ``height * width`` rectangular grid cells so that cell-level
    regions (for example the pixels of one cell, or a cell's center) can be
    computed. The grid itself does not store any image data.

    Cell size. Given the image size and the grid shape, the height and the
    width of a grid cell are computed once at construction time. For
    example, if the image size ``(image_height, image_width)`` is
    ``(640, 480)`` and the grid shape ``(height, width)`` is ``(2, 3)``, the
    grid cell size ``(self.c_h, self.c_w)`` is ``(320, 160)``.

    Indivisible sizes. When the image size is not divisible by the grid
    shape, the cell size is rounded *up* to the next integer and every pixel
    range is clipped at the image border. The trailing cell(s) of each row
    and column are therefore smaller than the regular cells. For example, if
    the image size is ``(128, 128)`` and the grid shape is ``(13, 13)``, the
    cell size is ``(10, 10)`` because ``128 / 13 = 9.85`` is rounded up to
    ``10``. The first 12 grid cells per row and column have a size of
    ``(10, 10)`` and the last one has a size of ``(8, 8)`` because
    ``128 - 12 * 10 = 8``.

    Note: because of the rounding, some grid shapes produce trailing cells
    that lie entirely outside of the image. For example, an image height of
    ``10`` with a grid height of ``7`` gives a cell height of ``2``, so only
    the first 5 cells cover pixels. :meth:`get_grid_cell` returns an
    all-zero array for such a cell.

    One grid per image size. A grid is bound to exactly one image size,
    given at initialization. The number of distinct image sizes in a
    computer vision task is usually small (e.g., a dataset with images of
    size ``(640, 480)`` augmented with ``(320, 240)`` and ``(480, 640)``
    only has three sizes), so keeping one grid per image size is cheap. It
    also means that the image size is validated once here instead of being
    passed to, and re-checked by, every method call.

    Args:
        height: the number of grid cells per column, the unit is one grid
            cell.
        width: the number of grid cells per row, the unit is one grid cell.
        image_height: the number of pixels per column in the image.
        image_width: the number of pixels per row in the image.

    Raises:
        ValueError: ``image_height`` or ``image_width`` is not positive.
        ValueError: ``height`` or ``width`` is not positive.
        ValueError: ``height`` is not smaller than ``image_height`` or
            ``width`` is not smaller than ``image_width``.
    """

    def __init__(
        self,
        height: int,
        width: int,
        image_height: int,
        image_width: int,
    ):
        if image_height <= 0 or image_width <= 0:
            raise ValueError(
                "both image height and width must be positive, "
                f"but the image shape is {(image_height, image_width)}; "
                "please input a valid image shape"
            )
        if height <= 0 or width <= 0:
            raise ValueError(
                f"height({height}) and "
                f"width({width}) both must be larger than 0"
            )
        if height >= image_height or width >= image_width:
            raise ValueError(
                "Grid height and width must be smaller than image height and width"
            )

        self._height = height
        self._width = width

        self._image_height = image_height
        self._image_width = image_width

        # Round the cell size up when the image size is not divisible by the
        # grid shape. All inputs are positive at this point, so both values
        # are at least 1 and need no further validation. Pixel ranges are
        # clipped by the image size wherever they are computed.
        self.c_h = math.ceil(image_height / height)
        self.c_w = math.ceil(image_width / width)

    @staticmethod
    def _check_cell_index(name: str, idx: int, size: int) -> None:
        """Raise ``ValueError`` unless ``0 <= idx < size``."""
        if not 0 <= idx < size:
            raise ValueError(
                f"input parameter {name} ({idx}) is "
                f"out of scope of {name} range {(0, size)}"
            )

    def _row_span(self, h_idx: int) -> Tuple[int, int]:
        """Return the ``(start, stop)`` pixel rows of grid row ``h_idx``."""
        start = h_idx * self.c_h
        # clip at the image border: the last cell may be a partial one
        return start, min(start + self.c_h, self._image_height)

    def _col_span(self, w_idx: int) -> Tuple[int, int]:
        """Return the ``(start, stop)`` pixel columns of grid column
        ``w_idx``."""
        start = w_idx * self.c_w
        # clip at the image border: the last cell may be a partial one
        return start, min(start + self.c_w, self._image_width)

    def get_grid_cell(self, img_arr: np.ndarray, h_idx: int, w_idx: int):
        """
        Get the array data of a grid cell from an image array.

        The returned array is a masked copy of the image: its first two
        dimensions are ``(image_height, image_width)`` and any further
        dimensions of ``img_arr`` (e.g., color channels) are kept. Entries
        inside the grid cell are copied from ``img_arr``; all entries outside
        of the grid cell are zeros. The array is created with ``np.zeros``
        and therefore has numpy's default floating point data type.

        A residual (incomplete) grid cell on the bottom or right edge of the
        image covers a smaller pixel range than a regular one, because the
        pixel range is clipped by the image size.

        Note: all indices are zero-based and counted from top left corner of
        the image.

        Args:
            img_arr: image data represented as a numpy array whose first two
                dimensions are the image height and the image width.
            h_idx: the zero-based height(row) index of the grid cell in the
                grid, the unit is one grid cell.
            w_idx: the zero-based width(column) index of the grid cell in the
                grid, the unit is one grid cell.

        Raises:
            ValueError: ``h_idx`` is out of the range specified by ``height``.
            ValueError: ``w_idx`` is out of the range specified by ``width``.

        Returns:
            numpy array that represents the grid cell.
        """
        self._check_cell_index("h_idx", h_idx, self._height)
        self._check_cell_index("w_idx", w_idx, self._width)

        top, bottom = self._row_span(h_idx)
        left, right = self._col_span(w_idx)

        # Keep trailing dimensions (e.g. channels) so that multi-channel
        # images can be masked as well; for a 2-D image this is exactly
        # (image_height, image_width).
        out_shape = (self._image_height, self._image_width) + tuple(
            img_arr.shape[2:]
        )
        array = np.zeros(out_shape)
        # copy the cell region; everything else remains zero
        array[top:bottom, left:right] = img_arr[top:bottom, left:right]
        return array

    def get_grid_cell_center(self, h_idx: int, w_idx: int) -> Tuple[int, int]:
        """
        Get the center pixel position of the grid cell at the specific height
        index and width index in the ``Grid``.

        The center is the midpoint (rounded down) of the pixel range of the
        grid cell along each axis, where the range is clipped by the image
        size. For example, a grid cell with a height range (unit: pixel) of
        (0, 3) and a width range (unit: pixel) of (0, 3) has the center
        (1, 1). Since grid cells are usually large, the offset introduced by
        rounding down is minor.

        Note: all indices are zero-based and counted from top left corner of
        the grid. The indices are not validated by this method.

        Args:
            h_idx: the height(row) index of the grid cell in the grid,
                the unit is one grid cell.
            w_idx: the width(column) index of the grid cell in the
                grid, the unit is one grid cell.

        Returns:
            A tuple of (y index, x index), the unit is one pixel.
        """
        top, bottom = self._row_span(h_idx)
        left, right = self._col_span(w_idx)
        return (top + bottom) // 2, (left + right) // 2

    @property
    def num_grid_cells(self) -> int:
        """The total number of grid cells, i.e., ``height * width``."""
        return self._height * self._width

    @property
    def height(self) -> int:
        """The number of grid cells per column."""
        return self._height

    @property
    def width(self) -> int:
        """The number of grid cells per row."""
        return self._width

    def _key(self) -> Tuple[int, int, int, int]:
        """Return the tuple that identifies this grid configuration."""
        return (
            self._height,
            self._width,
            self._image_height,
            self._image_width,
        )

    def __repr__(self):
        return str(self._key())

    def __eq__(self, other):
        # Two grids are equal when both the grid shape and the image size
        # match. Anything that is not a Grid (including None) is left to
        # Python's default handling, which evaluates ``==`` to False.
        if not isinstance(other, Grid):
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        # consistent with __eq__: hash the same identifying tuple
        return hash(self._key())


# Type variables for annotating code that is generic over group and link
# entries; each one is bounded by the corresponding base class.
GroupType = TypeVar("GroupType", bound=BaseGroup)
LinkType = TypeVar("LinkType", bound=BaseLink)
Loading