Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DOTA dataset #2551

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/api/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,10 @@ Digital Typhoon

.. autoclass:: DigitalTyphoon

DOTA
^^^^
.. autoclass:: DOTA

ETCI2021 Flood Detection
^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions docs/api/datasets/non_geo_datasets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands
`DeepGlobe Land Cover`_,S,DigitalGlobe +Vivid,-,803,7,"2,448x2,448",0.5,RGB
`DFC2022`_,S,Aerial,"CC-BY-4.0","3,981",15,"2,000x2,000",0.5,RGB
`Digital Typhoon`_,"C, R",Himawari,"CC-BY-4.0","189,364",8,512,5000,Infrared
`DOTA`_,OD,"Google Earth, Gaofen-2, Jilin-1","CC-BY-NC-4.0","5,229",15,"varying","varying",RGB
`ETCI2021 Flood Detection`_,S,Sentinel-1,-,"66,810",2,256x256,5--20,SAR
`EuroSAT`_,C,Sentinel-2,"MIT","27,000",10,64x64,10,MSI
`FAIR1M`_,OD,Gaofen/Google Earth,"CC-BY-NC-SA-3.0","15,000",37,"1,024x1,024",0.3--0.8,RGB
Expand Down
146 changes: 146 additions & 0 deletions tests/data/dota/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import hashlib
import os
import shutil
import tarfile
from pathlib import Path

import numpy as np
import pandas as pd
from PIL import Image


def create_dummy_image(path: Path, size: tuple[int, int] = (64, 64)) -> None:
    """Write a random RGB image of the given pixel size to *path*."""
    pixels = np.random.randint(0, 255, size=(size[0], size[1], 3), dtype=np.uint8)
    image = Image.fromarray(pixels)
    image.save(path)


def create_annotation_file(
    path: Path, is_hbb: bool = False, no_boxes: bool = False
) -> None:
    """Write a dummy DOTA annotation file: two header lines plus box lines.

    Args:
        path: destination of the annotation text file.
        is_hbb: write horizontal (axis-aligned) boxes instead of oriented ones.
        no_boxes: write only the header, with no box lines at all.
    """
    if no_boxes:
        box_lines: list[str] = []
    elif is_hbb:
        # Horizontal boxes with coordinates fitting a 64x64 image.
        box_lines = [
            '10.0 10.0 20.0 10.0 20.0 20.0 10.0 20.0 plane 0\n',
            '30.0 30.0 40.0 30.0 40.0 40.0 30.0 40.0 ship 0\n',
        ]
    else:
        # Oriented boxes with coordinates fitting a 64x64 image.
        box_lines = [
            '10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0\n',
            '30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0\n',
        ]

    with open(path, 'w') as f:
        f.writelines(['imagesource:dummy\n', 'gsd:1.0\n', *box_lines])


def create_test_data(root: Path) -> None:
    """Create the dummy DOTA test dataset under *root*.

    For each split ('train'/'val') and each DOTA version ('1.0'/'2.0') this
    writes dummy images and annotation files, packs them into the tar.gz
    archives the dataset class expects, and prints each archive's md5 sum
    (for pasting into the test fixture).
    """
    splits = ['train', 'val']
    versions = ['1.0', '2.0']

    for split in splits:
        # Train gets one more sample than val so the split sizes differ.
        num_samples = 3 if split == 'train' else 2

        # Start each split from a clean directory.
        if os.path.exists(root / split):
            shutil.rmtree(root / split)
        for version in versions:
            # Create images and annotations
            for i in range(num_samples):
                img_name = f'P{version[0]}_{i:04d}.png'
                ann_name = f'P{version[0]}_{i:04d}.txt'

                # Create directories
                (root / split / 'images').mkdir(parents=True, exist_ok=True)
                (root / split / 'annotations').mkdir(parents=True, exist_ok=True)
                if version == '2.0':
                    # Version 2.0 additionally ships horizontal-box annotations.
                    (root / split / 'annotations_hbb').mkdir(
                        parents=True, exist_ok=True
                    )

                # First sample of each version has no boxes, to exercise the
                # empty-annotation code path.
                no_boxes = i == 0
                create_dummy_image(root / split / 'images' / img_name)
                create_annotation_file(
                    root / split / 'annotations' / ann_name, False, no_boxes
                )
                if version == '2.0':
                    create_annotation_file(
                        root / split / 'annotations_hbb' / ann_name, True, no_boxes
                    )

            # Create tar archives. NOTE: the split directories are shared
            # across versions, so the version-2.0 archives also contain the
            # version-1.0 files written earlier in this split.
            for type_ in ['images', 'annotations']:
                src_dir = root / split / type_
                if src_dir.exists():
                    tar_name = f'dotav{version[0]}_{type_}_{split}.tar.gz'
                    with tarfile.open(root / tar_name, 'w:gz') as tar:
                        tar.add(src_dir, arcname=f'{split}/{type_}')

    def md5(fname: str | Path) -> str:
        """Return the md5 hex digest of a file, read in 4 KiB chunks."""
        hash_md5 = hashlib.md5()
        with open(fname, 'rb') as f:
            for chunk in iter(lambda: f.read(4096), b''):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    # Print md5 sums. Resolve each archive relative to *root*: the archives
    # were written there, not necessarily in the current working directory
    # (the original code opened the bare filename, which only worked when
    # root == '.').
    for version in versions:
        for type_ in ['images', 'annotations']:
            for split in splits:
                tar_name = f'dotav{version[0]}_{type_}_{split}.tar.gz'
                print(f'{tar_name} md5: {md5(root / tar_name)}')


def create_sample_df(root: Path) -> pd.DataFrame:
"""Create sample DataFrame for test data."""
rows = []
splits = ['train', 'val']
versions = ['1.0', '2.0']

for split in splits:
num_samples = 3 if split == 'train' else 2
for version in versions:
for i in range(num_samples):
img_name = f'P{version[0]}_{i:04d}.png'
ann_name = f'P{version[0]}_{i:04d}.txt'

row = {
'image_path': str(Path(split) / 'images' / img_name),
'annotation_path': str(Path(split) / 'annotations' / ann_name),
'split': split,
'version': version,
}

if version == '2.0':
row['annotation_hbb_path'] = str(
Path(split) / 'annotations_hbb' / ann_name
)
else:
row['annotation_hbb_path'] = None

rows.append(row)

df = pd.DataFrame(rows)
df.to_parquet(root / 'samples.parquet')
return df


if __name__ == '__main__':
    # Generate the dummy archives (printing their md5 sums) and the
    # accompanying samples.parquet index in the current directory.
    root = Path('.')
    create_test_data(root)
    df = create_sample_df(root)
Binary file added tests/data/dota/dotav1_annotations_train.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav1_annotations_val.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav1_images_train.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav1_images_val.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav2_annotations_train.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav2_annotations_val.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav2_images_train.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav2_images_val.tar.gz
Binary file not shown.
Binary file added tests/data/dota/samples.parquet
Binary file not shown.
2 changes: 2 additions & 0 deletions tests/data/dota/train/annotations/P1_0000.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagesource:dummy
gsd:1.0
4 changes: 4 additions & 0 deletions tests/data/dota/train/annotations/P1_0001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
4 changes: 4 additions & 0 deletions tests/data/dota/train/annotations/P1_0002.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
2 changes: 2 additions & 0 deletions tests/data/dota/train/annotations/P2_0000.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagesource:dummy
gsd:1.0
4 changes: 4 additions & 0 deletions tests/data/dota/train/annotations/P2_0001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
4 changes: 4 additions & 0 deletions tests/data/dota/train/annotations/P2_0002.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
2 changes: 2 additions & 0 deletions tests/data/dota/train/annotations_hbb/P2_0000.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagesource:dummy
gsd:1.0
4 changes: 4 additions & 0 deletions tests/data/dota/train/annotations_hbb/P2_0001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 10.0 20.0 20.0 10.0 20.0 plane 0
30.0 30.0 40.0 30.0 40.0 40.0 30.0 40.0 ship 0
4 changes: 4 additions & 0 deletions tests/data/dota/train/annotations_hbb/P2_0002.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 10.0 20.0 20.0 10.0 20.0 plane 0
30.0 30.0 40.0 30.0 40.0 40.0 30.0 40.0 ship 0
Binary file added tests/data/dota/train/images/P1_0000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dota/train/images/P1_0001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dota/train/images/P1_0002.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dota/train/images/P2_0000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dota/train/images/P2_0001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dota/train/images/P2_0002.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions tests/data/dota/val/annotations/P1_0000.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagesource:dummy
gsd:1.0
4 changes: 4 additions & 0 deletions tests/data/dota/val/annotations/P1_0001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
2 changes: 2 additions & 0 deletions tests/data/dota/val/annotations/P2_0000.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagesource:dummy
gsd:1.0
4 changes: 4 additions & 0 deletions tests/data/dota/val/annotations/P2_0001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
2 changes: 2 additions & 0 deletions tests/data/dota/val/annotations_hbb/P2_0000.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagesource:dummy
gsd:1.0
4 changes: 4 additions & 0 deletions tests/data/dota/val/annotations_hbb/P2_0001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 10.0 20.0 20.0 10.0 20.0 plane 0
30.0 30.0 40.0 30.0 40.0 40.0 30.0 40.0 ship 0
Binary file added tests/data/dota/val/images/P1_0000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dota/val/images/P1_0001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dota/val/images/P2_0000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dota/val/images/P2_0001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
157 changes: 157 additions & 0 deletions tests/datasets/test_dota.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import shutil
from itertools import product
from pathlib import Path

import matplotlib.pyplot as plt
import pytest
import torch
import torch.nn as nn
from _pytest.fixtures import SubRequest
from pytest import MonkeyPatch

from torchgeo.datasets import DOTA, DatasetNotFoundError

pytest.importorskip('pyarrow')


class TestDOTA:
    """Tests for the DOTA dataset against the dummy data in tests/data/dota."""

    @pytest.fixture(params=product(['train', 'val'], ['1.0', '2.0']))
    def dataset(
        self, monkeypatch: MonkeyPatch, tmp_path: Path, request: SubRequest
    ) -> DOTA:
        """Return a DOTA instance for every (split, version) combination.

        Redirects the dataset's download URL and checksum table to the local
        dummy archives so the download/extract/verify path runs offline.
        """
        url = os.path.join('tests', 'data', 'dota', '{}')
        monkeypatch.setattr(DOTA, 'url', url)

        # md5 sums of the dummy tar.gz archives generated by
        # tests/data/dota/data.py (not the real dataset checksums).
        file_info = {
            'train': {
                'images': {
                    '1.0': {
                        'filename': 'dotav1_images_train.tar.gz',
                        'md5': '14296c11c897cb7718558815a2b1bf69',
                    },
                    '2.0': {
                        'filename': 'dotav2_images_train.tar.gz',
                        'md5': 'fc80227b1f9b99cf5a7a3d0c5798efd0',
                    },
                },
                'annotations': {
                    '1.0': {
                        'filename': 'dotav1_annotations_train.tar.gz',
                        'md5': '805dc01688e00895a594c637569a2e1a',
                    },
                    '2.0': {
                        'filename': 'dotav2_annotations_train.tar.gz',
                        'md5': '723bceb26bc52a5de45902fada335c36',
                    },
                },
            },
            'val': {
                'images': {
                    '1.0': {
                        'filename': 'dotav1_images_val.tar.gz',
                        'md5': 'a95acf48281b7fc800666974730aeffd',
                    },
                    '2.0': {
                        'filename': 'dotav2_images_val.tar.gz',
                        'md5': '7c4ebb3317f970b26de273cd7313d46f',
                    },
                },
                'annotations': {
                    '1.0': {
                        'filename': 'dotav1_annotations_val.tar.gz',
                        'md5': '435a4a77c62eff955dd30a1b2a13894f',
                    },
                    '2.0': {
                        'filename': 'dotav2_annotations_val.tar.gz',
                        'md5': '86b629c6c8a1d924841d34de4eeb87ec',
                    },
                },
            },
        }
        monkeypatch.setattr(DOTA, 'file_info', file_info)

        root = tmp_path
        split, version = request.param
        # Exercise oriented boxes on v2.0 and horizontal boxes on v1.0 so
        # both bbox_orientation code paths are covered across the params.
        if version == '2.0':
            bbox_orientation = 'obb'
        else:
            bbox_orientation = 'hbb'

        transforms = nn.Identity()

        return DOTA(
            root,
            split,
            version=version,
            bbox_orientation=bbox_orientation,
            transforms=transforms,
            download=True,
            checksum=True,
        )

    def test_getitem(self, dataset: DOTA) -> None:
        """Every sample yields image/labels/boxes tensors of consistent shape."""
        for i in range(len(dataset)):
            x = dataset[i]
            assert isinstance(x, dict)
            assert isinstance(x['image'], torch.Tensor)
            assert isinstance(x['labels'], torch.Tensor)
            assert isinstance(x['boxes'], torch.Tensor)

            # Oriented boxes have 4 (x, y) corners; horizontal boxes 4 coords.
            if dataset.bbox_orientation == 'obb':
                assert x['boxes'].shape[1] == 8
            else:
                assert x['boxes'].shape[1] == 4

            assert x['labels'].shape[0] == x['boxes'].shape[0]

    def test_len(self, dataset: DOTA) -> None:
        """Dataset length matches the dummy data: 3 train / 2 val samples."""
        if dataset.split == 'train':
            assert len(dataset) == 3
        else:
            assert len(dataset) == 2

    def test_already_downloaded(self, dataset: DOTA) -> None:
        """Re-instantiating with download=True reuses the existing files."""
        DOTA(root=dataset.root, download=True)

    def test_not_yet_extracted(self, tmp_path: Path) -> None:
        """Dataset extracts archives already present in root without download."""
        files = [
            'dotav1_images_train.tar.gz',
            'dotav1_annotations_train.tar.gz',
            'dotav1_images_val.tar.gz',
            'dotav1_annotations_val.tar.gz',
            'dotav2_images_train.tar.gz',
            'dotav2_annotations_train.tar.gz',
            'dotav2_images_val.tar.gz',
            'dotav2_annotations_val.tar.gz',
            'samples.parquet',
        ]
        for path in files:
            shutil.copyfile(
                os.path.join('tests', 'data', 'dota', path),
                os.path.join(str(tmp_path), path),
            )

        DOTA(root=tmp_path)

    def test_invalid_split(self) -> None:
        """An unknown split name raises an AssertionError."""
        with pytest.raises(AssertionError):
            DOTA(split='foo')

    def test_corrupted(self, tmp_path: Path) -> None:
        """A checksum mismatch on an archive raises a RuntimeError."""
        with open(os.path.join(tmp_path, 'dotav1_images_train.tar.gz'), 'w') as f:
            f.write('bad')
        with pytest.raises(RuntimeError, match='Archive'):
            DOTA(root=tmp_path, checksum=True)

    def test_not_downloaded(self, tmp_path: Path) -> None:
        """Missing data without download=True raises DatasetNotFoundError."""
        with pytest.raises(DatasetNotFoundError, match='Dataset not found'):
            DOTA(tmp_path)

    def test_plot(self, dataset: DOTA) -> None:
        """plot() renders a sample (with boxes) without raising."""
        x = dataset[1]
        dataset.plot(x, suptitle='Test')
        plt.close()
Loading