Skip to content

Commit 53afdf2

Browse files
authored
Merge pull request Oslandia#88 from Oslandia/tanz
WIP: Tanzania dataset integration
2 parents fa7f70e + c1609b3 commit 53afdf2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+6190
-763
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ deeposlandia/webapp/static/aerial
88
deeposlandia/webapp/static/shapes
99
deeposlandia/webapp/static/mapillary
1010
deeposlandia/webapp/static/mapillary_agg
11+
deeposlandia/webapp/static/tanzania
1112
deeposlandia/webapp/static/predicted
1213
deeposlandia/webapp/static/bower/
1314
deeposlandia.egg-info/

README.md

+21
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,23 @@ this image from this dataset is depicted below.
8888

8989
![Example of image, with labels and predictions](./images/aerial_prediction_example.png)
9090

91+
## Open AI Tanzania
92+
93+
This dataset comes from
94+
the
95+
[Tanzania challenge](https://blog.werobotics.org/2018/08/06/welcome-to-the-open-ai-tanzania-challenge/),
96+
that took place in autumn 2018. The dataset contains 13 labelled images (2
97+
of them were assigned to validation in this project), and 9 additional images
98+
for testing purposes. The image resolution is very high (6~8 cm per pixel),
99+
allowing a fine data preprocessing step.
100+
101+
In such a dataset, one tries to automatically detect building footprints by
102+
distinguishing complete buildings, incomplete buildings and foundations.
103+
104+
![Example of image, with labels and predictions](./images/tanzania_prediction_example.png)
105+
106+
**(:warning: model training in progress... :-) )**
107+
91108
## Shapes
92109

93110
To complete the project, and make the test easier, a randomly-generated shape
@@ -97,6 +114,10 @@ rectangle, one circle and/or one triangle per image, or neither of them. Their
97114
location within each image is randomly generated (they just can't be too close to
98115
image borders). The shape and background colors are randomly generated as well.
99116

117+
## How to add a new dataset?
118+
119+
If you want to contribute to the repo by adding a new dataset, please consult the [following instructions](./deeposlandia/add_a_dataset.md).
120+
100121
# Flask application
101122

102123
A Flask Web application may be launched locally through

config.ini.sample

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ shapes = /path/to/shape/dataset/
77
mapillary = /path/to/mapillary/dataset/
88
mapillary_agg = /path/to/agregated/mapillary/dataset/
99
aerial = /path/to/aerial/dataset/
10+
tanzania = /path/to/tanzania/dataset/
1011

1112
[folder]
1213
project_folder = /path/to/static/files/

deeposlandia/__init__.py

+11
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
11
"""Deeposlandia package
22
"""
33

4+
import logging
5+
6+
import daiquiri
7+
48
__version__ = '0.4'
9+
10+
daiquiri.setup(level=logging.INFO,outputs=(
11+
daiquiri.output.Stream(formatter=daiquiri.formatter.ColorFormatter(
12+
fmt=("%(asctime)s :: %(levelname)s :: %(module)s :: "
13+
"%(funcName)s : %(color)s%(message)s%(color_stop)s"))),
14+
))
15+
logger = daiquiri.getLogger("root")

deeposlandia/datagen.py

+26-12
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,19 @@
1717
import os
1818
import sys
1919

20+
import daiquiri
2021
import pandas as pd
2122

2223
from deeposlandia import utils
23-
from deeposlandia.dataset import AerialDataset, MapillaryDataset, ShapeDataset
24+
from deeposlandia.datasets import AVAILABLE_DATASETS
25+
from deeposlandia.datasets.mapillary import MapillaryDataset
26+
from deeposlandia.datasets.aerial import AerialDataset
27+
from deeposlandia.datasets.shapes import ShapeDataset
28+
from deeposlandia.datasets.tanzania import TanzaniaDataset
29+
30+
31+
logger = daiquiri.getLogger(__name__)
32+
2433

2534
def add_instance_arguments(parser):
2635
"""Add instance-specific arguments from the command line
@@ -38,8 +47,9 @@ def add_instance_arguments(parser):
3847
parser.add_argument('-a', '--aggregate-label', action='store_true',
3948
help="Aggregate labels with respect to their categories")
4049
parser.add_argument('-D', '--dataset',
41-
required=True,
42-
help="Dataset type (either mapillary, shapes or aerial)")
50+
required=True, choices=AVAILABLE_DATASETS,
51+
help=("Dataset type (to be chosen amongst available"
52+
"datasets)"))
4353
parser.add_argument('-p', '--datapath',
4454
default="data",
4555
help="Relative path towards data directory")
@@ -95,18 +105,22 @@ def add_instance_arguments(parser):
95105
train_dataset = AerialDataset(args.image_size)
96106
validation_dataset = AerialDataset(args.image_size)
97107
test_dataset = AerialDataset(args.image_size)
108+
elif args.dataset == "tanzania":
109+
train_dataset = TanzaniaDataset(args.image_size)
110+
validation_dataset = TanzaniaDataset(args.image_size)
111+
test_dataset = TanzaniaDataset(args.image_size)
98112
else:
99-
utils.logger.error("Unsupported dataset type. Please choose "
100-
"'mapillary', 'shapes' or 'aerial'")
113+
logger.error("Unsupported dataset type. Please choose amongst %s"
114+
% AVAILABLE_DATASETS)
101115
sys.exit(1)
102116

103117
# Dataset populating/loading (depends on the existence of a specification file)
104118
if os.path.isfile(prepro_folder["training_config"]):
105119
train_dataset.load(prepro_folder["training_config"],
106120
args.nb_training_image)
107121
else:
108-
utils.logger.info(("No existing configuration file for this dataset. Create {}"
109-
"").format(prepro_folder["training_config"]))
122+
logger.info(("No existing configuration file for this dataset. "
123+
"Create %s." % prepro_folder['training_config']))
110124
input_image_dir = os.path.join(input_folder, "training")
111125
train_dataset.populate(prepro_folder["training"], input_image_dir,
112126
nb_images=args.nb_training_image,
@@ -117,8 +131,8 @@ def add_instance_arguments(parser):
117131
validation_dataset.load(prepro_folder["validation_config"],
118132
args.nb_validation_image)
119133
else:
120-
utils.logger.info(("No existing configuration file for this dataset. Create {}"
121-
"").format(prepro_folder["validation_config"]))
134+
logger.info(("No existing configuration file for this dataset. "
135+
"Create %s." % prepro_folder['validation_config']))
122136
input_image_dir = os.path.join(input_folder, "validation")
123137
validation_dataset.populate(prepro_folder["validation"],
124138
input_image_dir,
@@ -129,8 +143,8 @@ def add_instance_arguments(parser):
129143
if os.path.isfile(prepro_folder["testing_config"]):
130144
test_dataset.load(prepro_folder["testing_config"], args.nb_testing_image)
131145
else:
132-
utils.logger.info(("No existing configuration file for this dataset. Create {}"
133-
"").format(prepro_folder["testing_config"]))
146+
logger.info(("No existing configuration file for this dataset. "
147+
"Create %s." % prepro_folder['testing_config']))
134148
input_image_dir = os.path.join(input_folder, "testing")
135149
test_dataset.populate(prepro_folder["testing"],
136150
input_image_dir,
@@ -141,5 +155,5 @@ def add_instance_arguments(parser):
141155

142156
glossary = pd.DataFrame(train_dataset.labels)
143157
glossary["popularity"] = train_dataset.get_label_popularity()
144-
utils.logger.info("Data glossary:\n{}".format(glossary))
158+
logger.info("Data glossary:\n%s" % glossary)
145159
sys.exit(0)

0 commit comments

Comments
 (0)