Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ocrd cli #33

Merged
merged 20 commits into from
Apr 22, 2021
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ocrd-tool.json
27 changes: 13 additions & 14 deletions qurator/eynollah/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,20 +117,19 @@ def main(
print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa or -si")
sys.exit(1)
eynollah = Eynollah(
image,
None,
out,
model,
save_images,
save_layout,
save_deskewed,
save_all,
enable_plotting,
allow_enhancement,
curved_line,
full_layout,
allow_scaling,
headers_off,
image_filename=image,
dir_out=out,
dir_models=model,
dir_of_cropped_images=save_images,
dir_of_layout=save_layout,
dir_of_deskewed=save_deskewed,
dir_of_all=save_all,
enable_plotting=enable_plotting,
allow_enhancement=allow_enhancement,
curved_line=curved_line,
full_layout=full_layout,
allow_scaling=allow_scaling,
headers_off=headers_off,
)
pcgts = eynollah.run()
eynollah.writer.write_pagexml(pcgts)
Expand Down
58 changes: 32 additions & 26 deletions qurator/eynollah/eynollah.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
order_of_regions,
find_number_of_columns_in_document,
return_boxes_of_images_by_order_of_reading_new)
from .utils.pil_cv2 import check_dpi
from .utils.pil_cv2 import check_dpi, pil2cv
from .utils.xml import order_and_id_of_texts
from .plot import EynollahPlotter
from .writer import EynollahXmlWriter
Expand All @@ -79,10 +79,11 @@
class Eynollah:
def __init__(
self,
image_filename,
image_filename_stem,
dir_out,
dir_models,
image_filename,
image_pil=None,
image_filename_stem=None,
dir_out=None,
dir_of_cropped_images=None,
dir_of_layout=None,
dir_of_deskewed=None,
Expand All @@ -92,30 +93,35 @@ def __init__(
curved_line=False,
full_layout=False,
allow_scaling=False,
headers_off=False
headers_off=False,
override_dpi=None,
logger=None,
pcgts=None,
):
if image_pil:
self._imgs = self._cache_images(image_pil=image_pil)
else:
self._imgs = self._cache_images(image_filename=image_filename)
self.image_filename = image_filename
self.dir_out = dir_out
self.image_filename_stem = image_filename_stem
self.allow_enhancement = allow_enhancement
self.curved_line = curved_line
self.full_layout = full_layout
self.allow_scaling = allow_scaling
self.headers_off = headers_off
if not self.image_filename_stem:
self.image_filename_stem = Path(Path(image_filename).name).stem
self.override_dpi = override_dpi
self.plotter = None if not enable_plotting else EynollahPlotter(
dir_of_all=dir_of_all,
dir_of_deskewed=dir_of_deskewed,
dir_of_cropped_images=dir_of_cropped_images,
dir_of_layout=dir_of_layout,
image_filename=image_filename,
image_filename_stem=self.image_filename_stem)
image_filename_stem=Path(Path(image_filename).name).stem)
self.writer = EynollahXmlWriter(
dir_out=self.dir_out,
image_filename=self.image_filename,
curved_line=self.curved_line)
self.logger = getLogger('eynollah')
curved_line=self.curved_line,
pcgts=pcgts)
self.logger = logger if logger else getLogger('eynollah')
self.dir_models = dir_models

self.model_dir_of_enhancement = dir_models + "/model_enhancement.h5"
Expand All @@ -128,24 +134,26 @@ def __init__(
self.model_region_dir_p_ens = dir_models + "/model_ensemble_s.h5"
self.model_textline_dir = dir_models + "/model_textline_newspapers.h5"

self._imgs = {}
def _cache_images(self, image_filename=None, image_pil=None):
ret = {}
if image_filename:
ret['img'] = cv2.imread(image_filename)
else:
ret['img'] = pil2cv(image_pil)
ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY)
for prefix in ('', '_grayscale'):
ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8)
return ret

def imread(self, grayscale=False, uint8=True):
key = 'img'
if grayscale:
key += '_grayscale'
if uint8:
key += '_uint8'
if key not in self._imgs:
if grayscale:
img = cv2.imread(self.image_filename, cv2.IMREAD_GRAYSCALE)
else:
img = cv2.imread(self.image_filename)
if uint8:
img = img.astype(np.uint8)
self._imgs[key] = img
return self._imgs[key].copy()


def predict_enhancement(self, img):
self.logger.debug("enter predict_enhancement")
model_enhancement, session_enhancement = self.start_new_session_and_model(self.model_dir_of_enhancement)
Expand Down Expand Up @@ -346,10 +354,9 @@ def resize_image_with_column_classifier(self, is_image_enhanced):

def resize_and_enhance_image_with_column_classifier(self):
self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
try:
dpi = check_dpi(self.image_filename)
except:
dpi = 230
if self.override_dpi:
return self.override_dpi
dpi = check_dpi(self.imread())
self.logger.info("Detected %s DPI", dpi)
img = self.imread()

Expand Down Expand Up @@ -1446,7 +1453,6 @@ def run_enhancement(self):
scale = 1
if is_image_enhanced:
if self.allow_enhancement:
cv2.imwrite(os.path.join(self.dir_out, self.image_filename_stem) + ".tif", img_res)
img_res = img_res.astype(np.uint8)
self.get_image_and_scales(img_org, img_res, scale)
else:
Expand Down
54 changes: 54 additions & 0 deletions qurator/eynollah/ocrd-tool.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"version": "0.0.1",
"git_url": "https://github.com/qurator-spk/eynollah",
"tools": {
"ocrd-eynollah-segment": {
"executable": "ocrd-eynollah-segment",
"categories": ["Layout analysis"],
"description": "Segment page into regions and lines and do reading order detection with eynollah",
"input_file_grp": ["OCR-D-IMG", "OCR-D-SEG-PAGE", "OCR-D-GT-SEG-PAGE"],
"output_file_grp": ["OCR-D-SEG-LINE"],
"steps": ["layout/segmentation/region", "layout/segmentation/line"],
"parameters": {
"models": {
"type": "string",
"format": "file",
"cacheable": true,
"description": "Path to directory containing models to be used (See https://qurator-data.de/eynollah)",
"required": true
},
"dpi": {
"type": "number",
"format": "float",
"description": "pixel density in dots per inch (overrides any meta-data in the images); ignored if <= 0 (with fall-back 230)",
"default": 0
},
"full_layout": {
"type": "boolean",
"default": true,
"description": "Try to detect all element subtypes, including drop-caps and headings"
},
"curved_line": {
"type": "boolean",
"default": false,
"description": "try to return contour of textlines instead of just rectangle bounding box. Needs more processing time"
},
"allow_enhancement": {
"type": "boolean",
"default": true,
"description": "if the input image has less than 300 DPI, then upscale and enhance"
},
"allow_scaling": {
"type": "boolean",
"default": false,
"description": "if this parameter set to true, this tool would check the scale and if needed it will scale it to perform better layout detection"
},
"headers_off": {
"type": "boolean",
"default": false,
"description": "ignore the special role of headings during reading order detection"
}
}
}
}
}
11 changes: 11 additions & 0 deletions qurator/eynollah/ocrd_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from .processor import EynollahProcessor
from click import command
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor

# Click entry point that exposes EynollahProcessor through the standard OCR-D
# command-line interface: ``ocrd_cli_options`` attaches the OCR-D options and
# ``ocrd_cli_wrap_processor`` receives the processor class together with the
# parsed arguments.
# NOTE: documented with comments rather than a docstring on purpose -- click
# uses a command function's docstring as its --help text.
@command()
@ocrd_cli_options
def main(*args, **kwargs):
    return ocrd_cli_wrap_processor(EynollahProcessor, *args, **kwargs)


if __name__ == '__main__':
    main()
2 changes: 0 additions & 2 deletions qurator/eynollah/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ def __init__(
dir_of_deskewed,
dir_of_layout,
dir_of_cropped_images,
image_filename,
image_filename_stem,
image_org=None,
scale_x=1,
Expand All @@ -31,7 +30,6 @@ def __init__(
self.dir_of_layout = dir_of_layout
self.dir_of_cropped_images = dir_of_cropped_images
self.dir_of_deskewed = dir_of_deskewed
self.image_filename = image_filename
self.image_filename_stem = image_filename_stem
# XXX TODO hacky these cannot be set at init time
self.image_org = image_org
Expand Down
59 changes: 59 additions & 0 deletions qurator/eynollah/processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from json import loads
from pkg_resources import resource_string
from tempfile import NamedTemporaryFile
from os.path import join

from ocrd import Processor
from ocrd_modelfactory import page_from_file
from ocrd_models.ocrd_page import to_xml
from ocrd_utils import (
getLogger,
MIMETYPE_PAGE,
assert_file_grp_cardinality,
make_file_id
)

from .eynollah import Eynollah
from .utils.pil_cv2 import pil2cv

# Tool description parsed once at import time from the 'ocrd-tool.json'
# resource that ships alongside this module (read as UTF-8 encoded JSON).
OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))

class EynollahProcessor(Processor):
    """OCR-D processor that runs eynollah on each input page.

    The tool description and version handed to the ``Processor`` base class
    are taken from the module-level ``OCRD_TOOL`` mapping.
    """

    def __init__(self, *args, **kwargs):
        """Inject eynollah's tool description and version, then defer to ``Processor``."""
        kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-eynollah-segment']
        kwargs['version'] = OCRD_TOOL['version']
        super().__init__(*args, **kwargs)

    def process(self):
        """Run eynollah on every input file and register one PAGE file per page.

        Exactly one input and one output file group are required. For each
        input file the PAGE document is loaded, metadata is added, the page
        image is fetched from the workspace and passed to ``Eynollah``
        together with the processor parameters. The PAGE document is then
        serialized and added to the output file group.
        """
        LOG = getLogger('eynollah')
        assert_file_grp_cardinality(self.input_file_grp, 1)
        assert_file_grp_cardinality(self.output_file_grp, 1)

        # Look the input files up once instead of re-evaluating
        # ``self.input_files`` for every progress message.
        input_files = self.input_files
        total = len(input_files)
        for n, input_file in enumerate(input_files):
            page_id = input_file.pageId or input_file.ID
            LOG.info("INPUT FILE %s (%d/%d) ", page_id, n + 1, total)
            pcgts = page_from_file(self.workspace.download_file(input_file))
            self.add_metadata(pcgts)
            page = pcgts.get_Page()
            # NOTE(review): feature_filter presumably excludes derived images
            # that are already binarized -- confirm against the OCR-D docs.
            page_image, _, _ = self.workspace.image_from_page(
                page, page_id, feature_filter='binarized')
            dpi = self.parameter['dpi']
            eynollah_kwargs = {
                'dir_models': self.resolve_resource(self.parameter['models']),
                'allow_enhancement': self.parameter['allow_enhancement'],
                'curved_line': self.parameter['curved_line'],
                'full_layout': self.parameter['full_layout'],
                'allow_scaling': self.parameter['allow_scaling'],
                'headers_off': self.parameter['headers_off'],
                # A non-positive dpi parameter means "do not override".
                'override_dpi': dpi if dpi > 0 else None,
                'logger': LOG,
                'pcgts': pcgts,
                'image_pil': page_image,
                'image_filename': None,
            }
            # The return value is discarded; the ``pcgts`` object passed in is
            # what gets serialized below, so Eynollah is expected to fill it
            # in place -- TODO confirm.
            Eynollah(**eynollah_kwargs).run()
            file_id = make_file_id(input_file, self.output_file_grp)
            self.workspace.add_file(
                ID=file_id,
                file_grp=self.output_file_grp,
                pageId=page_id,
                mimetype=MIMETYPE_PAGE,
                local_filename=join(self.output_file_grp, file_id) + '.xml',
                content=to_xml(pcgts))
22 changes: 13 additions & 9 deletions qurator/eynollah/utils/pil_cv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,23 @@
# from sbb_binarization

def cv2pil(img):
return Image.fromarray(img.astype('uint8'))
return Image.fromarray(img)

def pil2cv(img):
# from ocrd/workspace.py
color_conversion = COLOR_GRAY2BGR if img.mode in ('1', 'L') else COLOR_RGB2BGR
pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img)
return cvtColor(pil_as_np_array, color_conversion)

def check_dpi(image_filename):
exif = OcrdExif(Image.open(image_filename))
print(exif.to_xml())
resolution = exif.resolution
if exif.resolutionUnit == 'cm':
resolution /= 2.54
return int(resolution)

def check_dpi(img):
try:
exif = OcrdExif(cv2pil(img))
resolution = exif.resolution
if resolution == 1:
raise Exception()
if exif.resolutionUnit == 'cm':
resolution /= 2.54
return int(resolution)
except Exception as e:
print(e)
return 230
13 changes: 8 additions & 5 deletions qurator/eynollah/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,17 @@ def __init__(self, *, dir_out, image_filename, curved_line, pcgts=None):
self.counter = EynollahIdCounter()
self.dir_out = dir_out
self.image_filename = image_filename
self.image_filename_stem = Path(Path(image_filename).name).stem
self.curved_line = curved_line
self.pcgts = pcgts if pcgts else PcGtsType()
self.pcgts = pcgts
self.scale_x = None # XXX set outside __init__
self.scale_y = None # XXX set outside __init__
self.height_org = None # XXX set outside __init__
self.width_org = None # XXX set outside __init__

@property
def image_filename_stem(self):
return Path(Path(self.image_filename).name).stem

def calculate_page_coords(self, cont_page):
self.logger.debug('enter calculate_page_coords')
points_page_print = ""
Expand Down Expand Up @@ -87,7 +90,7 @@ def serialize_lines_in_marginal(self, marginal_region, all_found_texline_polygon
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
points += ' '
points_co += ' '
coords.set_points(points_co[:-1])

def serialize_lines_in_region(self, text_region, all_found_texline_polygons, region_idx, page_coord, all_box_coord, slopes, counter):
Expand Down Expand Up @@ -141,7 +144,7 @@ def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, o
self.logger.debug('enter build_pagexml_no_full_layout')

# create the file structure
pcgts = create_page_xml(self.image_filename, self.height_org, self.width_org)
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
page = pcgts.get_Page()
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))

Expand Down Expand Up @@ -181,7 +184,7 @@ def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_t
self.logger.debug('enter build_pagexml_full_layout')

# create the file structure
pcgts = create_page_xml(self.image_filename, self.height_org, self.width_org)
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
page = pcgts.get_Page()
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))

Expand Down
Loading