Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix processor resolve preset #1256

Merged
merged 5 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions src/ocrd/decorators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ def ocrd_cli_wrap_processor(
**kwargs
):
if not sys.argv[1:]:
processorClass(workspace=None, show_help=True)
processorClass(None, show_help=True)
sys.exit(1)
if dump_json or dump_module_dir or help or version or show_resource or list_resources:
processorClass(
workspace=None,
None,
dump_json=dump_json,
dump_module_dir=dump_module_dir,
show_help=help,
Expand All @@ -71,10 +71,16 @@ def ocrd_cli_wrap_processor(
initLogging()

LOG = getLogger('ocrd.cli_wrap_processor')
assert kwargs['input_file_grp'] is not None
assert kwargs['output_file_grp'] is not None
# LOG.info('kwargs=%s' % kwargs)
if 'parameter' in kwargs:
# Disambiguate parameter file/literal, and resolve file
disposable = processorClass(workspace=None)
# (but avoid entering processing context of constructor)
class DisposableSubclass(processorClass):
def show_version(self):
pass
disposable = DisposableSubclass(None, show_version=True)
def resolve(name):
try:
return disposable.resolve_resource(name)
Expand Down
7 changes: 2 additions & 5 deletions src/ocrd/decorators/ocrd_cli_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,8 @@ def cli(mets_url):
option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME),
option('-w', '--working-dir', help="Working Directory"),
option('-U', '--mets-server-url', help="METS server URL. Starts with http:// then TCP, otherwise unix socket path"),
# TODO OCR-D/core#274
# option('-I', '--input-file-grp', required=True),
# option('-O', '--output-file-grp', required=True),
option('-I', '--input-file-grp', default='INPUT'),
option('-O', '--output-file-grp', default='OUTPUT'),
option('-I', '--input-file-grp', default=None),
option('-O', '--output-file-grp', default=None),
option('-g', '--page-id'),
option('--overwrite', is_flag=True, default=False),
option('--profile', is_flag=True, default=False),
Expand Down
52 changes: 29 additions & 23 deletions src/ocrd/processor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,8 @@ def __init__(
workspace : Workspace,
ocrd_tool=None,
parameter=None,
# TODO OCR-D/core#274
# input_file_grp=None,
# output_file_grp=None,
input_file_grp="INPUT",
output_file_grp="OUTPUT",
input_file_grp=None,
output_file_grp=None,
page_id=None,
resolve_resource=None,
show_resource=None,
Expand Down Expand Up @@ -119,8 +116,6 @@ def __init__(
on stdout.
"""
self.ocrd_tool = ocrd_tool
if parameter is None:
parameter = {}
if dump_json:
print(json.dumps(ocrd_tool, indent=True))
return
Expand All @@ -131,27 +126,22 @@ def __init__(
for res in self.list_all_resources():
print(res)
return
if resolve_resource or show_resource:
initLogging()
if resolve_resource:
try:
res_fname = self.resolve_resource(resolve_resource or show_resource)
res = self.resolve_resource(resolve_resource)
print(res)
except ResourceNotFoundError as e:
log = getLogger('ocrd.processor.base')
log.critical(e.message)
sys.exit(1)
return
if show_resource:
try:
self.show_resource(show_resource)
except ResourceNotFoundError as e:
log = getLogger('ocrd.processor.base')
log.critical(e.message)
sys.exit(1)
if resolve_resource:
print(res_fname)
return
fpath = Path(res_fname)
if fpath.is_dir():
with pushd_popd(fpath):
fileobj = io.BytesIO()
with tarfile.open(fileobj=fileobj, mode='w:gz') as tarball:
tarball.add('.')
fileobj.seek(0)
copyfileobj(fileobj, sys.stdout.buffer)
else:
sys.stdout.buffer.write(fpath.read_bytes())
return
if show_help:
self.show_help(subcommand=subcommand)
Expand All @@ -170,6 +160,8 @@ def __init__(
self.input_file_grp = input_file_grp
self.output_file_grp = output_file_grp
self.page_id = None if page_id == [] or page_id is None else page_id
if parameter is None:
parameter = {}
parameterValidator = ParameterValidator(ocrd_tool)
report = parameterValidator.validate(parameter)
if not report.is_valid:
Expand Down Expand Up @@ -233,6 +225,7 @@ def resolve_resource(self, val):
Args:
val (string): resource value to resolve
"""
initLogging()
executable = self.ocrd_tool['executable']
log = getLogger('ocrd.processor.base')
if exists(val):
Expand All @@ -250,6 +243,19 @@ def resolve_resource(self, val):
return ret[0]
raise ResourceNotFoundError(val, executable)

def show_resource(self, val):
    """
    Write the content of a resource to standard output as raw bytes.

    The resource is located with ``self.resolve_resource``. If the resolved
    path is a directory, a gzip-compressed tar archive is assembled in an
    in-memory buffer (adding ``'.'`` while inside ``pushd_popd`` for that
    directory) and then copied to ``sys.stdout.buffer``. Otherwise the
    file's bytes are written to ``sys.stdout.buffer`` unchanged.

    Args:
        val (string): resource value to resolve and show
    """
    resource_path = Path(self.resolve_resource(val))
    if not resource_path.is_dir():
        # Plain file: dump it verbatim.
        sys.stdout.buffer.write(resource_path.read_bytes())
        return
    with pushd_popd(resource_path):
        archive_buffer = io.BytesIO()
        with tarfile.open(fileobj=archive_buffer, mode='w:gz') as archive:
            archive.add('.')
        # Rewind so the copy starts at the beginning of the archive.
        archive_buffer.seek(0)
        copyfileobj(archive_buffer, sys.stdout.buffer)

def list_all_resources(self):
"""
List all resources found in the filesystem and matching content-type by filename suffix
Expand Down
4 changes: 2 additions & 2 deletions src/ocrd/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def download_file(self, f, _recursion_count=0):
self.baseurl, f.local_filename)
url = '%s/%s' % (self.baseurl, f.local_filename)
else:
raise FileNotFoundError(f"'local_filename' {f.local_filename} points to non-existing file,"
raise FileNotFoundError(f"'local_filename' {f.local_filename} points to non-existing file, "
"and no 'url' to download and no 'baseurl' set on workspace - nothing we can do.")
file_path = Path(f.local_filename)
self.resolver.download_to_directory(self.directory, url, subdir=file_path.parent, basename=file_path.name)
Expand All @@ -219,7 +219,7 @@ def download_file(self, f, _recursion_count=0):
f.local_filename = self.resolver.download_to_directory(self.directory, f.url, subdir=f.fileGrp, basename=basename)
return f
# If neither f.local_filename nor f.url is set, fail
raise ValueError("OcrdFile {f} has neither 'url' nor 'local_filename', so cannot be downloaded")
raise ValueError(f"OcrdFile {f} has neither 'url' nor 'local_filename', so cannot be downloaded")

def remove_file(self, file_id, force=False, keep_file=False, page_recursive=False, page_same_group=False):
"""
Expand Down
Loading