diff --git a/.circleci/config.yml b/.circleci/config.yml index 2be7f23..bf0f83b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -47,9 +47,10 @@ jobs: workflows: version: 2 - build: - jobs: - - build-python38 - - build-python39 - - build-python310 - - build-python311 + # build: + # jobs: + # # Disabled as of 2025-02-12 due to erratic behavior (CI in GH Action does work) + # # - build-python38 + # # - build-python39 + # # - build-python310 + # # - build-python311 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2c420e1..ed91176 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,8 @@ jobs: # # Related issue: https://github.com/actions/runner-images/issues/672. # runs-on: ubuntu-latest - runs-on: macos-latest + # runs-on: macos-latest + runs-on: ubuntu-latest strategy: fail-fast: false matrix: diff --git a/CHANGELOG.md b/CHANGELOG.md index c0f6837..7962c27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ Versioned according to [Semantic Versioning](http://semver.org/). ## Unreleased +Changed: + + * Port the processors to use the OCR-D/core v3 API, #44 + * Spawn background processes for segmentation and recognition, #44 + * Refactor tests and test under various conditions (w/o METS caching, page parallel processing), #44 + ## [0.4.1] - 2024-05-29 Fixed: diff --git a/Makefile b/Makefile index 42a8f3d..c682693 100644 --- a/Makefile +++ b/Makefile @@ -71,7 +71,7 @@ docker: # Run test test: tests/assets - $(PYTHON) -m pytest tests $(PYTEST_ARGS) + $(PYTHON) -m pytest tests --durations=0 $(PYTEST_ARGS) # # Assets diff --git a/ocrd_kraken/binarize.py b/ocrd_kraken/binarize.py index 7c53940..8d639b8 100644 --- a/ocrd_kraken/binarize.py +++ b/ocrd_kraken/binarize.py @@ -1,26 +1,28 @@ from __future__ import absolute_import -import os +from os.path import join +from typing import Optional + import kraken.binarization + +from ocrd.processor.base import OcrdPageResult +from ocrd.processor.ocrd_page_result import OcrdPageResultImage + from ocrd import Processor -from ocrd_utils import getLogger, make_file_id, MIMETYPE_PAGE -from ocrd_models.ocrd_page import AlternativeImageType, to_xml +from ocrd_utils import assert_file_grp_cardinality, getLogger, make_file_id, MIMETYPE_PAGE +from ocrd_models.ocrd_page import AlternativeImageType, OcrdPage, to_xml from ocrd_modelfactory import page_from_file -from ocrd_kraken.config import OCRD_TOOL - class KrakenBinarize(Processor): - def __init__(self, *args, **kwargs): - kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-kraken-binarize'] - kwargs['version'] = OCRD_TOOL['version'] - super(KrakenBinarize, self).__init__(*args, **kwargs) + @property + def executable(self): + return 'ocrd-kraken-binarize' - def process(self): + def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult: """Binarize the pages/regions/lines with Kraken. - Open and deserialise PAGE input files and their respective images, - then iterate over the element hierarchy down to the requested + Iterate over the input PAGE element hierarchy down to the requested ``level-of-operation``. Next, for each file, crop each segment image according to the layout @@ -36,64 +38,37 @@ def process(self): Produce a new output file by serialising the resulting hierarchy. 
""" - log = getLogger('processor.KrakenBinarize') - log.debug('Level of operation: "%s"', self.parameter['level-of-operation']) - log.debug('Input file group %s', self.input_file_grp) - log.debug('Input files %s', [str(f) for f in self.input_files]) - for (n, input_file) in enumerate(self.input_files): - log.info("INPUT FILE %i / %s", n, input_file.pageId or input_file.ID) - file_id = make_file_id(input_file, self.output_file_grp) - pcgts = page_from_file(self.workspace.download_file(input_file)) - page = pcgts.get_Page() - page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID # (PageType has no id) - self.add_metadata(pcgts) + assert self.workspace + assert self.output_file_grp + self.logger.debug('Level of operation: "%s"', self.parameter['level-of-operation']) - page_image, page_coords, page_image_info = self.workspace.image_from_page( - page, page_id, feature_filter='binarized') - if self.parameter['level-of-operation'] == 'page': - log.info("Binarizing page '%s'", page_id) - bin_image = kraken.binarization.nlbin(page_image) - file_path = self.workspace.save_image_file( - bin_image, file_id + '.IMG-BIN', - self.output_file_grp, - page_id=input_file.pageId) - page.add_AlternativeImage(AlternativeImageType( - filename=file_path, - comments=page_coords['features'] + ',binarized')) - else: - for region in page.get_AllRegions(classes=['Text']): - region_image, region_coords = self.workspace.image_from_segment( - region, page_image, page_coords, feature_filter='binarized') - if self.parameter['level-of-operation'] == 'region': - log.info("Binarizing region '%s'", region.id) - bin_image = kraken.binarization.nlbin(region_image) - file_path = self.workspace.save_image_file( - bin_image, file_id + '_' + region.id + '.IMG-BIN', - self.output_file_grp, - page_id=input_file.pageId) - region.add_AlternativeImage(AlternativeImageType( - filename=file_path, - comments=region_coords['features'] + ',binarized')) - else: - for line in region.get_TextLine(): - line_image, line_coords = self.workspace.image_from_segment( - line, region_image, region_coords, feature_filter='binarized') - log.info("Binarizing line '%s'", line.id) - bin_image = kraken.binarization.nlbin(line_image) - file_path = self.workspace.save_image_file( - bin_image, file_id + '_' + region.id + '_' + line.id + '.IMG-BIN', - self.output_file_grp, - page_id=input_file.pageId) - line.add_AlternativeImage(AlternativeImageType( - filename=file_path, - comments=line_coords['features'] + ',binarized')) - # update METS (add the PAGE file): - file_path = os.path.join(self.output_file_grp, file_id + '.xml') - pcgts.set_pcGtsId(file_id) - out = self.workspace.add_file( - ID=file_id, - file_grp=self.output_file_grp, - pageId=input_file.pageId, - local_filename=file_path, - mimetype=MIMETYPE_PAGE, - content=to_xml(pcgts)) + pcgts = input_pcgts[0] + assert pcgts + page = pcgts.get_Page() + assert page + page_image, page_xywh, _ = self.workspace.image_from_page( + page, page_id, feature_filter='binarized') + result = OcrdPageResult(pcgts) + if self.parameter['level-of-operation'] == 'page': + self.logger.info("Binarizing page '%s'", page_id) + alternative_image = AlternativeImageType(comments=f'{page_xywh["features"]},binarized') + page.add_AlternativeImage(alternative_image) + result.images.append(OcrdPageResultImage(kraken.binarization.nlbin(page_image), '.IMG-BIN', alternative_image)) + else: + for region in page.get_AllRegions(classes=['Text']): + region_image, region_xywh = self.workspace.image_from_segment( + region, page_image, 
page_xywh, feature_filter='binarized')
+            if self.parameter['level-of-operation'] == 'region':
+                self.logger.info("Binarizing region '%s'", region.id)
+                alternative_image = AlternativeImageType(comments=f'{region_xywh["features"]},binarized')
+                region.add_AlternativeImage(alternative_image)
+                result.images.append(OcrdPageResultImage(kraken.binarization.nlbin(region_image), f'{region.id}.IMG-BIN', alternative_image))
+            else:
+                for line in region.get_TextLine():
+                    line_image, line_xywh = self.workspace.image_from_segment(
+                        line, region_image, region_xywh, feature_filter='binarized')
+                    self.logger.info("Binarizing line '%s'", line.id)
+                    alternative_image = AlternativeImageType(comments=f'{line_xywh["features"]},binarized')
+                    line.add_AlternativeImage(alternative_image)
+                    result.images.append(OcrdPageResultImage(kraken.binarization.nlbin(line_image), f'{region.id}_{line.id}.IMG-BIN', alternative_image))
+        return result
diff --git a/ocrd_kraken/cli.py b/ocrd_kraken/cli.py
deleted file mode 100644
index ead681b..0000000
--- a/ocrd_kraken/cli.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import click
-
-from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
-from ocrd_kraken.binarize import KrakenBinarize
-
-@click.command()
-@ocrd_cli_options
-def ocrd_kraken_binarize(*args, **kwargs):
-    return ocrd_cli_wrap_processor(KrakenBinarize, *args, **kwargs)
-
diff --git a/ocrd_kraken/common.py b/ocrd_kraken/common.py
new file mode 100644
index 0000000..b24d0ac
--- /dev/null
+++ b/ocrd_kraken/common.py
@@ -0,0 +1,76 @@
+import multiprocessing as mp
+
+from ocrd_utils import config, initLogging
+
+class KrakenPredictor(mp.context.SpawnProcess):
+    def __init__(self, logger, parameter):
+        self.logger = logger
+        self.parameter = parameter
+        ctxt = mp.get_context('spawn')
+        self.taskq = ctxt.Queue(maxsize=1 + config.OCRD_MAX_PARALLEL_PAGES)
+        self.resultq = ctxt.Queue(maxsize=1 + config.OCRD_MAX_PARALLEL_PAGES)
+        self.terminate = ctxt.Event()
+        ctxt = mp.get_context('fork') # base.Processor will fork workers
+        self.results = ctxt.Manager().dict()
+        super().__init__()
+        self.daemon = True
+    def __call__(self, page_id, *page_input):
+        self.taskq.put((page_id, page_input))
+        self.logger.debug("sent task for '%s'", page_id)
+        #return self.get(page_id)
+        result = self.get(page_id)
+        self.logger.debug("received result for '%s'", page_id)
+        return result
+    def get(self, page_id):
+        while not self.terminate.is_set():
+            if page_id in self.results:
+                result = self.results.pop(page_id)
+                if isinstance(result, Exception):
+                    raise Exception(f"predictor failed for {page_id}") from result
+                return result
+            try:
+                # a dequeued result may belong to another page's consumer,
+                # so do not shadow our own page_id here
+                other_id, result = self.resultq.get(timeout=0.7)
+            except mp.queues.Empty:
+                continue
+            self.logger.debug("storing results for '%s'", other_id)
+            self.results[other_id] = result
+        raise Exception(f"predictor terminated while waiting on results for {page_id}")
+    def run(self):
+        initLogging()
+        try:
+            self.setup()
+        except Exception as e:
+            self.logger.exception("setup failed")
+            self.terminate.set()
+        while not self.terminate.is_set():
+            try:
+                page_id, page_input = self.taskq.get(timeout=1.1)
+            except mp.queues.Empty:
+                continue
+            self.logger.debug("predicting '%s'", page_id)
+            try:
+                page_output = self.predict(*page_input)
+            except Exception as e:
+                self.logger.error("prediction failed: %s", e.__class__.__name__)
+                page_output = e
+            self.resultq.put((page_id, page_output))
+            self.logger.debug("sent result for '%s'", page_id)
+        self.resultq.close()
+        self.resultq.cancel_join_thread()
+        
self.logger.debug("predictor terminated") + def setup(self): + raise NotImplementedError() + def predict(self, *inputs): + raise NotImplementedError() + def shutdown(self): + # do not terminate from forked processor instances + if mp.parent_process() is None: + self.terminate.set() + self.taskq.close() + self.taskq.cancel_join_thread() + self.logger.debug(f"terminated {self} in {mp.current_process().name}") + else: + self.logger.debug(f"not touching {self} in {mp.current_process().name}") + def __del__(self): + self.logger.debug(f"deinit of {self} in {mp.current_process().name}") + self.shutdown() diff --git a/ocrd_kraken/config.py b/ocrd_kraken/config.py deleted file mode 100644 index 1816957..0000000 --- a/ocrd_kraken/config.py +++ /dev/null @@ -1,5 +0,0 @@ -import json -from ocrd_utils import resource_filename - -with open(resource_filename('ocrd_kraken', 'ocrd-tool.json'), 'r', encoding='utf-8') as f: - OCRD_TOOL = json.load(f) diff --git a/ocrd_kraken/ocrd-tool.json b/ocrd_kraken/ocrd-tool.json index beac9ab..576aaf7 100644 --- a/ocrd_kraken/ocrd-tool.json +++ b/ocrd_kraken/ocrd-tool.json @@ -4,8 +4,8 @@ "tools": { "ocrd-kraken-binarize": { "executable": "ocrd-kraken-binarize", - "input_file_grp": ["OCR-D-IMG", "OCR-D-PRE-CROP", "OCR-D-SEG-REGION", "OCR-D-SEG-LINE"], - "output_file_grp": ["OCR-D-PRE-BIN"], + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, "categories": [ "Image preprocessing" ], @@ -24,8 +24,8 @@ }, "ocrd-kraken-segment": { "executable": "ocrd-kraken-segment", - "input_file_grp": ["OCR-D-IMG", "OCR-D-PRE-CROP", "OCR-D-PRE-BIN"], - "output_file_grp": ["OCR-D-SEG-REGION", "OCR-D-SEG-LINE"], + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, "categories": [ "Layout analysis" ], @@ -128,8 +128,8 @@ }, "ocrd-kraken-recognize": { "executable": "ocrd-kraken-recognize", - "input_file_grp": ["OCR-D-SEG-LINE"], - "output_file_grp": ["OCR-D-OCR-KRAK"], + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, "categories": ["Text recognition and optimization"], "steps": ["recognition/text-recognition"], "description": "Text recognition with Kraken", diff --git a/ocrd_kraken/recognize.py b/ocrd_kraken/recognize.py index 2e2ed1d..70c9ae2 100644 --- a/ocrd_kraken/recognize.py +++ b/ocrd_kraken/recognize.py @@ -1,6 +1,8 @@ -from os.path import join +from typing import Optional, Union +from ocrd.processor.base import OcrdPageResult import regex import itertools +from collections import defaultdict import numpy as np from scipy.sparse.csgraph import minimum_spanning_tree from shapely.geometry import Polygon, LineString, box as Rectangle @@ -8,9 +10,6 @@ from ocrd import Processor from ocrd_utils import ( - getLogger, - make_file_id, - assert_file_grp_cardinality, coordinates_of_segment, coordinates_for_segment, bbox_from_polygon, @@ -18,12 +17,10 @@ points_from_bbox, polygon_from_points, xywh_from_points, - bbox_from_points, transform_coordinates, - MIMETYPE_PAGE, ) -from ocrd_modelfactory import page_from_file from ocrd_models.ocrd_page import ( + OcrdPage, RegionRefType, RegionRefIndexedType, OrderedGroupType, @@ -35,52 +32,72 @@ WordType, GlyphType, CoordsType, - to_xml ) from ocrd_models.ocrd_page_generateds import ( ReadingDirectionSimpleType, TextLineOrderSimpleType ) -from ocrd_kraken.config import OCRD_TOOL - -class KrakenRecognize(Processor): - - def __init__(self, *args, **kwargs): - kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-kraken-recognize'] - kwargs['version'] = OCRD_TOOL['version'] - super().__init__(*args, 
**kwargs) - if hasattr(self, 'output_file_grp'): - # processing context - self.setup() +from .common import KrakenPredictor +class KrakenRecognizePredictor(KrakenPredictor): + # workaround for Kraken's unpicklable defaultdict choice + class DefaultDict(defaultdict): + def __init__(self, default=None): + self.default = default + super().__init__() + def default_factory(self): + return self.default def setup(self): - """ - Load models - """ - log = getLogger('processor.KrakenRecognize') import torch - from kraken.rpred import rpred from kraken.lib.models import load_any - model_fname = self.resolve_resource(self.parameter['model']) - log.info("loading model '%s'", model_fname) + model = self.parameter['model'] + self.logger.info("loading model '%s'", model) device = self.parameter['device'] if device != 'cpu' and not torch.cuda.is_available(): device = 'cpu' if device == 'cpu': - log.warning("no CUDA device available. Running without GPU will be slow") - self.model = load_any(model_fname, device=device) - def predict(page_image, segmentation): - return rpred(self.model, page_image, segmentation, - self.parameter['pad'], - self.parameter['bidi_reordering']) - self.predict = predict - - def process(self): + self.logger.warning("no CUDA device available. Running without GPU will be slow") + self.model = load_any(model, device=device) + def predict(self, *inputs): + from kraken.rpred import mm_rpred + if not len(inputs): + return self.model.nn.input[1] == 1 and self.model.one_channel_mode == '1' + image, segmentation = inputs + nets = __class__.DefaultDict(self.model) + result = mm_rpred(nets, image, segmentation, + self.parameter['pad'], + self.parameter['bidi_reordering']) + # we must exhaust the generator before enqueuing + return list(result) + +class KrakenRecognize(Processor): + + @property + def executable(self): + return 'ocrd-kraken-recognize' + + def setup(self): + """ + Load model, set predict function + """ + parameter = dict(self.parameter) + parameter['model'] = self.resolve_resource(parameter['model']) + self.predictor = KrakenRecognizePredictor(self.logger, parameter) + self.predictor.start() + self.binary = self.predictor("") # blocks until model is loaded + self.logger.info("loaded %s model %s", "binary" if self.binary else "grayscale", self.parameter["model"]) + + def shutdown(self): + if getattr(self, 'predictor', None): + self.predictor.shutdown() + del self.predictor + + def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult: """Recognize text on lines with Kraken. - Open and deserialise each PAGE input file and its respective image, - then iterate over the element hierarchy down to the line level. + Open the parsed PAGE-XML file, then iterate over the element hierarchy + down to the line level. Set up Kraken to recognise each text line (via coordinates into the higher-level image, or from the alternative image. If the model @@ -94,149 +111,136 @@ def process(self): into additional TextEquiv at each level, and make the higher levels consistent with that (by concatenation joined by whitespace). - Produce a new output file by serialising the resulting hierarchy. + Return the resulting hierarchy. 
""" + assert self.workspace from kraken.containers import Segmentation, BaselineLine, BBoxLine - log = getLogger('processor.KrakenRecognize') - assert_file_grp_cardinality(self.input_file_grp, 1) - assert_file_grp_cardinality(self.output_file_grp, 1) - - for n, input_file in enumerate(self.input_files): - page_id = input_file.pageId or input_file.ID - log.info("INPUT FILE %i / %s of %s", n, page_id, len(self.input_files)) - pcgts = page_from_file(self.workspace.download_file(input_file)) - self.add_metadata(pcgts) - page = pcgts.get_Page() - page_image, page_coords, _ = self.workspace.image_from_page( - page, page_id, - feature_selector="binarized" - if self.model.nn.input[1] == 1 and self.model.one_channel_mode == '1' - else '') - page_rect = Rectangle(0, 0, page_image.width - 1, page_image.height - 1) - # todo: find out whether kraken.lib.xml.XMLPage(...).to_container() is adequate - - all_lines = page.get_AllTextLines() - # assumes that missing baselines are rare, if any - if any(line.Baseline for line in all_lines): - log.info("Converting PAGE to Kraken Segmentation (baselines)") - segtype = 'baselines' - else: - log.info("Converting PAGE to Kraken Segmentation (boxes only)") - segtype = 'bbox' - scale = 0.5 * np.median([xywh_from_points(line.Coords.points)['h'] for line in all_lines]) - log.info("Estimated scale: %.1f", scale) - seglines = [] - for line in all_lines: - # FIXME: see whether model prefers baselines or bbox crops (seg_type) - # FIXME: even if we do not have baselines, emulating baseline+boundary might be useful to prevent automatic center normalization - poly = coordinates_of_segment(line, None, page_coords) - poly = make_valid(Polygon(poly)) - poly = poly.intersection(page_rect) - if segtype == 'baselines': - if line.Baseline is None: + + pcgts = input_pcgts[0] + assert pcgts + page = pcgts.get_Page() + assert page + page_image, page_coords, _ = self.workspace.image_from_page( + page, page_id, + feature_selector="binarized" + if self.binary else '') + page_rect = Rectangle(0, 0, page_image.width - 1, page_image.height - 1) + # TODO: find out whether kraken.lib.xml.XMLPage(...).to_container() is adequate + + all_lines = page.get_AllTextLines() + # assumes that missing baselines are rare, if any + if any(line.Baseline for line in all_lines): + self.logger.info("Converting PAGE to Kraken Segmentation (baselines)") + segtype = 'baselines' + else: + self.logger.info("Converting PAGE to Kraken Segmentation (boxes only)") + segtype = 'bbox' + scale = 0.5 * np.median([xywh_from_points(line.Coords.points)['h'] for line in all_lines]) + self.logger.info("Estimated scale: %.1f", scale) + seglines = [] + for line in all_lines: + # FIXME: see whether model prefers baselines or bbox crops (seg_type) + # FIXME: even if we do not have baselines, emulating baseline+boundary might be useful to prevent automatic center normalization + poly = coordinates_of_segment(line, None, page_coords) + poly = make_valid(Polygon(poly)) + poly = poly.intersection(page_rect) + if segtype == 'baselines': + if line.Baseline is None: + base = dummy_baseline_of_segment(line, page_coords) + else: + base = baseline_of_segment(line, page_coords) + if len(base) < 2 or np.abs(np.mean(base[0] - base[-1])) <= 1: + base = dummy_baseline_of_segment(line, page_coords) + elif not LineString(base).intersects(poly): base = dummy_baseline_of_segment(line, page_coords) - else: - base = baseline_of_segment(line, page_coords) - if len(base) < 2 or np.abs(np.mean(base[0] - base[-1])) <= 1: - base = 
dummy_baseline_of_segment(line, page_coords) - elif not LineString(base).intersects(poly): - base = dummy_baseline_of_segment(line, page_coords) - # kraken expects baseline to be fully contained in boundary - base = LineString(base) - if poly.is_empty: - poly = polygon_from_baseline(base, scale=scale) - elif not base.within(poly): - poly = join_polygons([poly, polygon_from_baseline(base, scale=scale)], - loc=line.id, scale=scale) - seglines.append(BaselineLine(baseline=list(map(tuple, base.coords)), - boundary=list(map(tuple, poly.exterior.coords)), - id=line.id, - tags={'type': 'default'})) - # write back - base = coordinates_for_segment(base.coords, None, page_coords) - line.set_Baseline(BaselineType(points=points_from_polygon(base))) - poly = coordinates_for_segment(poly.exterior.coords[:-1], None, page_coords) - line.set_Coords(CoordsType(points=points_from_polygon(poly))) + # kraken expects baseline to be fully contained in boundary + base = LineString(base) + if poly.is_empty: + poly = polygon_from_baseline(base, scale=scale) + elif not base.within(poly): + poly = join_polygons([poly, polygon_from_baseline(base, scale=scale)], + loc=line.id, scale=scale) + seglines.append(BaselineLine(baseline=list(map(tuple, base.coords)), + boundary=list(map(tuple, poly.exterior.coords)), + id=line.id, + tags={'type': 'default'})) + # write back + base = coordinates_for_segment(base.coords, None, page_coords) + line.set_Baseline(BaselineType(points=points_from_polygon(base))) + poly = coordinates_for_segment(poly.exterior.coords[:-1], None, page_coords) + line.set_Coords(CoordsType(points=points_from_polygon(poly))) + else: + seglines.append(BBoxLine(bbox=poly.envelope.bounds, + id=line.id)) + + segmentation = Segmentation(lines=seglines, + script_detection=False, + text_direction='horizontal-lr', + type=segtype, + imagename=page_id) + for idx_line, ocr_record in enumerate(self.predictor(page_id, page_image, segmentation)): + line = all_lines[idx_line] + id_line = line.id + if not ocr_record.prediction and not ocr_record.cuts: + self.logger.warning('No results for line "%s"', line.id) + continue + text_line = ocr_record.prediction + if len(ocr_record.confidences) > 0: + conf_line = sum(ocr_record.confidences) / len(ocr_record.confidences) + else: + conf_line = None + if self.parameter['overwrite_text']: + line.TextEquiv = [] + line.add_TextEquiv(TextEquivType(Unicode=text_line, conf=conf_line)) + idx_word = 0 + line_offset = 0 + for text_word in regex.splititer(r'(\s+)', text_line): + next_offset = line_offset + len(text_word) + cuts_word = list(map(list, ocr_record.cuts[line_offset:next_offset])) + # fixme: kraken#98 says the Pytorch CTC output is too impoverished to yield good glyph stops + # as a workaround, here we just steal from the next glyph start, respectively: + if len(ocr_record.cuts) > next_offset + 1: + cuts_word.extend(list(map(list, ocr_record.cuts[next_offset:next_offset+1]))) else: - seglines.append(BBoxLine(bbox=poly.envelope.bounds, - id=line.id)) - - segmentation = Segmentation(lines=seglines, - script_detection=False, - text_direction='horizontal-lr', - type=segtype, - imagename=page_id) - for idx_line, ocr_record in enumerate(self.predict(page_image, segmentation)): - line = all_lines[idx_line] - id_line = line.id - if not ocr_record.prediction and not ocr_record.cuts: - log.warning('No results for line "%s"', line.id) + cuts_word.append(list(ocr_record.cuts[-1])) + confidences_word = ocr_record.confidences[line_offset:next_offset] + line_offset = next_offset + if 
len(text_word.strip()) == 0: continue - text_line = ocr_record.prediction - if len(ocr_record.confidences) > 0: - conf_line = sum(ocr_record.confidences) / len(ocr_record.confidences) + id_word = '%s_word_%s' % (id_line, idx_word + 1) + idx_word += 1 + poly_word = [point for cut in cuts_word for point in cut] + bbox_word = bbox_from_polygon(coordinates_for_segment(poly_word, None, page_coords)) + # avoid zero-size coords on ties + bbox_word = np.array(bbox_word, dtype=int) + if np.prod(bbox_word[2:4] - bbox_word[0:2]) == 0: + bbox_word[2:4] += 1 + if len(confidences_word) > 0: + conf_word = sum(confidences_word) / len(confidences_word) else: - conf_line = None - if self.parameter['overwrite_text']: - line.TextEquiv = [] - line.add_TextEquiv(TextEquivType(Unicode=text_line, conf=conf_line)) - idx_word = 0 - line_offset = 0 - for text_word in regex.splititer(r'(\s+)', text_line): - next_offset = line_offset + len(text_word) - cuts_word = list(map(list, ocr_record.cuts[line_offset:next_offset])) - # fixme: kraken#98 says the Pytorch CTC output is too impoverished to yield good glyph stops - # as a workaround, here we just steal from the next glyph start, respectively: - if len(ocr_record.cuts) > next_offset + 1: - cuts_word.extend(list(map(list, ocr_record.cuts[next_offset:next_offset+1]))) - else: - cuts_word.append(list(ocr_record.cuts[-1])) - confidences_word = ocr_record.confidences[line_offset:next_offset] - line_offset = next_offset - if len(text_word.strip()) == 0: - continue - id_word = '%s_word_%s' % (id_line, idx_word + 1) - idx_word += 1 - poly_word = [point for cut in cuts_word for point in cut] - bbox_word = bbox_from_polygon(coordinates_for_segment(poly_word, None, page_coords)) + conf_word = None + word = WordType(id=id_word, + Coords=CoordsType(points=points_from_bbox(*bbox_word))) + word.add_TextEquiv(TextEquivType(Unicode=text_word, conf=conf_word)) + for idx_glyph, text_glyph in enumerate(text_word): + id_glyph = '%s_glyph_%s' % (id_word, idx_glyph + 1) + poly_glyph = cuts_word[idx_glyph] + cuts_word[idx_glyph + 1] + bbox_glyph = bbox_from_polygon(coordinates_for_segment(poly_glyph, None, page_coords)) # avoid zero-size coords on ties - bbox_word = np.array(bbox_word, dtype=int) - if np.prod(bbox_word[2:4] - bbox_word[0:2]) == 0: - bbox_word[2:4] += 1 - if len(confidences_word) > 0: - conf_word = sum(confidences_word) / len(confidences_word) - else: - conf_word = None - word = WordType(id=id_word, - Coords=CoordsType(points=points_from_bbox(*bbox_word))) - word.add_TextEquiv(TextEquivType(Unicode=text_word, conf=conf_word)) - for idx_glyph, text_glyph in enumerate(text_word): - id_glyph = '%s_glyph_%s' % (id_word, idx_glyph + 1) - poly_glyph = cuts_word[idx_glyph] + cuts_word[idx_glyph + 1] - bbox_glyph = bbox_from_polygon(coordinates_for_segment(poly_glyph, None, page_coords)) - # avoid zero-size coords on ties - bbox_glyph = np.array(bbox_glyph, dtype=int) - if np.prod(bbox_glyph[2:4] - bbox_glyph[0:2]) == 0: - bbox_glyph[2:4] += 1 - conf_glyph = confidences_word[idx_glyph] - glyph = GlyphType(id=id_glyph, - Coords=CoordsType(points=points_from_bbox(*bbox_glyph))) - glyph.add_TextEquiv(TextEquivType(Unicode=text_glyph, conf=conf_glyph)) - word.add_Glyph(glyph) - line.add_Word(word) - log.info('Recognized line "%s"', line.id) + bbox_glyph = np.array(bbox_glyph, dtype=int) + if np.prod(bbox_glyph[2:4] - bbox_glyph[0:2]) == 0: + bbox_glyph[2:4] += 1 + conf_glyph = confidences_word[idx_glyph] + glyph = GlyphType(id=id_glyph, + 
Coords=CoordsType(points=points_from_bbox(*bbox_glyph))) + glyph.add_TextEquiv(TextEquivType(Unicode=text_glyph, conf=conf_glyph)) + word.add_Glyph(glyph) + line.add_Word(word) + self.logger.info('Recognized line "%s"', line.id) page_update_higher_textequiv_levels('line', pcgts) - log.info("Finished recognition, serializing") - file_id = make_file_id(input_file, self.output_file_grp) - pcgts.set_pcGtsId(file_id) - self.workspace.add_file( - ID=file_id, - file_grp=self.output_file_grp, - pageId=input_file.pageId, - mimetype=MIMETYPE_PAGE, - local_filename=join(self.output_file_grp, f'{file_id}.xml'), - content=to_xml(pcgts)) + self.logger.info("Finished recognition, serializing") + return OcrdPageResult(pcgts) # zzz should go into core ocrd_utils def baseline_of_segment(segment, coords): @@ -251,7 +255,7 @@ def dummy_baseline_of_segment(segment, coords, yrel=0.2): return [[xmin, ymid], [xmax, ymid]] # zzz should go into core ocrd_utils -def polygon_from_baseline(baseline, scale=20): +def polygon_from_baseline(baseline, scale : Union[float, np.floating] = 20): if not isinstance(baseline, LineString): baseline = LineString(baseline) ltr = baseline.coords[0][0] < baseline.coords[-1][0] @@ -261,7 +265,7 @@ def polygon_from_baseline(baseline, scale=20): scale=scale)) return polygon -def join_polygons(polygons, loc='', scale=20): +def join_polygons(polygons, loc='', scale : Union[float, np.floating] = 20): """construct concave hull (alpha shape) from input polygons""" # compoundp = unary_union(polygons) # jointp = compoundp.convex_hull diff --git a/ocrd_kraken/segment.py b/ocrd_kraken/segment.py index 14e19dc..9d4eab3 100644 --- a/ocrd_kraken/segment.py +++ b/ocrd_kraken/segment.py @@ -1,83 +1,97 @@ +from typing import Optional from PIL import ImageOps -from os.path import join + +import shapely.geometry as geom +from shapely.prepared import prep as geom_prep +import torch from ocrd import Processor +from ocrd.processor.ocrd_page_result import OcrdPageResult from ocrd_utils import ( getLogger, - assert_file_grp_cardinality, - make_file_id, - concat_padded, polygon_from_x0y0x1y1, points_from_polygon, polygon_mask, coordinates_for_segment, coordinates_of_segment, - MIMETYPE_PAGE ) import ocrd_models.ocrd_page from ocrd_models.ocrd_page import ( + OcrdPage, PageType, BorderType, TextRegionType, TextLineType, CoordsType, BaselineType, - to_xml ) -from ocrd_modelfactory import page_from_file - -import shapely.geometry as geom -from shapely.prepared import prep as geom_prep -import torch - -from .config import OCRD_TOOL -class KrakenSegment(Processor): - - def __init__(self, *args, **kwargs): - kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-kraken-segment'] - kwargs['version'] = OCRD_TOOL['version'] - super().__init__(*args, **kwargs) - if hasattr(self, 'output_file_grp'): - # processing context - self.setup() +from .common import KrakenPredictor +class KrakenSegmentPredictor(KrakenPredictor): def setup(self): - """ - Load models - """ - self.log = getLogger('processor.KrakenSegment') - kwargs = {} - kwargs['text_direction'] = self.parameter['text_direction'] - self.use_legacy = self.parameter['use_legacy'] + self.use_legacy = self.parameter.pop('use_legacy') if self.use_legacy: - from kraken.pageseg import segment - kwargs['scale'] = self.parameter['scale'] - kwargs['maxcolseps'] = self.parameter['maxcolseps'] - kwargs['black_colseps'] = self.parameter['black_colseps'] - self.log.info("Using legacy segmenter") + self.logger.info("Using legacy segmenter") + # adapt to Kraken v5 changes: + 
self.parameter['no_hlines'] = self.parameter.pop('remove_hlines') + self.parameter.pop('device') else: from kraken.lib.vgsl import TorchVGSLModel - from kraken.blla import segment - self.log.info("Using blla segmenter") - blla_model_fname = self.resolve_resource(self.parameter['blla_model']) - kwargs['model'] = TorchVGSLModel.load_model(blla_model_fname) + self.logger.info("Using blla segmenter") + self.logger.info("loading model '%s'", self.parameter['model']) + self.parameter['model'] = TorchVGSLModel.load_model(self.parameter['model']) device = self.parameter['device'] if device != 'cpu' and not torch.cuda.is_available(): device = 'cpu' if device == 'cpu': - self.log.warning("no CUDA device available. Running without GPU will be slow") - kwargs['device'] = device - def segmenter(img, mask=None): - return segment(img, mask=mask, **kwargs) - self.segmenter = segmenter + self.logger.warning("no CUDA device available. Running without GPU will be slow") + self.parameter['device'] = device + # adapt to Kraken v5 changes: + self.parameter.pop('scale') + self.parameter.pop('remove_hlines') + self.parameter.pop('maxcolseps') + self.parameter.pop('black_colseps') + def predict(self, *inputs): + if self.use_legacy: + from kraken.pageseg import segment + else: + from kraken.blla import segment + image, mask = inputs + return segment(image, mask=mask, **self.parameter) + +class KrakenSegment(Processor): + + @property + def executable(self): + return 'ocrd-kraken-segment' + + def setup(self): + """ + Load models + """ + parameter = dict(self.parameter) + model = parameter.pop('blla_model') + del parameter['blla_classes'] + del parameter['overwrite_segments'] + del parameter['level-of-operation'] + self.use_legacy = parameter['use_legacy'] + if not self.use_legacy: + parameter['model'] = self.resolve_resource(model) + self.predictor = KrakenSegmentPredictor(self.logger, parameter) + self.predictor.start() + + def shutdown(self): + import multiprocessing as mp + if getattr(self, 'predictor', None): + self.predictor.shutdown() + del self.predictor - def process(self): + def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult: """Segment into (regions and) lines with Kraken. - Open and deserialise PAGE input files and their respective images, - then iterate over the element hierarchy down to the ``level-of-operation``, - i.e.: + Iterate over the element hierarchy of the PAGE-XML down to the + ``level-of-operation``, i.e.: \b - On `page` level and `table` level, detect text regions and lines @@ -96,70 +110,58 @@ def process(self): Then compute a segmentation and decode it into new (text regions and) lines, and append them to the parent segment. - Produce a new output file by serialising the resulting hierarchy. + Return the resulting hierarchy. 
""" - assert_file_grp_cardinality(self.input_file_grp, 1) - assert_file_grp_cardinality(self.output_file_grp, 1) - for n, input_file in enumerate(self.input_files): - page_id = input_file.pageId or input_file.ID - self.log.info("INPUT FILE %i / %s of %s", n, page_id, len(self.input_files)) - pcgts = page_from_file(self.workspace.download_file(input_file)) - self.add_metadata(pcgts) - page = pcgts.get_Page() - page_image, page_coords, page_info = self.workspace.image_from_page( - page, page_id, - feature_selector="binarized" if self.use_legacy else "") - if page_info.resolution != 1: - dpi = page_info.resolution - if page_info.resolutionUnit == 'cm': - dpi = round(dpi * 2.54) - zoom = 300.0 / dpi - else: - zoom = 1.0 - # TODO: be DPI-relative + pcgts = input_pcgts[0] + assert pcgts + page = pcgts.get_Page() + assert page + page_image, page_coords, page_info = self.workspace.image_from_page( + page, page_id, + feature_selector="binarized" if self.use_legacy else "") + if page_info.resolution != 1: + dpi = page_info.resolution + if page_info.resolutionUnit == 'cm': + dpi = round(dpi * 2.54) + zoom = 300.0 / dpi + else: + zoom = 1.0 + # TODO: be DPI-relative - if self.parameter['level-of-operation'] == 'page': - self.log.info('Segmenting page with %s segmenter', 'legacy' if self.use_legacy else 'blla') + if self.parameter['level-of-operation'] == 'page': + self.logger.info('Segmenting page with %s segmenter', 'legacy' if self.use_legacy else 'blla') + if self.parameter['overwrite_segments']: + page.TextRegion = [] + elif len(page.TextRegion or []): + self.logger.warning('Keeping %d text regions on page "%s"', len(page.TextRegion or []), page.id) + self._process_page(page_image, page_coords, page, page_id, zoom) + elif self.parameter['level-of-operation'] == 'table': + regions = page.get_AllRegions(classes=['Table']) + if not regions: + self.logger.warning('No existing table regions on page "%s"', page_id) + for region in regions: + self.logger.info('Segmenting table region "%s" with %s segmenter', region.id, 'legacy' if self.use_legacy else 'blla') if self.parameter['overwrite_segments']: - page.TextRegion = [] - elif len(page.TextRegion or []): - self.log.warning('Keeping %d text regions on page "%s"', len(page.TextRegion or []), page.id) - self._process_page(page_image, page_coords, page, zoom) - elif self.parameter['level-of-operation'] == 'table': - regions = page.get_AllRegions(classes=['Table']) - if not regions: - self.log.warning('No existing table regions on page "%s"', page_id) - for region in regions: - self.log.info('Segmenting table region "%s" with %s segmenter', region.id, 'legacy' if self.use_legacy else 'blla') - if self.parameter['overwrite_segments']: - region.TextRegion = [] - elif len(region.TextRegion or []): - self.log.warning('Keeping %d text regions in region "%s"', len(region.TextRegion or []), region.id) - self._process_page(page_image, page_coords, region, zoom) - else: - regions = page.get_AllRegions(classes=['Text']) - if not regions: - self.log.warning('No existing text regions on page "%s"', page_id) - for region in regions: - self.log.info('Segmenting text region "%s" with %s segmenter', region.id, 'legacy' if self.use_legacy else 'blla') - if self.parameter['overwrite_segments']: - region.TextLine = [] - elif len(region.TextLine or []): - self.log.warning('Keeping %d lines in region "%s"', len(region.TextLine or []), region.id) - self._process_region(page_image, page_coords, region, zoom) + region.TextRegion = [] + elif len(region.TextRegion or []): + 
self.logger.warning('Keeping %d text regions in region "%s"', len(region.TextRegion or []), region.id) + self._process_page(page_image, page_coords, region, page_id, zoom) + else: + regions = page.get_AllRegions(classes=['Text']) + if not regions: + self.logger.warning('No existing text regions on page "%s"', page_id) + for region in regions: + self.logger.info('Segmenting text region "%s" with %s segmenter', region.id, 'legacy' if self.use_legacy else 'blla') + if self.parameter['overwrite_segments']: + region.TextLine = [] + elif len(region.TextLine or []): + self.logger.warning('Keeping %d lines in region "%s"', len(region.TextLine or []), region.id) + self._process_region(page_image, page_coords, region, page_id, zoom) - file_id = make_file_id(input_file, self.output_file_grp) - pcgts.set_pcGtsId(file_id) - self.workspace.add_file( - ID=file_id, - file_grp=self.output_file_grp, - pageId=input_file.pageId, - mimetype=MIMETYPE_PAGE, - local_filename=join(self.output_file_grp, f'{file_id}.xml'), - content=to_xml(pcgts)) + return OcrdPageResult(pcgts) - def _process_page(self, page_image, page_coords, page, zoom=1.0): + def _process_page(self, page_image, page_coords, page, page_id, zoom=1.0): def getmask(): # use mask if existing regions (any type for page, text cells for table) # or segment is lower than page level @@ -192,15 +194,15 @@ def getmask(): # poly = geom.Polygon(poly).buffer(20/zoom).exterior.coords[:-1] mask = ImageOps.invert(polygon_mask(page_image, poly)) for region in regions: - self.log.info("Masking existing region %s", region.id) + self.logger.info("Masking existing region %s", region.id) poly = coordinates_of_segment(region, page_image, page_coords) # poly = geom.Polygon(poly).buffer(20/zoom).exterior.coords[:-1] mask.paste(255, mask=polygon_mask(page_image, poly)) return mask - res = self.segmenter(page_image, mask=getmask()) - self.log.debug("Finished segmentation, serializing") + res = self.predictor(page_id, page_image, getmask()) + self.logger.debug("Finished segmentation, serializing") + #self.logger.debug(res) if self.use_legacy: - self.log.debug(res) idx_line = 0 for idx_line, line in enumerate(res.lines): line_poly = polygon_from_x0y0x1y1(line.bbox) @@ -213,9 +215,8 @@ def getmask(): id=f'region_line_{idx_line + 1}_line', Coords=CoordsType(points=line_points))) page.add_TextRegion(region_elem) - self.log.debug("Found %d lines on page %s", idx_line + 1, page.id) + self.logger.debug("Found %d lines on page %s", idx_line + 1, page.id) else: - self.log.debug(res) handled_lines = {} regions = [(type_, region) for type_ in res.regions @@ -239,11 +240,11 @@ def getmask(): line_baseline = coordinates_for_segment(line.baseline, None, page_coords) line_id = f'region_{idx_region + 1}_line_{idx_line + 1}' line_type = line.tags.get('type', '') - self.log.info("Line %s is of type %s", line_id, line_type) + self.logger.info("Line %s is of type %s", line_id, line_type) line_poly = make_valid(geom.Polygon(line_poly)) if region_poly.contains(line_poly): if idx_line in handled_lines: - self.log.error("Line %s was already added to region %s" % (idx_line, handled_lines[idx_line])) + self.logger.error("Line %s was already added to region %s" % (idx_line, handled_lines[idx_line])) continue region_elem.add_TextLine(TextLineType( id=line_id, @@ -252,12 +253,12 @@ def getmask(): handled_lines[idx_line] = idx_region for idx_line, line in enumerate(res.lines): if idx_line not in handled_lines: - self.log.error("Line %s could not be assigned a region, creating a dummy region", idx_line) + 
self.logger.error("Line %s could not be assigned a region, creating a dummy region", idx_line) line_poly = coordinates_for_segment(line.boundary, None, page_coords) line_baseline = coordinates_for_segment(line.baseline, None, page_coords) line_id = f'region_line_{idx_line + 1}_line' line_type = line.tags.get('type', '') - self.log.info("Line %s is of type %s", line_id, line_type) + self.logger.info("Line %s is of type %s", line_id, line_type) line_poly = make_valid(geom.Polygon(line_poly)).exterior.coords[:-1] region_elem = TextRegionType( id='region_line_%s' % (idx_line + 1), @@ -267,21 +268,21 @@ def getmask(): Baseline=BaselineType(points=points_from_polygon(line_baseline)), Coords=CoordsType(points=points_from_polygon(line_poly)))) page.add_TextRegion(region_elem) - self.log.debug("Found %d lines and %d regions on page %s", idx_line + 1, idx_region + 1, page.id) + self.logger.debug("Found %d lines and %d regions on page %s", idx_line + 1, idx_region + 1, page.id) - def _process_region(self, page_image, page_coords, region, zoom=1.0): + def _process_region(self, page_image, page_coords, region, page_id, zoom=1.0): def getmask(): poly = coordinates_of_segment(region, page_image, page_coords) poly = geom.Polygon(poly).buffer(20/zoom).exterior.coords[:-1] mask = ImageOps.invert(polygon_mask(page_image, poly)) for line in region.TextLine: - self.log.info("Masking existing line %s", line.id) + self.logger.info("Masking existing line %s", line.id) poly = coordinates_of_segment(line, page_image, page_coords) # poly = geom.Polygon(poly).buffer(20/zoom).exterior.coords[:-1] mask.paste(255, mask=polygon_mask(page_image, poly)) return mask - res = self.segmenter(page_image, mask=getmask()) - self.log.debug("Finished segmentation, serializing") + res = self.predictor(page_id, page_image, getmask()) + self.logger.debug("Finished segmentation, serializing") idx_line = 0 if self.use_legacy: for idx_line, line in enumerate(res.lines): @@ -297,7 +298,7 @@ def getmask(): line_baseline = coordinates_for_segment(line.baseline, None, page_coords) line_id = f'{region.id}_line_{idx_line + 1}' line_type = line.tags.get('type', '') - self.log.info("Line %s is of type %s", line_id, line_type) + self.logger.info("Line %s is of type %s", line_id, line_type) line_poly = geom.Polygon(line_poly) #line_poly = line_poly.intersection(region_poly) line_poly = make_valid(line_poly).exterior.coords[:-1] @@ -305,7 +306,7 @@ def getmask(): id=line_id, Baseline=BaselineType(points=points_from_polygon(line_baseline)), Coords=CoordsType(points=points_from_polygon(line_poly)))) - self.log.debug("Found %d lines in region %s", idx_line + 1, region.id) + self.logger.debug("Found %d lines in region %s", idx_line + 1, region.id) def make_valid(polygon): for split in range(1, len(polygon.exterior.coords)-1): diff --git a/requirements.txt b/requirements.txt index 6bbfb40..d6faf5f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ -ocrd >= 2.65 +ocrd >= 3.0.2 kraken >= 5.0 scipy shapely +regex diff --git a/tests/base.py b/tests/base.py deleted file mode 100644 index 1387769..0000000 --- a/tests/base.py +++ /dev/null @@ -1,89 +0,0 @@ -# pylint: disable=unused-import - -from os.path import dirname, realpath -from os import chdir -import sys -import logging -import io -import collections -from unittest import TestCase as VanillaTestCase, skip, main as unittests_main -import pytest -from ocrd_utils import disableLogging, initLogging - -from tests.assets import assets, copy_of_directory - - -def main(fn=None): - if fn: - 
sys.exit(pytest.main([fn])) - else: - unittests_main() - - -class TestCase(VanillaTestCase): - - @classmethod - def setUpClass(cls): - chdir(dirname(realpath(__file__)) + '/..') - - def setUp(self): - disableLogging() - initLogging() - -class CapturingTestCase(TestCase): - """ - A TestCase that needs to capture stderr/stdout and invoke click CLI. - """ - - @pytest.fixture(autouse=True) - def _setup_pytest_capfd(self, capfd): - self.capfd = capfd - - def invoke_cli(self, cli, args): - """ - Substitution for click.CliRunner.invooke that works together nicely - with unittests/pytest capturing stdout/stderr. - """ - self.capture_out_err() # XXX snapshot just before executing the CLI - code = 0 - sys.argv[1:] = args # XXX necessary because sys.argv reflects pytest args not cli args - try: - cli.main(args=args) - except SystemExit as e: - code = e.code - out, err = self.capture_out_err() - return code, out, err - - def capture_out_err(self): - return self.capfd.readouterr() - -# import traceback -# import warnings -# def warn_with_traceback(message, category, filename, lineno, file=None, line=None): -# log = file if hasattr(file, 'write') else sys.stderr -# traceback.print_stack(file=log) -# log.write(warnings.formatwarning(message, category, filename, lineno, line)) -# warnings.showwarning = warn_with_traceback - -# https://stackoverflow.com/questions/37944111/python-rolling-log-to-a-variable -# Adapted from http://alanwsmith.com/capturing-python-log-output-in-a-variable - -class FIFOIO(io.TextIOBase): - def __init__(self, size, *args): - self.maxsize = size - io.TextIOBase.__init__(self, *args) - self.deque = collections.deque() - def getvalue(self): - return ''.join(self.deque) - def write(self, x): - self.deque.append(x) - self.shrink() - def shrink(self): - if self.maxsize is None: - return - size = sum(len(x) for x in self.deque) - while size > self.maxsize: - x = self.deque.popleft() - size -= len(x) - -sys.path.append(dirname(realpath(__file__)) + '/../ocrd') diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..0d7eee5 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,63 @@ +# pylint: disable=unused-import + +from multiprocessing import Process +from time import sleep +import pytest + +from ocrd import Resolver, Workspace, OcrdMetsServer +from ocrd_utils import pushd_popd, disableLogging, initLogging, setOverrideLogLevel, config + +from .assets import assets + +CONFIGS = ['', 'pageparallel', 'metscache', 'pageparallel+metscache'] + +@pytest.fixture(params=CONFIGS) +def workspace(tmpdir, pytestconfig, request): + def _make_workspace(workspace_path): + initLogging() + if pytestconfig.getoption('verbose') > 0: + setOverrideLogLevel('DEBUG') + with pushd_popd(tmpdir): + directory = str(tmpdir) + resolver = Resolver() + workspace = resolver.workspace_from_url(workspace_path, dst_dir=directory, download=True) + config.OCRD_MISSING_OUTPUT = "ABORT" + if 'metscache' in request.param: + config.OCRD_METS_CACHING = True + print("enabled METS caching") + if 'pageparallel' in request.param: + config.OCRD_MAX_PARALLEL_PAGES = 4 + print("enabled page-parallel processing") + def _start_mets_server(*args, **kwargs): + print("running with METS server") + server = OcrdMetsServer(*args, **kwargs) + server.startup() + process = Process(target=_start_mets_server, + kwargs={'workspace': workspace, 'url': 'mets.sock'}) + process.start() + sleep(1) + workspace = Workspace(resolver, directory, mets_server_url='mets.sock') + yield {'workspace': workspace, 'mets_server_url': 
'mets.sock'} + process.terminate() + else: + yield {'workspace': workspace} + config.reset_defaults() + disableLogging() + return _make_workspace + + +@pytest.fixture +def workspace_manifesto(workspace): + yield from workspace(assets.path_to('communist_manifesto/data/mets.xml')) + +@pytest.fixture +def workspace_aufklaerung(workspace): + yield from workspace(assets.path_to('kant_aufklaerung_1784/data/mets.xml')) + +@pytest.fixture +def workspace_aufklaerung_region(workspace): + yield from workspace(assets.path_to('kant_aufklaerung_1784-page-region/data/mets.xml')) + +@pytest.fixture +def workspace_sbb(workspace): + yield from workspace(assets.url_of('SBB0000F29300010000/data/mets_one_file.xml')) diff --git a/tests/test_binarize.py b/tests/test_binarize.py index 8f5c4b8..da9adea 100644 --- a/tests/test_binarize.py +++ b/tests/test_binarize.py @@ -1,60 +1,61 @@ # pylint: disable=import-error +import json import os -import shutil -import pytest -from tests.base import assets, main +from ocrd import run_processor +from ocrd_utils import MIMETYPE_PAGE +from ocrd_models.constants import NAMESPACES +from ocrd_modelfactory import page_from_file -from ocrd import Resolver from ocrd_kraken.binarize import KrakenBinarize -from ocrd_utils.logging import setOverrideLogLevel -setOverrideLogLevel('DEBUG') +from .assets import assets -PARAM_JSON = assets.url_of('param-binarize.json') +PARAM_JSON = assets.url_of('param-binarize.json') -@pytest.fixture() -def workspace(tmpdir): - if os.path.exists(tmpdir): - shutil.rmtree(tmpdir) - workspace = Resolver().workspace_from_url( - assets.path_to('kant_aufklaerung_1784/data/mets.xml'), - dst_dir=tmpdir, - download=True +def analyse_result(ws, level): + assert os.path.isdir(os.path.join(ws.directory, 'OCR-D-BIN-KRAKEN')) + out_files = list(ws.find_files(fileGrp="OCR-D-BIN-KRAKEN", mimetype=MIMETYPE_PAGE)) + assert len(out_files), "found no output PAGE file" + out_images = list(ws.find_files(fileGrp="OCR-D-BIN-KRAKEN", mimetype="//^image/.*")) + assert len(out_images), "found no output image file" + out_pcgts = page_from_file(out_files[0]) + assert out_pcgts is not None + out_images = out_pcgts.etree.xpath('//page:%s/page:AlternativeImage[contains(@comments,"binarized")]' % level, namespaces=NAMESPACES) + assert len(out_images) > 0, "found no binarized AlternativeImages in output PAGE file" + +def test_param_json(workspace_sbb): + run_processor(KrakenBinarize, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-BIN-KRAKEN", + parameter=json.load(open(PARAM_JSON)), + **workspace_sbb, ) - return workspace - - -# def test_param_json(self): -# workspace = resolver.workspace_from_url(assets.url_of('SBB0000F29300010000/data/mets_one_file.xml'), dst_dir=WORKSPACE_DIR) -# run_processor( -# KrakenBinarize, -# resolver=resolver, -# workspace=workspace, -# parameter=PARAM_JSON -# ) - -def test_binarize_regions(workspace): - proc = KrakenBinarize( - workspace, - input_file_grp="OCR-D-GT-PAGE", - output_file_grp="OCR-D-IMG-BIN-KRAKEN", - parameter={'level-of-operation': 'region'} + ws = workspace_sbb['workspace'] + ws.save_mets() + analyse_result(ws, 'Page') + +def test_binarize_regions(workspace_aufklaerung): + run_processor(KrakenBinarize, + input_file_grp="OCR-D-GT-PAGE", + output_file_grp="OCR-D-BIN-KRAKEN", + parameter={'level-of-operation': 'region'}, + **workspace_aufklaerung, ) - proc.process() - workspace.save_mets() - -def test_binarize_lines(workspace): - proc = KrakenBinarize( - workspace, - input_file_grp="OCR-D-GT-PAGE", - 
output_file_grp="OCR-D-IMG-BIN-KRAKEN", - parameter={'level-of-operation': 'line'} + ws = workspace_aufklaerung['workspace'] + ws.save_mets() + analyse_result(ws, 'TextRegion') + +def test_binarize_lines(workspace_aufklaerung): + run_processor(KrakenBinarize, + input_file_grp="OCR-D-GT-PAGE", + output_file_grp="OCR-D-BIN-KRAKEN", + parameter={'level-of-operation': 'line'}, + **workspace_aufklaerung, ) - proc.process() - workspace.save_mets() + ws = workspace_aufklaerung['workspace'] + ws.save_mets() + analyse_result(ws, 'TextLine') -if __name__ == "__main__": - main(__file__) diff --git a/tests/test_recognize.py b/tests/test_recognize.py index 0ae2850..8354a0e 100644 --- a/tests/test_recognize.py +++ b/tests/test_recognize.py @@ -1,32 +1,36 @@ # pylint: disable=import-error import os -import shutil -from tests.base import TestCase, assets, main +from ocrd import run_processor +from ocrd_utils import MIMETYPE_PAGE +from ocrd_models.constants import NAMESPACES +from ocrd_modelfactory import page_from_file -from ocrd import Resolver, run_processor -from ocrd_utils import initLogging, pushd_popd from ocrd_kraken.recognize import KrakenRecognize +from ocrd_kraken.binarize import KrakenBinarize -class TestKrakenRecognize(TestCase): - def setUp(self): - initLogging() - - def test_recognize(self): - resolver = Resolver() - # with pushd_popd('/tmp/kraken-test') as tempdir: - with pushd_popd(tempdir=True) as tempdir: - workspace = resolver.workspace_from_url(assets.path_to('communist_manifesto/data/mets.xml'), dst_dir=tempdir, download=True) - workspace.overwrite_mode = True - proc = KrakenRecognize( - workspace, - input_file_grp="OCR-D-SEG-KRAKEN", - output_file_grp="OCR-D-OCR-KRAKEN", - ) - proc.process() - workspace.save_mets() - -if __name__ == "__main__": - main(__file__) +def test_recognize(workspace_aufklaerung): + # some models (like default en) require binarized images + run_processor(KrakenBinarize, + input_file_grp="OCR-D-GT-PAGE", + output_file_grp="OCR-D-GT-PAGE-BIN", + **workspace_aufklaerung, + ) + run_processor(KrakenRecognize, + # re-use layout, overwrite text: + input_file_grp="OCR-D-GT-PAGE-BIN", + output_file_grp="OCR-D-OCR-KRAKEN", + parameter={'overwrite_text': True}, + **workspace_aufklaerung, + ) + ws = workspace_aufklaerung['workspace'] + ws.save_mets() + assert os.path.isdir(os.path.join(ws.directory, 'OCR-D-OCR-KRAKEN')) + results = ws.find_files(file_grp='OCR-D-OCR-KRAKEN', mimetype=MIMETYPE_PAGE) + result0 = next(results, False) + assert result0, "found no output PAGE file" + result0 = page_from_file(result0) + text0 = result0.etree.xpath('//page:Glyph/page:TextEquiv/page:Unicode', namespaces=NAMESPACES) + assert len(text0) > 0, "found no glyph text in output PAGE file" diff --git a/tests/test_segment.py b/tests/test_segment.py index 627fbbf..6c00880 100644 --- a/tests/test_segment.py +++ b/tests/test_segment.py @@ -1,59 +1,65 @@ # pylint: disable=import-error import os -import shutil -from tests.base import TestCase, assets, main +from ocrd import run_processor +from ocrd_utils import MIMETYPE_PAGE +from ocrd_models.constants import NAMESPACES +from ocrd_modelfactory import page_from_file -from ocrd import Resolver -from ocrd_utils import initLogging, pushd_popd from ocrd_kraken.segment import KrakenSegment +from ocrd_kraken.binarize import KrakenBinarize -class TestKrakenSegment(TestCase): - - def setUp(self): - initLogging() - - def test_run_blla(self): - resolver = Resolver() - with pushd_popd(tempdir=True) as tempdir: - workspace = 
resolver.workspace_from_url(assets.path_to('communist_manifesto/data/mets.xml'), dst_dir=tempdir, download=True) - proc = KrakenSegment( - workspace, - input_file_grp="OCR-D-IMG-BIN", - output_file_grp="OCR-D-SEG-LINE-KRAKEN", - parameter={'maxcolseps': 0, 'use_legacy': False} - ) - proc.process() - workspace.save_mets() - - def test_run_blla_regionlevel(self): - resolver = Resolver() - with pushd_popd(tempdir=True) as tempdir: - workspace = resolver.workspace_from_url(assets.path_to('kant_aufklaerung_1784-page-region/data/mets.xml'), dst_dir=tempdir, download=True) - proc = KrakenSegment( - workspace, - input_file_grp="OCR-D-GT-SEG-REGION", - output_file_grp="OCR-D-SEG-LINE-KRAKEN", - page_id="phys_0005", - parameter={'maxcolseps': 0, 'use_legacy': False} - ) - proc.process() - workspace.save_mets() - - def test_run_legacy(self): - resolver = Resolver() - # with pushd_popd('/tmp/kraken-test') as tempdir: - with pushd_popd(tempdir=True) as tempdir: - workspace = resolver.workspace_from_url(assets.path_to('communist_manifesto/data/mets.xml'), dst_dir=tempdir, download=True) - proc = KrakenSegment( - workspace, - input_file_grp="OCR-D-IMG-BIN", - output_file_grp="OCR-D-SEG-LINE-KRAKEN", - parameter={'maxcolseps': 0, 'use_legacy': True} - ) - proc.process() - workspace.save_mets() - -if __name__ == "__main__": - main(__file__) + +def analyse_result(ws): + assert os.path.isdir(os.path.join(ws.directory, 'OCR-D-SEG-LINE-KRAKEN')) + out_files = list(ws.find_files(fileGrp="OCR-D-SEG-LINE-KRAKEN", mimetype=MIMETYPE_PAGE)) + assert len(out_files), "found no output PAGE file" + out_pcgts = page_from_file(out_files[0]) + assert out_pcgts is not None + out_regions = out_pcgts.etree.xpath('//page:TextRegion/page:Coords', namespaces=NAMESPACES) + assert len(out_regions) > 0, "found no text regions in output PAGE file" + out_lines = out_pcgts.get_Page().get_AllTextLines() + assert len(out_lines), "found no text lines in output PAGE file" + +def test_run_blla(workspace_aufklaerung): + run_processor(KrakenSegment, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-SEG-LINE-KRAKEN", + parameter={'maxcolseps': 0, 'use_legacy': False}, + **workspace_aufklaerung, + ) + ws = workspace_aufklaerung['workspace'] + ws.save_mets() + analyse_result(ws) + +def test_run_blla_regionlevel(workspace_aufklaerung_region): + run_processor(KrakenSegment, + input_file_grp="OCR-D-GT-SEG-REGION", + output_file_grp="OCR-D-SEG-LINE-KRAKEN", + # only 1 page (takes 3min per page without GPU) + page_id="phys_0005", + parameter={'maxcolseps': 0, 'use_legacy': False}, + **workspace_aufklaerung_region, + ) + ws = workspace_aufklaerung_region['workspace'] + ws.save_mets() + analyse_result(ws) + +def test_run_legacy(workspace_aufklaerung): + # legacy segmentation requires binarized images + run_processor(KrakenBinarize, + input_file_grp="OCR-D-GT-PAGE", + output_file_grp="OCR-D-GT-PAGE-BIN", + **workspace_aufklaerung, + ) + run_processor(KrakenSegment, + # overwrite layout: + input_file_grp="OCR-D-GT-PAGE-BIN", + output_file_grp="OCR-D-SEG-LINE-KRAKEN", + parameter={'maxcolseps': 0, 'use_legacy': True, 'overwrite_segments': True}, + **workspace_aufklaerung, + ) + ws = workspace_aufklaerung['workspace'] + ws.save_mets() + analyse_result(ws)
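
Note on usage: the refactored tests above double as a reference for driving the ported v3-API processors programmatically via run_processor. A minimal end-to-end sketch follows; the METS path, output file group names, and parameter choices are illustrative assumptions, not part of this change set:

    from ocrd import Resolver, run_processor
    from ocrd_kraken.binarize import KrakenBinarize
    from ocrd_kraken.segment import KrakenSegment
    from ocrd_kraken.recognize import KrakenRecognize

    # hypothetical local workspace; any METS with an OCR-D-IMG group works
    workspace = Resolver().workspace_from_url('mets.xml', download=True)
    # some recognition models (like the default English one) expect binarized input
    run_processor(KrakenBinarize,
                  workspace=workspace,
                  input_file_grp='OCR-D-IMG',
                  output_file_grp='OCR-D-BIN',
                  parameter={'level-of-operation': 'page'})
    # segment into regions and lines with the trainable (blla) segmenter
    run_processor(KrakenSegment,
                  workspace=workspace,
                  input_file_grp='OCR-D-BIN',
                  output_file_grp='OCR-D-SEG',
                  parameter={'use_legacy': False})
    # recognize text on the segmented lines with the default model
    run_processor(KrakenRecognize,
                  workspace=workspace,
                  input_file_grp='OCR-D-SEG',
                  output_file_grp='OCR-D-OCR')
    workspace.save_mets()

Since setup() now starts a single spawned KrakenPredictor per processor instance, each run_processor call loads the model once and then serves all pages, including any page-parallel workers forked by core when OCRD_MAX_PARALLEL_PAGES is set.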