Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PDF Renderer: allow specifying an alternate image or a custom resolution. #4171

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -1241,6 +1241,7 @@ check_PROGRAMS += paragraphs_test
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += params_model_test
endif # !DISABLED_LEGACY_ENGINE
check_PROGRAMS += pdfrenderer_test
check_PROGRAMS += progress_test
check_PROGRAMS += qrsequence_test
check_PROGRAMS += recodebeam_test
Expand Down Expand Up @@ -1469,6 +1470,10 @@ progress_test_CPPFLAGS = $(unittest_CPPFLAGS)
progress_test_LDFLAGS = $(LEPTONICA_LIBS)
progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)

# PDF renderer unit test. The test calls Leptonica (pixRead / pixDestroy)
# directly, so Leptonica is linked explicitly like for progress_test.
pdfrenderer_test_SOURCES = unittest/pdfrenderer_test.cc
pdfrenderer_test_CPPFLAGS = $(unittest_CPPFLAGS)
pdfrenderer_test_LDADD = $(TESS_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)

qrsequence_test_SOURCES = unittest/qrsequence_test.cc
qrsequence_test_CPPFLAGS = $(unittest_CPPFLAGS)
qrsequence_test_LDADD = $(TESS_LIBS)
Expand Down
34 changes: 34 additions & 0 deletions include/tesseract/renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,23 @@ class TESS_API TessResultRenderer {
return imagenum_;
}

/**
 * Replaces the image that is rendered together with the extracted text.
 * Call it after BeginDocument and before AddImage.
 * Only the pointer is stored here, no copy of the image is made.
 * NOTE(review): the image presumably has to stay valid until the AddImage
 * calls using it have returned - confirm the intended ownership.
 */
void SetRenderingImage(Pix *rendering_image) {
  this->rendering_image_ = rendering_image;
}

/**
* Specifies the expected rendering resolution.
* If not set, rendering_dpi api params will be used, else the source image
* resolution.
*/
void SetRenderingResolution(int rendering_dpi) {
rendering_dpi_ = rendering_dpi;
}

protected:
/**
* Called by concrete classes.
Expand Down Expand Up @@ -139,6 +156,21 @@ class TESS_API TessResultRenderer {
// This method will grow the output buffer if needed.
void AppendData(const char *s, int len);

// Renderers can call this to get the actual image to render with the
// extracted text. This method returns, in order of precedence:
// - the rendering image set by the caller (or already scaled by an earlier
//   call), or
// - nullptr if the api has no input image or no positive source resolution
//   (the renderer is then marked as not happy), or
// - the input image scaled to the rendering resolution if that resolution
//   differs from the source resolution, or
// - the input image from the api otherwise.
Pix *GetRenderingImage(TessBaseAPI *api);

// Resolution of the rendering image: the value set manually by the caller if
// it is non-zero, else the rendering_dpi api parameter if it is positive and
// differs from the source resolution, else the source image resolution.
int GetRenderingResolution(TessBaseAPI *api);

// Resets the rendering image and dpi to the given previous state. If the
// current rendering image differs from rendering_image_prev (for example a
// scaled image created while rendering), it is destroyed first.
void ResetRenderingState(Pix *rendering_image_prev, int rendering_dpi_prev);

template <typename T>
auto AppendData(T &&d) {
AppendData(d.data(), d.size());
Expand All @@ -151,6 +183,8 @@ class TESS_API TessResultRenderer {
const char *file_extension_; // standard extension for generated output
std::string title_; // title of document being rendered
int imagenum_; // index of last image added
Pix *rendering_image_; // Image to render with the extracted text
int rendering_dpi_; // Resolution of the rendering_image
phymbert marked this conversation as resolved.
Show resolved Hide resolved
bool happy_; // I get grumpy when the disk fills up, etc.
};

Expand Down
13 changes: 10 additions & 3 deletions src/api/pdfrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,12 @@ static bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) {
}

char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double height) {
double ppi = api->GetSourceYResolution();
double input_image_ppi = api->GetSourceYResolution();
double ppi = GetRenderingResolution(api);
double scale = 1;
if (input_image_ppi > 0) {
scale = ppi / input_image_ppi;
}

// These initial conditions are all arbitrary and will be overwritten
double old_x = 0.0, old_y = 0.0;
Expand Down Expand Up @@ -379,6 +384,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
int x1, y1, x2, y2;
res_it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
x1 *= scale; y1 *= scale; x2 *= scale; y2 *= scale;
ClipBaseline(ppi, x1, y1, x2, y2, &line_x1, &line_y1, &line_x2, &line_y2);
}

Expand Down Expand Up @@ -413,6 +419,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
{
int word_x1, word_y1, word_x2, word_y2;
res_it->Baseline(RIL_WORD, &word_x1, &word_y1, &word_x2, &word_y2);
word_x1 *= scale; word_y1 *= scale; word_x2 *= scale; word_y2 *= scale;
GetWordBaseline(writing_direction, ppi, height, word_x1, word_y1, word_x2, word_y2, line_x1,
line_y1, line_x2, line_y2, &x, &y, &word_length);
}
Expand Down Expand Up @@ -828,9 +835,9 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix, const char *filename, long int obj
}

bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
Pix *pix = api->GetInputImage();
Pix *pix = GetRenderingImage(api);
const char *filename = api->GetInputName();
int ppi = api->GetSourceYResolution();
int ppi = GetRenderingResolution(api);
if (!pix || ppi <= 0) {
return false;
}
Expand Down
62 changes: 62 additions & 0 deletions src/api/renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
#ifdef HAVE_CONFIG_H
# include "config_auto.h"
#endif
#include <allheaders.h>
#include <tesseract/baseapi.h>
#include <tesseract/renderer.h>
#include <cstring>
#include <memory> // std::unique_ptr
#include <string> // std::string
#include "serialis.h" // Serialize
#include "tprintf.h"

namespace tesseract {

Expand All @@ -36,6 +38,8 @@ TessResultRenderer::TessResultRenderer(const char *outputbase, const char *exten
, file_extension_(extension)
, title_("")
, imagenum_(-1)
, rendering_image_(nullptr)
, rendering_dpi_(0)
, happy_(true) {
if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
std::string outfile = std::string(outputbase) + "." + extension;
Expand Down Expand Up @@ -90,13 +94,71 @@ bool TessResultRenderer::AddImage(TessBaseAPI *api) {
return false;
}
++imagenum_;
Pix *rendering_image_prev = rendering_image_;
int rendering_dpi_prev = rendering_dpi_;
bool ok = AddImageHandler(api);
ResetRenderingState(rendering_image_prev, rendering_dpi_prev);
if (next_) {
ok = next_->AddImage(api) && ok;
}
return ok;
}

// Restores the rendering image and resolution that were active before the
// current page was handled. An image that replaced the previous one while the
// page was rendered (e.g. a scaled copy) is destroyed before restoring.
void TessResultRenderer::ResetRenderingState(Pix *rendering_image_prev,
                                             int rendering_dpi_prev) {
  const bool image_was_replaced = rendering_image_ != rendering_image_prev;
  if (image_was_replaced) {
    pixDestroy(&rendering_image_);
  }
  // Assigning is a no-op when nothing changed.
  rendering_image_ = rendering_image_prev;
  rendering_dpi_ = rendering_dpi_prev;
}

// Returns the image to render with the extracted text.
// An image that is already set (by the caller or by a previous scaling) wins.
// Otherwise the api input image is used, scaled with pixScale when the
// rendering resolution differs from the source resolution; the scaled image
// is kept in rendering_image_.
// Returns nullptr and marks the renderer as unhappy when the api has no input
// image or no positive source resolution.
Pix *TessResultRenderer::GetRenderingImage(TessBaseAPI *api) {
  if (rendering_image_ != nullptr) {
    return rendering_image_;
  }

  Pix *input_pix = api->GetInputImage();
  const int input_dpi = api->GetSourceYResolution();
  if (input_pix == nullptr || input_dpi <= 0) {
    happy_ = false;
    return nullptr;
  }

  const int target_dpi = GetRenderingResolution(api);
  if (target_dpi == input_dpi) {
    // No scaling needed: render the input image as is.
    return input_pix;
  }

  const float factor =
      static_cast<float>(target_dpi) / static_cast<float>(input_dpi);
  rendering_image_ = pixScale(input_pix, factor, factor);
  return rendering_image_;
}

// Returns the resolution to use for rendering:
// - rendering_dpi_ if it is already set (non-zero), else
// - the "rendering_dpi" api variable if it is positive and differs from the
//   source resolution (the value is then stored in rendering_dpi_), else
// - the source Y resolution reported by the api.
int TessResultRenderer::GetRenderingResolution(tesseract::TessBaseAPI *api) {
  if (rendering_dpi_ != 0) {
    return rendering_dpi_;
  }

  const int source_dpi = api->GetSourceYResolution();
  int requested_dpi = 0;
  const bool has_param = api->GetIntVariable("rendering_dpi", &requested_dpi);
  if (!has_param || requested_dpi <= 0 || requested_dpi == source_dpi) {
    return source_dpi;
  }

#if !defined(NDEBUG)
  // Debug builds only: report values outside of the credible range.
  if (requested_dpi < kMinCredibleResolution ||
      requested_dpi > kMaxCredibleResolution) {
    tprintf(
        "Warning: User defined rendering dpi %d is outside of expected range "
        "(%d - %d)!\n",
        requested_dpi, kMinCredibleResolution, kMaxCredibleResolution);
  }
#endif

  rendering_dpi_ = requested_dpi;
  return rendering_dpi_;
}

bool TessResultRenderer::EndDocument() {
if (!happy_) {
return false;
Expand Down
1 change: 1 addition & 0 deletions src/ccmain/tesseractclass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ Tesseract::Tesseract()
, BOOL_MEMBER(textonly_pdf, false, "Create PDF with only one invisible text layer",
this->params())
, INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params())
, INT_MEMBER(rendering_dpi, 0, "Scaled input image resolution before rendering", this->params())
, INT_MEMBER(user_defined_dpi, 0, "Specify DPI for input image", this->params())
, INT_MEMBER(min_characters_to_try, 50, "Specify minimum characters to try during OSD",
this->params())
Expand Down
1 change: 1 addition & 0 deletions src/ccmain/tesseractclass.h
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,7 @@ class TESS_API Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_create_pdf);
BOOL_VAR_H(textonly_pdf);
INT_VAR_H(jpg_quality);
INT_VAR_H(rendering_dpi);
INT_VAR_H(user_defined_dpi);
INT_VAR_H(min_characters_to_try);
STRING_VAR_H(unrecognised_char);
Expand Down
139 changes: 139 additions & 0 deletions unittest/pdfrenderer_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// (C) Copyright 2023, Tesseract Contributors.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <allheaders.h>
#include <tesseract/renderer.h>
#include <cstdint>
#include <filesystem>
#include <map>
#include <memory>
#include <string>
#include <system_error>

#include "include_gunit.h"

namespace tesseract {

// Api variables which set user_defined_dpi to 300 for the input image.
static const std::map<std::string, std::string> userdefined_dpi_variables{
    {"user_defined_dpi", "300"},
};

class TessPDFRendererTest : public testing::Test {
protected:
static std::string TestDataNameToPath(const std::string &name) {
return file::JoinPath(TESTING_DIR, name);
}
static std::string TessdataPath() {
return TESSDATA_DIR;
}
static std::string TestPDFName(const std::string &suffix) {
return "/tmp/tesseract_pdf_renderer_test_phottest" + suffix;
}

static void AssertPDFSizeLT(const std::string &filename, int size) {
std::filesystem::path p = filename + ".pdf";
ASSERT_LT(std::filesystem::file_size(p), size);
}

static void AssertPDFRemove(const std::string &filename) {
ASSERT_EQ(std::remove((filename + ".pdf").c_str()), 0);
}

static bool initializeAPI(
TessBaseAPI &api, const std::map<std::string, std::string> &variables) {
EXPECT_EQ(api.Init(TESSDATA_DIR, "eng", OEM_LSTM_ONLY), 0);
for (const auto &[name, value] : variables) {
api.SetVariable(name.c_str(), value.c_str());
}
return true;
}

static bool ProcessAndRenderPages(
const std::string &input_filename, TessPDFRenderer *pdf_renderer,
const std::map<std::string, std::string> &variables) {
TessBaseAPI api;
initializeAPI(api, variables);
auto testdata_input_filename = TestDataNameToPath(input_filename);
EXPECT_TRUE(api.ProcessPages(testdata_input_filename.c_str(), TESSDATA_DIR,
1000, pdf_renderer));
api.End();
return pdf_renderer->happy();
}

static void RenderPDFAndAssertSize(
const std::string &image_file, const std::string &pdf_suffix,
bool text_only, int max_file_size,
const std::map<std::string, std::string> &variables = {}) {
auto pdf_name = TestPDFName(pdf_suffix);
auto pdf_renderer = std::make_unique<TessPDFRenderer>(
pdf_name.c_str(), "tessdata", text_only);
ASSERT_TRUE(
ProcessAndRenderPages(image_file, pdf_renderer.get(), variables));
AssertPDFSizeLT(pdf_name, max_file_size);
AssertPDFRemove(pdf_name);
}
};

// Test basic pdf rendering
// Default rendering (image and text): the PDF must stay below the size limit.
TEST_F(TessPDFRendererTest, TestPDFRenderBasicTest) {
  constexpr int kMaxPdfBytes = 113000;
  RenderPDFAndAssertSize("phototest_2.tif", "", /*text_only=*/false,
                         kMaxPdfBytes);
}

// Test pdf rendering with lower jpeg quality
// Rendering with jpg_quality lowered to 40 must give a smaller PDF.
TEST_F(TessPDFRendererTest, TestPDFRenderJPEGQualityTest) {
  constexpr int kMaxPdfBytes = 66000;
  const std::map<std::string, std::string> jpg_variables{
      {"jpg_quality", "40"}};
  RenderPDFAndAssertSize("phototest_2.tif", "jpg_quality",
                         /*text_only=*/false, kMaxPdfBytes, jpg_variables);
}

// Test pdf renderer text only
// Text only rendering: the PDF must stay below the (small) size limit.
TEST_F(TessPDFRendererTest, TestPDFRenderTextOnlyTest) {
  constexpr int kMaxPdfBytes = 3500;
  RenderPDFAndAssertSize("phototest_2.tif", "text_only", /*text_only=*/true,
                         kMaxPdfBytes);
}

// Test that pdf renderer generates a custom image resolution in the pdf export
// Test that the pdf renderer uses a rendering resolution which was set on the
// renderer: rendering at 110 dpi must give a smaller PDF.
TEST_F(TessPDFRendererTest, TestPDFRenderLowerResolutionTest) {
  const std::string pdf_name = TestPDFName("lower_resolution");
  auto pdf_renderer =
      std::make_unique<TessPDFRenderer>(pdf_name.c_str(), "tessdata", false);
  pdf_renderer->SetRenderingResolution(110);
  // ProcessAndRenderPages returns a bool, so use the same gtest assertion as
  // RenderPDFAndAssertSize instead of CHECK_OK.
  ASSERT_TRUE(ProcessAndRenderPages("phototest_2.tif", pdf_renderer.get(),
                                    userdefined_dpi_variables));
  AssertPDFSizeLT(pdf_name, 35000);
  AssertPDFRemove(pdf_name);
}

// Test that pdf renderer generates a custom image resolution in the pdf export
// with variable directive
// Test that the pdf renderer uses the rendering resolution given with the
// rendering_dpi variable: rendering at 110 dpi must give a smaller PDF.
TEST_F(TessPDFRendererTest, TestPDFLowerResolutionVariableTest) {
  const std::string pdf_name = TestPDFName("lower_resolution_variable");
  // Local map (no mutable static state shared between test runs).
  std::map<std::string, std::string> variables = {{"rendering_dpi", "110"}};
  variables.insert(userdefined_dpi_variables.begin(),
                   userdefined_dpi_variables.end());
  auto pdf_renderer =
      std::make_unique<TessPDFRenderer>(pdf_name.c_str(), "tessdata", false);
  // ProcessAndRenderPages returns a bool, so use the same gtest assertion as
  // RenderPDFAndAssertSize instead of CHECK_OK.
  ASSERT_TRUE(
      ProcessAndRenderPages("phototest_2.tif", pdf_renderer.get(), variables));
  AssertPDFSizeLT(pdf_name, 35000);
  AssertPDFRemove(pdf_name);
}

// Test that pdf renderer generates an alternate image in the pdf export
// Test that the pdf renderer uses an alternate image in the pdf export
TEST_F(TessPDFRendererTest, TestPDFAlternateImageTest) {
  const std::string pdf_name = TestPDFName("alternate_image");
  auto pdf_renderer =
      std::make_unique<TessPDFRenderer>(pdf_name.c_str(), "tessdata", false);
  Pix *alternate_image = pixRead(TestDataNameToPath("phototest.tif").c_str());
  // Fail early with a clear message if the test image cannot be read.
  ASSERT_NE(alternate_image, nullptr);
  pdf_renderer->SetRenderingImage(alternate_image);
  const bool rendered = ProcessAndRenderPages(
      "phototest_2.tif", pdf_renderer.get(),
      std::map<std::string, std::string>());
  // Release the image before asserting, so that it is not leaked on failure,
  // and do not leave a pointer to the destroyed image in the renderer.
  pdf_renderer->SetRenderingImage(nullptr);
  pixDestroy(&alternate_image);
  ASSERT_TRUE(rendered);
  AssertPDFSizeLT(pdf_name, 8000);
  AssertPDFRemove(pdf_name);
}

} // namespace tesseract