Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PDF Renderer: allow to specify an alternate image or a custom resolution. #4171

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -1241,6 +1241,7 @@ check_PROGRAMS += paragraphs_test
if !DISABLED_LEGACY_ENGINE
check_PROGRAMS += params_model_test
endif # !DISABLED_LEGACY_ENGINE
check_PROGRAMS += pdfrenderer_test
check_PROGRAMS += progress_test
check_PROGRAMS += qrsequence_test
check_PROGRAMS += recodebeam_test
Expand Down Expand Up @@ -1469,6 +1470,10 @@ progress_test_CPPFLAGS = $(unittest_CPPFLAGS)
progress_test_LDFLAGS = $(LEPTONICA_LIBS)
progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)

pdfrenderer_test_SOURCES = unittest/pdfrenderer_test.cc
pdfrenderer_test_CPPFLAGS = $(unittest_CPPFLAGS)
pdfrenderer_test_LDADD = $(TESS_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)

qrsequence_test_SOURCES = unittest/qrsequence_test.cc
qrsequence_test_CPPFLAGS = $(unittest_CPPFLAGS)
qrsequence_test_LDADD = $(TESS_LIBS)
Expand Down
32 changes: 32 additions & 0 deletions include/tesseract/renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,26 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
// we load a custom PDF font from this location.
TessPDFRenderer(const char *outputbase, const char *datadir,
bool textonly = false);
// Restores the rendering image and dpi to the given previous state. Destroys
// the current rendering image if it differs from the previous one.
void ResetRenderingState(Pix *rendering_image_prev, int rendering_dpi_prev);

/**
 * Sets an alternate image to be rendered together with the extracted text.
 * The pointer is stored as given; no copy is made here.
 * Call this after BeginDocument and before AddImage.
 */
void SetRenderingImage(Pix *rendering_image) {
  this->rendering_image_ = rendering_image;
}

/**
* Specifies the expected rendering resolution.
* If not set, rendering_dpi api params will be used, else the source image
* resolution.
*/
void SetRenderingResolution(int rendering_dpi) {
rendering_dpi_ = rendering_dpi;
}

protected:
bool BeginDocumentHandler() override;
Expand All @@ -258,12 +278,24 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
std::vector<long int> pages_; // object number for every /Page object
std::string datadir_; // where to find the custom font
bool textonly_; // skip images if set
Pix *rendering_image_; // Image to render with the extracted text
int rendering_dpi_; // Resolution of the rendering_image
// Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data.
void AppendPDFObject(const char *data);
// Create the /Contents object for an entire page.
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
// Renderers can call this to get the actual image to render with extracted
// text. This method returns:
// - the rendering image set by the caller or
// - the input image scaled to the rendering_dpi field if defined or
// - the input image from the api otherwise
Pix *GetRenderingImage(TessBaseAPI *api);
// Resolution of the rendering image either set manually by the caller or with
// the rendering_dpi api parameter.
int GetRenderingResolution(TessBaseAPI *api);

// Turn an image into a PDF object. Only transcode if we have to.
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size,
Expand Down
73 changes: 69 additions & 4 deletions src/api/pdfrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,10 @@ static const int kMaxBytesPerCodepoint = 20;
* PDF Renderer interface implementation
**********************************************************************/
TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir, bool textonly)
: TessResultRenderer(outputbase, "pdf"), datadir_(datadir) {
: TessResultRenderer(outputbase, "pdf")
, datadir_(datadir)
, rendering_image_(nullptr)
, rendering_dpi_(0) {
obj_ = 0;
textonly_ = textonly;
offsets_.push_back(0);
Expand Down Expand Up @@ -329,7 +332,12 @@ static bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) {
}

char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double height) {
double ppi = api->GetSourceYResolution();
double input_image_ppi = api->GetSourceYResolution();
double ppi = GetRenderingResolution(api);
double scale = 1;
if (input_image_ppi > 0) {
scale = ppi / input_image_ppi;
}

// These initial conditions are all arbitrary and will be overwritten
double old_x = 0.0, old_y = 0.0;
Expand Down Expand Up @@ -379,6 +387,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
int x1, y1, x2, y2;
res_it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
x1 *= scale; y1 *= scale; x2 *= scale; y2 *= scale;
ClipBaseline(ppi, x1, y1, x2, y2, &line_x1, &line_y1, &line_x2, &line_y2);
}

Expand Down Expand Up @@ -413,6 +422,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
{
int word_x1, word_y1, word_x2, word_y2;
res_it->Baseline(RIL_WORD, &word_x1, &word_y1, &word_x2, &word_y2);
word_x1 *= scale; word_y1 *= scale; word_x2 *= scale; word_y2 *= scale;
GetWordBaseline(writing_direction, ppi, height, word_x1, word_y1, word_x2, word_y2, line_x1,
line_y1, line_x2, line_y2, &x, &y, &word_length);
}
Expand Down Expand Up @@ -827,10 +837,63 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix, const char *filename, long int obj
return true;
}

// Puts the renderer back into the state described by the two arguments.
// The dpi is always restored. The current rendering image is destroyed only
// when it is not the very image being restored, so an image that was already
// installed before the call is left untouched.
void TessPDFRenderer::ResetRenderingState(Pix *rendering_image_prev,
                                          int rendering_dpi_prev) {
  rendering_dpi_ = rendering_dpi_prev;

  if (rendering_image_ == rendering_image_prev) {
    return;
  }
  pixDestroy(&rendering_image_);
  rendering_image_ = rendering_image_prev;
}

// Returns the image that should be rendered with the extracted text:
//  - the image already stored in rendering_image_, if any;
//  - nullptr when the api has no input image or no positive source resolution;
//  - the api input image itself when the rendering resolution equals the
//    source resolution;
//  - otherwise the input image scaled by rendering dpi / source dpi. The
//    scaled image is stored in rendering_image_ before being returned.
Pix *TessPDFRenderer::GetRenderingImage(TessBaseAPI *api) {
  if (rendering_image_ != nullptr) {
    return rendering_image_;
  }

  Pix *input_pix = api->GetInputImage();
  const int input_dpi = api->GetSourceYResolution();
  if (input_pix == nullptr || input_dpi <= 0) {
    return nullptr;
  }

  const int target_dpi = GetRenderingResolution(api);
  if (target_dpi == input_dpi) {
    return input_pix;
  }

  const float factor =
      static_cast<float>(target_dpi) / static_cast<float>(input_dpi);
  rendering_image_ = pixScale(input_pix, factor, factor);
  return rendering_image_;
}

// Returns the resolution (dpi) used for rendering. Order of precedence:
//  1. a non-zero rendering_dpi_ member;
//  2. a positive "rendering_dpi" api variable that differs from the source
//     resolution (the value is then stored in rendering_dpi_);
//  3. the source Y resolution reported by the api.
int TessPDFRenderer::GetRenderingResolution(tesseract::TessBaseAPI *api) {
  if (rendering_dpi_ != 0) {
    return rendering_dpi_;
  }

  const int input_dpi = api->GetSourceYResolution();
  int requested_dpi;
  const bool has_override =
      api->GetIntVariable("rendering_dpi", &requested_dpi) &&
      requested_dpi > 0 && requested_dpi != input_dpi;
  if (!has_override) {
    return input_dpi;
  }

#if !defined(NDEBUG)
  // Debug builds only: warn about an implausible value; it is still used.
  const bool credible = requested_dpi >= kMinCredibleResolution &&
                        requested_dpi <= kMaxCredibleResolution;
  if (!credible) {
    tprintf(
        "Warning: User defined rendering dpi %d is outside of expected range "
        "(%d - %d)!\n",
        requested_dpi, kMinCredibleResolution, kMaxCredibleResolution);
  }
#endif
  rendering_dpi_ = requested_dpi;
  return rendering_dpi_;
}

bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
Pix *pix = api->GetInputImage();
Pix *rendering_image_prev = rendering_image_;
int rendering_dpi_prev = rendering_dpi_;
Pix *pix = GetRenderingImage(api);
const char *filename = api->GetInputName();
int ppi = api->GetSourceYResolution();
int ppi = GetRenderingResolution(api);
if (!pix || ppi <= 0) {
return false;
}
Expand Down Expand Up @@ -913,12 +976,14 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
int jpg_quality;
api->GetIntVariable("jpg_quality", &jpg_quality);
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize, jpg_quality)) {
ResetRenderingState(rendering_image_prev, rendering_dpi_prev);
return false;
}
AppendData(pdf_object, objsize);
AppendPDFObjectDIY(objsize);
delete[] pdf_object;
}
ResetRenderingState(rendering_image_prev, rendering_dpi_prev);
return true;
}

Expand Down
1 change: 1 addition & 0 deletions src/ccmain/tesseractclass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ Tesseract::Tesseract()
, BOOL_MEMBER(textonly_pdf, false, "Create PDF with only one invisible text layer",
this->params())
, INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params())
, INT_MEMBER(rendering_dpi, 0, "Scaled input image resolution before rendering", this->params())
, INT_MEMBER(user_defined_dpi, 0, "Specify DPI for input image", this->params())
, INT_MEMBER(min_characters_to_try, 50, "Specify minimum characters to try during OSD",
this->params())
Expand Down
1 change: 1 addition & 0 deletions src/ccmain/tesseractclass.h
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,7 @@ class TESS_API Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_create_pdf);
BOOL_VAR_H(textonly_pdf);
INT_VAR_H(jpg_quality);
INT_VAR_H(rendering_dpi);
INT_VAR_H(user_defined_dpi);
INT_VAR_H(min_characters_to_try);
STRING_VAR_H(unrecognised_char);
Expand Down
139 changes: 139 additions & 0 deletions unittest/pdfrenderer_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// (C) Copyright 2023, Tesseract Contributors.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <allheaders.h>
#include <tesseract/renderer.h>
#include <cstdint>
#include <cstdio>
#include <filesystem>
#include <map>
#include <memory>
#include <string>
#include <system_error>

#include "include_gunit.h"

namespace tesseract {

// Shared api variable set that specifies the DPI of the input image
// (user_defined_dpi) as 300. It is only ever read, hence const.
static const std::map<std::string, std::string> userdefined_dpi_variables = {
    {"user_defined_dpi", "300"}};

class TessPDFRendererTest : public testing::Test {
protected:
static std::string TestDataNameToPath(const std::string &name) {
return file::JoinPath(TESTING_DIR, name);
}
static std::string TessdataPath() {
return TESSDATA_DIR;
}
static std::string TestPDFName(const std::string &suffix) {
return "/tmp/tesseract_pdf_renderer_test_phottest" + suffix;
}

static void AssertPDFSizeLT(const std::string &filename, int size) {
std::filesystem::path p = filename + ".pdf";
ASSERT_LT(std::filesystem::file_size(p), size);
}

static void AssertPDFRemove(const std::string &filename) {
ASSERT_EQ(std::remove((filename + ".pdf").c_str()), 0);
}

static bool initializeAPI(
TessBaseAPI &api, const std::map<std::string, std::string> &variables) {
EXPECT_EQ(api.Init(TESSDATA_DIR, "eng", OEM_LSTM_ONLY), 0);
for (const auto &[name, value] : variables) {
api.SetVariable(name.c_str(), value.c_str());
}
return true;
}

static bool ProcessAndRenderPages(
const std::string &input_filename, TessPDFRenderer *pdf_renderer,
const std::map<std::string, std::string> &variables) {
TessBaseAPI api;
initializeAPI(api, variables);
auto testdata_input_filename = TestDataNameToPath(input_filename);
EXPECT_TRUE(api.ProcessPages(testdata_input_filename.c_str(), TESSDATA_DIR,
1000, pdf_renderer));
api.End();
return pdf_renderer->happy();
}

static void RenderPDFAndAssertSize(
const std::string &image_file, const std::string &pdf_suffix,
bool text_only, int max_file_size,
const std::map<std::string, std::string> &variables = {}) {
auto pdf_name = TestPDFName(pdf_suffix);
auto pdf_renderer = std::make_unique<TessPDFRenderer>(
pdf_name.c_str(), "tessdata", text_only);
ASSERT_TRUE(
ProcessAndRenderPages(image_file, pdf_renderer.get(), variables));
AssertPDFSizeLT(pdf_name, max_file_size);
AssertPDFRemove(pdf_name);
}
};

// Renders the test image with default settings (image included) and checks
// the upper bound of the PDF size.
TEST_F(TessPDFRendererTest, TestPDFRenderBasicTest) {
  const bool text_only = false;
  const int max_pdf_bytes = 113000;
  RenderPDFAndAssertSize("phototest_2.tif", "", text_only, max_pdf_bytes);
}

// Renders the test image with jpg_quality lowered to 40 and checks the upper
// bound of the PDF size.
TEST_F(TessPDFRendererTest, TestPDFRenderJPEGQualityTest) {
  const std::map<std::string, std::string> jpeg_variables = {
      {"jpg_quality", "40"}};
  const bool text_only = false;
  const int max_pdf_bytes = 66000;
  RenderPDFAndAssertSize("phototest_2.tif", "jpg_quality", text_only,
                         max_pdf_bytes, jpeg_variables);
}

// Renders a text-only PDF and checks the upper bound of the PDF size.
TEST_F(TessPDFRendererTest, TestPDFRenderTextOnlyTest) {
  const bool text_only = true;
  const int max_pdf_bytes = 3500;
  RenderPDFAndAssertSize("phototest_2.tif", "text_only", text_only,
                         max_pdf_bytes);
}

// Sets a rendering resolution of 110 dpi directly on the renderer and checks
// the upper bound of the resulting PDF size.
TEST_F(TessPDFRendererTest, TestPDFRenderLowerResolutionTest) {
  const std::string output_base = TestPDFName("lower_resolution");
  const auto renderer = std::make_unique<TessPDFRenderer>(
      output_base.c_str(), "tessdata", false);
  renderer->SetRenderingResolution(110);

  const bool rendered = ProcessAndRenderPages(
      "phototest_2.tif", renderer.get(), userdefined_dpi_variables);
  CHECK_OK(rendered);

  AssertPDFSizeLT(output_base, 35000);
  AssertPDFRemove(output_base);
}

// Requests a rendering resolution of 110 dpi through the rendering_dpi api
// variable and checks the upper bound of the resulting PDF size.
TEST_F(TessPDFRendererTest, TestPDFLowerResolutionVariableTest) {
  const std::string output_base = TestPDFName("lower_resolution_variable");

  // rendering_dpi first, then the shared user-defined dpi settings.
  std::map<std::string, std::string> api_variables = {
      {"rendering_dpi", "110"}};
  api_variables.insert(userdefined_dpi_variables.begin(),
                       userdefined_dpi_variables.end());

  const auto renderer = std::make_unique<TessPDFRenderer>(
      output_base.c_str(), "tessdata", false);
  const bool rendered =
      ProcessAndRenderPages("phototest_2.tif", renderer.get(), api_variables);
  CHECK_OK(rendered);

  AssertPDFSizeLT(output_base, 35000);
  AssertPDFRemove(output_base);
}

// Installs an alternate image on the renderer and checks the upper bound of
// the resulting PDF size.
TEST_F(TessPDFRendererTest, TestPDFAlternateImageTest) {
  const std::string pdf_name = TestPDFName("alternate_image");
  auto pdf_renderer =
      std::make_unique<TessPDFRenderer>(pdf_name.c_str(), "tessdata", false);

  // Fail right away if the alternate image cannot be loaded; with a null
  // pointer the renderer would not be given any alternate image at all.
  Pix *alternate_image = pixRead(TestDataNameToPath("phototest.tif").c_str());
  ASSERT_NE(alternate_image, nullptr) << "Cannot read alternate image";
  pdf_renderer->SetRenderingImage(alternate_image);

  const bool rendered = ProcessAndRenderPages(
      "phototest_2.tif", pdf_renderer.get(),
      std::map<std::string, std::string>());

  // The test created the image and releases it here. Clear the renderer's
  // pointer first so it does not dangle, and release before checking the
  // result so the image is freed even when rendering failed.
  pdf_renderer->SetRenderingImage(nullptr);
  pixDestroy(&alternate_image);

  CHECK_OK(rendered);
  AssertPDFSizeLT(pdf_name, 8000);
  AssertPDFRemove(pdf_name);
}

} // namespace tesseract