diff --git a/Makefile.am b/Makefile.am
index c07567ec25..4d88045c65 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1241,6 +1241,7 @@ check_PROGRAMS += paragraphs_test
 if !DISABLED_LEGACY_ENGINE
 check_PROGRAMS += params_model_test
 endif # !DISABLED_LEGACY_ENGINE
+check_PROGRAMS += pdfrenderer_test
 check_PROGRAMS += progress_test
 check_PROGRAMS += qrsequence_test
 check_PROGRAMS += recodebeam_test
@@ -1469,6 +1470,10 @@ progress_test_CPPFLAGS = $(unittest_CPPFLAGS)
 progress_test_LDFLAGS = $(LEPTONICA_LIBS)
 progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)
 
+pdfrenderer_test_SOURCES = unittest/pdfrenderer_test.cc
+pdfrenderer_test_CPPFLAGS = $(unittest_CPPFLAGS)
+pdfrenderer_test_LDADD = $(TESS_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
+
 qrsequence_test_SOURCES = unittest/qrsequence_test.cc
 qrsequence_test_CPPFLAGS = $(unittest_CPPFLAGS)
 qrsequence_test_LDADD = $(TESS_LIBS)
diff --git a/include/tesseract/renderer.h b/include/tesseract/renderer.h
index a8745a09ee..9dbd51e4dc 100644
--- a/include/tesseract/renderer.h
+++ b/include/tesseract/renderer.h
@@ -241,6 +241,26 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
   // we load a custom PDF font from this location.
   TessPDFRenderer(const char *outputbase, const char *datadir,
                   bool textonly = false);
+  // Restores the given rendering image and dpi. A different current rendering
+  // image (e.g. the scaled copy made for a page) is destroyed first.
+  void ResetRenderingState(Pix *rendering_image_prev, int rendering_dpi_prev);
+
+  /**
+   * Specifies an alternate image to render with the extracted text, to be set
+   * before AddImage. AddImage does not release it; it is used until replaced.
+   */
+  void SetRenderingImage(Pix *rendering_image) {
+    rendering_image_ = rendering_image;
+  }
+
+  /**
+   * Specifies the resolution (dpi) of the image rendered in the PDF.
+   * If not set (0), a positive rendering_dpi api parameter is used, else the
+   * source image resolution.
+   */
+  void SetRenderingResolution(int rendering_dpi) {
+    rendering_dpi_ = rendering_dpi;
+  }
 
 protected:
   bool BeginDocumentHandler() override;
@@ -258,12 +278,24 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
   std::vector<long int> pages_;      // object number for every /Page object
   std::string datadir_;              // where to find the custom font
   bool textonly_;                    // skip images if set
+  Pix *rendering_image_;             // Image to render (nullptr: input image)
+  int rendering_dpi_;                // Resolution of rendering_image_ (0: unset)
   // Bookkeeping only. DIY = Do It Yourself.
   void AppendPDFObjectDIY(size_t objectsize);
   // Bookkeeping + emit data.
   void AppendPDFObject(const char *data);
   // Create the /Contents object for an entire page.
   char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
+  // Returns the image to embed in the current page:
+  // - the rendering image set by the caller or
+  // - the input image scaled to the rendering resolution if it differs from
+  //   the source resolution (the copy is cached in rendering_image_) or
+  // - the input image from the api otherwise
+  Pix *GetRenderingImage(TessBaseAPI *api);
+  // Resolution of the rendering image: set by the caller, else the positive
+  // rendering_dpi api parameter, else the source resolution of the api.
+  int GetRenderingResolution(TessBaseAPI *api);
+
   // Turn an image into a PDF object. Only transcode if we have to.
   static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
                             char **pdf_object, long int *pdf_object_size,
diff --git a/src/api/pdfrenderer.cpp b/src/api/pdfrenderer.cpp
index e84b063a64..9f28b4a04e 100644
--- a/src/api/pdfrenderer.cpp
+++ b/src/api/pdfrenderer.cpp
@@ -192,7 +192,10 @@ static const int kMaxBytesPerCodepoint = 20;
  * PDF Renderer interface implementation
  **********************************************************************/
 TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir, bool textonly)
-    : TessResultRenderer(outputbase, "pdf"), datadir_(datadir) {
+    : TessResultRenderer(outputbase, "pdf")
+    , datadir_(datadir)
+    , rendering_image_(nullptr)
+    , rendering_dpi_(0) {
   obj_ = 0;
   textonly_ = textonly;
   offsets_.push_back(0);
@@ -329,7 +332,12 @@ static bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) {
 }
 
 char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double height) {
-  double ppi = api->GetSourceYResolution();
+  double input_image_ppi = api->GetSourceYResolution();
+  double ppi = GetRenderingResolution(api);
+  double scale = 1;
+  if (input_image_ppi > 0) {
+    scale = ppi / input_image_ppi;
+  }
 
   // These initial conditions are all arbitrary and will be overwritten
   double old_x = 0.0, old_y = 0.0;
@@ -379,6 +387,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
     if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
       int x1, y1, x2, y2;
       res_it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
+      x1 *= scale; y1 *= scale; x2 *= scale; y2 *= scale;
       ClipBaseline(ppi, x1, y1, x2, y2, &line_x1, &line_y1, &line_x2, &line_y2);
     }
 
@@ -413,6 +422,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
     {
       int word_x1, word_y1, word_x2, word_y2;
       res_it->Baseline(RIL_WORD, &word_x1, &word_y1, &word_x2, &word_y2);
+      word_x1 *= scale; word_y1 *= scale; word_x2 *= scale; word_y2 *= scale;
       GetWordBaseline(writing_direction, ppi, height, word_x1, word_y1, word_x2, word_y2,
                       line_x1, line_y1, line_x2, line_y2, &x, &y, &word_length);
     }
@@ -827,10 +837,80 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix, const char *filename, long int obj
   return true;
 }
 
+// Restores the given rendering image and resolution. If the current rendering
+// image differs from rendering_image_prev (e.g. a scaled copy cached by
+// GetRenderingImage()), it is destroyed first so that it cannot leak or be
+// reused for another page.
+void TessPDFRenderer::ResetRenderingState(Pix *rendering_image_prev,
+                                          int rendering_dpi_prev) {
+  if (rendering_image_ != rendering_image_prev) {
+    pixDestroy(&rendering_image_);
+    rendering_image_ = rendering_image_prev;
+  }
+  rendering_dpi_ = rendering_dpi_prev;
+}
+
+// Returns the image to embed in the page: the current rendering image (set
+// with SetRenderingImage() or cached by an earlier call) if there is one,
+// else the api input image, scaled with pixScale() if the rendering
+// resolution differs from the source resolution. The scaled copy is cached in
+// rendering_image_. Returns nullptr without a valid input image/resolution.
+Pix *TessPDFRenderer::GetRenderingImage(TessBaseAPI *api) {
+  if (rendering_image_) {
+    return rendering_image_;
+  }
+  Pix *source_image = api->GetInputImage();
+  int source_dpi = api->GetSourceYResolution();
+  if (!source_image || source_dpi <= 0) {
+    return nullptr;
+  }
+  int rendering_dpi = GetRenderingResolution(api);
+  if (rendering_dpi == source_dpi) {
+    return source_image;
+  }
+  float scale = static_cast<float>(rendering_dpi) / static_cast<float>(source_dpi);
+  rendering_image_ = pixScale(source_image, scale, scale);
+  return rendering_image_;
+}
+
+// Returns the resolution of the rendering image: rendering_dpi_ if non-zero
+// (set with SetRenderingResolution() or cached by an earlier call), else a
+// positive rendering_dpi api parameter that differs from the source
+// resolution (it is cached in rendering_dpi_), else the source resolution.
+int TessPDFRenderer::GetRenderingResolution(tesseract::TessBaseAPI *api) {
+  if (rendering_dpi_) {
+    return rendering_dpi_;
+  }
+  int source_dpi = api->GetSourceYResolution();
+  int rendering_dpi = 0;
+  if (api->GetIntVariable("rendering_dpi", &rendering_dpi) &&
+      rendering_dpi > 0 && rendering_dpi != source_dpi) {
+    if (rendering_dpi < kMinCredibleResolution ||
+        rendering_dpi > kMaxCredibleResolution) {
+#if !defined(NDEBUG)
+      tprintf(
+          "Warning: User defined rendering dpi %d is outside of expected range "
+          "(%d - %d)!\n",
+          rendering_dpi, kMinCredibleResolution, kMaxCredibleResolution);
+#endif
+    }
+    rendering_dpi_ = rendering_dpi;
+    return rendering_dpi_;
+  }
+  return source_dpi;
+}
+
 bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
-  Pix *pix = api->GetInputImage();
-  const char *filename = api->GetInputName();
-  int ppi = api->GetSourceYResolution();
+  // Remember the caller-provided state: the getters below may cache a scaled
+  // image and the rendering_dpi parameter in the members for this page.
+  Pix *rendering_image_prev = rendering_image_;
+  int rendering_dpi_prev = rendering_dpi_;
+  Pix *pix = GetRenderingImage(api);
+  // Pass the input file name only if the input image itself is rendered:
+  // imageToPDFObj avoids transcoding and may embed the file instead of pix.
+  const char *filename = (pix == api->GetInputImage()) ? api->GetInputName() : nullptr;
+  int ppi = GetRenderingResolution(api);
   if (!pix || ppi <= 0) {
+    ResetRenderingState(rendering_image_prev, rendering_dpi_prev);
     return false;
   }
@@ -913,12 +993,14 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
     int jpg_quality;
     api->GetIntVariable("jpg_quality", &jpg_quality);
     if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize, jpg_quality)) {
+      ResetRenderingState(rendering_image_prev, rendering_dpi_prev);
       return false;
     }
     AppendData(pdf_object, objsize);
     AppendPDFObjectDIY(objsize);
     delete[] pdf_object;
   }
+  ResetRenderingState(rendering_image_prev, rendering_dpi_prev);
   return true;
 }
 
diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp
index bb645aba82..34c4892dbc 100644
--- a/src/ccmain/tesseractclass.cpp
+++ b/src/ccmain/tesseractclass.cpp
@@ -352,6 +352,7 @@ Tesseract::Tesseract()
     , BOOL_MEMBER(textonly_pdf, false, "Create PDF with only one invisible text layer",
                   this->params())
     , INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params())
+    , INT_MEMBER(rendering_dpi, 0, "Scaled input image resolution before rendering", this->params())
     , INT_MEMBER(user_defined_dpi, 0, "Specify DPI for input image", this->params())
     , INT_MEMBER(min_characters_to_try, 50, "Specify minimum characters to try during OSD",
                  this->params())
diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h
index c03e045742..5154020455 100644
--- a/src/ccmain/tesseractclass.h
+++ b/src/ccmain/tesseractclass.h
@@ -906,6 +906,7 @@ class TESS_API Tesseract : public Wordrec {
   BOOL_VAR_H(tessedit_create_pdf);
   BOOL_VAR_H(textonly_pdf);
   INT_VAR_H(jpg_quality);
+  INT_VAR_H(rendering_dpi);
   INT_VAR_H(user_defined_dpi);
   INT_VAR_H(min_characters_to_try);
   STRING_VAR_H(unrecognised_char);
diff --git a/unittest/pdfrenderer_test.cc b/unittest/pdfrenderer_test.cc
new file mode 100644
index 0000000000..d8427413c8
--- /dev/null
+++ b/unittest/pdfrenderer_test.cc
@@ -0,0 +1,139 @@
+// (C) Copyright 2023, Tesseract Contributors.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <allheaders.h>
+#include <cstdint>
+#include <filesystem>
+#include <tesseract/baseapi.h>
+#include <tesseract/renderer.h>
+
+#include "include_gunit.h"
+
+namespace tesseract {
+
+static const std::map<std::string, std::string> userdefined_dpi_variables = {
+    {"user_defined_dpi", "300"}};
+
+class TessPDFRendererTest : public testing::Test {
+protected:
+  static std::string TestDataNameToPath(const std::string &name) {
+    return file::JoinPath(TESTING_DIR, name);
+  }
+  static std::string TessdataPath() {
+    return TESSDATA_DIR;
+  }
+  static std::string TestPDFName(const std::string &suffix) {
+    const auto name = "tesseract_pdf_renderer_test_phottest" + suffix;
+    return (std::filesystem::temp_directory_path() / name).string();
+  }
+
+  static void AssertPDFSizeLT(const std::string &filename, int size) {
+    std::filesystem::path p = filename + ".pdf";
+    ASSERT_LT(std::filesystem::file_size(p), static_cast<std::uintmax_t>(size));
+  }
+
+  static void AssertPDFRemove(const std::string &filename) {
+    ASSERT_EQ(std::remove((filename + ".pdf").c_str()), 0);
+  }
+
+  static bool initializeAPI(
+      TessBaseAPI &api, const std::map<std::string, std::string> &variables) {
+    EXPECT_EQ(api.Init(TESSDATA_DIR, "eng", OEM_LSTM_ONLY), 0);
+    for (const auto &[name, value] : variables) {
+      EXPECT_TRUE(api.SetVariable(name.c_str(), value.c_str()));
+    }
+    return true;
+  }
+
+  static bool ProcessAndRenderPages(
+      const std::string &input_filename, TessPDFRenderer *pdf_renderer,
+      const std::map<std::string, std::string> &variables) {
+    TessBaseAPI api;
+    initializeAPI(api, variables);
+    auto testdata_input_filename = TestDataNameToPath(input_filename);
+    EXPECT_TRUE(api.ProcessPages(testdata_input_filename.c_str(), nullptr,
+                                 1000, pdf_renderer));
+    api.End();
+    return pdf_renderer->happy();
+  }
+
+  static void RenderPDFAndAssertSize(
+      const std::string &image_file, const std::string &pdf_suffix,
+      bool text_only, int max_file_size,
+      const std::map<std::string, std::string> &variables = {}) {
+    auto pdf_name = TestPDFName(pdf_suffix);
+    auto pdf_renderer = std::make_unique<TessPDFRenderer>(
+        pdf_name.c_str(), "tessdata", text_only);
+    ASSERT_TRUE(
+        ProcessAndRenderPages(image_file, pdf_renderer.get(), variables));
+    AssertPDFSizeLT(pdf_name, max_file_size);
+    AssertPDFRemove(pdf_name);
+  }
+};
+
+// Test basic pdf rendering
+TEST_F(TessPDFRendererTest, TestPDFRenderBasicTest) {
+  RenderPDFAndAssertSize("phototest_2.tif", "", false, 113000);
+}
+
+// Test pdf rendering with lower jpeg quality
+TEST_F(TessPDFRendererTest, TestPDFRenderJPEGQualityTest) {
+  const std::map<std::string, std::string> variables = {{"jpg_quality", "40"}};
+  RenderPDFAndAssertSize("phototest_2.tif", "jpg_quality", false, 66000,
+                         variables);
+}
+
+// Test pdf renderer text only
+TEST_F(TessPDFRendererTest, TestPDFRenderTextOnlyTest) {
+  RenderPDFAndAssertSize("phototest_2.tif", "text_only", true, 3500);
+}
+
+// Test that SetRenderingResolution() downscales the image embedded in the pdf
+TEST_F(TessPDFRendererTest, TestPDFRenderLowerResolutionTest) {
+  std::string pdf_name = TestPDFName("lower_resolution");
+  auto pdf_renderer =
+      std::make_unique<TessPDFRenderer>(pdf_name.c_str(), "tessdata", false);
+  pdf_renderer->SetRenderingResolution(110);
+  ASSERT_TRUE(ProcessAndRenderPages("phototest_2.tif", pdf_renderer.get(),
+                                    userdefined_dpi_variables));
+  AssertPDFSizeLT(pdf_name, 35000);
+  AssertPDFRemove(pdf_name);
+}
+
+// Test that the rendering_dpi variable downscales the image embedded in the pdf
+TEST_F(TessPDFRendererTest, TestPDFLowerResolutionVariableTest) {
+  std::string pdf_name = TestPDFName("lower_resolution_variable");
+  std::map<std::string, std::string> variables = userdefined_dpi_variables;
+  variables["rendering_dpi"] = "110";
+  auto pdf_renderer =
+      std::make_unique<TessPDFRenderer>(pdf_name.c_str(), "tessdata", false);
+  ASSERT_TRUE(
+      ProcessAndRenderPages("phototest_2.tif", pdf_renderer.get(), variables));
+  AssertPDFSizeLT(pdf_name, 35000);
+  AssertPDFRemove(pdf_name);
+}
+
+// Test that pdf renderer generates an alternate image in the pdf export
+TEST_F(TessPDFRendererTest, TestPDFAlternateImageTest) {
+  std::string pdf_name = TestPDFName("alternate_image");
+  auto pdf_renderer =
+      std::make_unique<TessPDFRenderer>(pdf_name.c_str(), "tessdata", false);
+  auto alternate_image = pixRead(TestDataNameToPath("phototest.tif").c_str());
+  ASSERT_NE(alternate_image, nullptr);
+  pdf_renderer->SetRenderingImage(alternate_image);
+  ASSERT_TRUE(ProcessAndRenderPages("phototest_2.tif", pdf_renderer.get(),
+                                    std::map<std::string, std::string>()));
+  pixDestroy(&alternate_image);
+  AssertPDFSizeLT(pdf_name, 8000);
+  AssertPDFRemove(pdf_name);
+}
+
+} // namespace tesseract