Skip to content

Commit

Permalink
Change default .repair(...) setting h/t @Laubeee
Browse files Browse the repository at this point in the history
Now passes "default" rather than "prepress" to Ghostscript's
`-dPDFSETTINGS` parameter.

Also makes that setting modifiable via `.repair(setting=...)`, where the
value is one of `"default"`, `"prepress"`, `"printer"`, `"ebook"`, or
`"screen"`.

See #874 and https://ghostscript.com/docs/9.54.0/VectorDevices.htm
  • Loading branch information
jsvine committed Jul 31, 2024
1 parent 6c1bbd4 commit 48cab3f
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 5 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format

## [0.11.3] - Unreleased

### Changed

- Change the default value that `pdfplumber.repair(...)` passes to Ghostscript's `-dPDFSETTINGS` parameter from `prepress` to `default`, and make that setting modifiable via `.repair(setting=...)`, where the value is one of `"default"`, `"prepress"`, `"printer"`, `"ebook"`, or `"screen"` (h/t @Laubeee). ([#874](https://github.com/jsvine/pdfplumber/issues/874))

### Fixed

- Fix error on getting `.annots`/`.hyperlinks` from `CroppedPage` (due to missing `.rotation` and `.initial_doctop` attributes) (h/t @Safrone). ([#1171](https://github.com/jsvine/pdfplumber/issues/1171) + [e5737d2](https://github.com/jsvine/pdfplumber/commit/e5737d2))
Expand Down
7 changes: 5 additions & 2 deletions pdfplumber/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ._typing import T_num, T_obj_list
from .container import Container
from .page import Page
from .repair import _repair
from .repair import T_repair_setting, _repair
from .structure import PDFStructTree, StructTreeMissing
from .utils import resolve_and_decode

Expand Down Expand Up @@ -72,12 +72,15 @@ def open(
strict_metadata: bool = False,
repair: bool = False,
gs_path: Optional[Union[str, pathlib.Path]] = None,
repair_setting: T_repair_setting = "default",
) -> "PDF":

stream: Union[BufferedReader, BytesIO]

if repair:
stream = _repair(path_or_fp, password=password, gs_path=gs_path)
stream = _repair(
path_or_fp, password=password, gs_path=gs_path, setting=repair_setting
)
stream_is_external = False
# Although the original file has a path,
# the repaired version does not
Expand Down
10 changes: 7 additions & 3 deletions pdfplumber/repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
import shutil
import subprocess
from io import BufferedReader, BytesIO
from typing import Optional, Union
from typing import Literal, Optional, Union

# Values accepted for the `setting` argument of `_repair` / `repair`; the
# chosen value is interpolated into Ghostscript's `-dPDFSETTINGS=/<value>` flag.
T_repair_setting = Literal["default", "prepress", "printer", "ebook", "screen"]


def _repair(
path_or_fp: Union[str, pathlib.Path, BufferedReader, BytesIO],
password: Optional[str] = None,
gs_path: Optional[Union[str, pathlib.Path]] = None,
setting: T_repair_setting = "default",
) -> BytesIO:

executable = (
Expand All @@ -29,7 +32,7 @@ def _repair(
"-o",
"-",
"-sDEVICE=pdfwrite",
"-dPDFSETTINGS=/prepress",
f"-dPDFSETTINGS=/{setting}",
]

if password:
Expand Down Expand Up @@ -62,8 +65,9 @@ def repair(
outfile: Optional[Union[str, pathlib.Path]] = None,
password: Optional[str] = None,
gs_path: Optional[Union[str, pathlib.Path]] = None,
setting: T_repair_setting = "default",
) -> Optional[BytesIO]:
repaired = _repair(path_or_fp, password, gs_path=gs_path)
repaired = _repair(path_or_fp, password, gs_path=gs_path, setting=setting)
if outfile:
with open(outfile, "wb") as f:
f.write(repaired.read())
Expand Down
12 changes: 12 additions & 0 deletions tests/test_repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,18 @@ def test_repair_to_file(self):
char = page.chars[0]
assert char["bottom"] < page.height

def test_repair_setting(self):
    """Check that the `setting` argument changes what `repair` writes.

    Repairs the same malformed PDF twice -- once with no explicit
    setting, once with ``setting="prepress"`` -- and asserts that the
    file written with the default setting is larger.
    """
    source = os.path.join(HERE, "pdfs/malformed-from-issue-932.pdf")
    sizes = {}

    # Same repair-and-measure sequence for each variant; only the extra
    # keyword arguments handed to `repair` differ.
    for label, extra in (("default", {}), ("prepress", {"setting": "prepress"})):
        with tempfile.NamedTemporaryFile("wb") as out:
            pdfplumber.repair(source, outfile=out.name, **extra)
            sizes[label] = os.stat(out.name).st_size

    assert sizes["default"] > sizes["prepress"]

def test_repair_password(self):
path = os.path.join(HERE, "pdfs/password-example.pdf")
with pdfplumber.open(path, repair=True, password="test") as pdf:
Expand Down

0 comments on commit 48cab3f

Please sign in to comment.