diff --git a/setup.py b/setup.py index bc8fd43..6b62439 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ def _run(self, command): setup( # include data files name="amazon-textract-textractor", - version="1.8.1", + version="1.8.2", license="Apache 2.0", description="A package to use AWS Textract services.", url="https://github.com/aws-samples/amazon-textract-textractor", diff --git a/textractor/__init__.py b/textractor/__init__.py index bea822f..9f627f8 100644 --- a/textractor/__init__.py +++ b/textractor/__init__.py @@ -1,3 +1,3 @@ -__version__ = "1.8.1" +__version__ = "1.8.2" from .textractor import Textractor diff --git a/textractor/textractor.py b/textractor/textractor.py index 8fcc587..068829c 100644 --- a/textractor/textractor.py +++ b/textractor/textractor.py @@ -135,7 +135,7 @@ def _get_document_images_from_path(self, filepath: str) -> List[Image.Image]: file_obj = s3_client.get_object(Bucket=bucket, Key=key).get("Body").read() if filepath.lower().endswith(".pdf"): if IS_PDF_RENDERING_ENABLED: - images = rasterize_pdf(bytearray(file_obj)) + images = rasterize_pdf(file_obj) else: raise MissingDependencyException( "pdf2image is not installed. If you do not plan on using visualizations you can skip image generation using save_image=False in your function call."