diff --git a/README.md b/README.md index 61c56c9..46ff3a5 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,10 @@ We also provide some utility scripts for working with `PDF`, `PS`, and `EPS` fil - `eps2pdf.sh <document>` convert the `EPS` file `<document>` to `PDF` - `filterPdf.sh <document>` transform a document (either in [PostScript](https://en.wikipedia.org/wiki/PostScript)/`PS`, `EPS`, or `PDF` format) to `PDF` and include as many of the fonts used inside the document into the final `PDF`. This allows to produce a `PDF` from a `.ps` file `<document>` which should display correctly on as many computers as possible. +- `filterPdfExact.sh <document>` does the same as `filterPdf.sh`, except that it does not re-encode the included images. - `sudo` is a pseudo-`sudo` command: Inside a Docker container, we don't need `sudo`. However, if you have a script or something that calls plain `sudo` (without additional arguments) just with a to-be-sudoed command, this script will emulate a `sudo`. By doing nothing. +- `downscalePdf.sh <document> {resolution}` makes a `PDF` document smaller by downscaling all included images (to the specified resolution). +- `findNonASCIIChars.sh <document>` finds non-ASCII characters in a document. In `.tex` documents, such characters may pose problems. ## 4. 
License diff --git a/image/Dockerfile b/image/Dockerfile index 5ef92d9..25505c5 100644 --- a/image/Dockerfile +++ b/image/Dockerfile @@ -45,8 +45,8 @@ RUN export LANG=C.UTF-8 &&\ texlive-science=2015.2016* \ texlive-xetex=2015.2016* &&\ # install pandoc - apt-get install -f -y pandoc \ pandoc-citeproc &&\ + apt-get install -f -y pandoc=1.16*\ + pandoc-citeproc=0.9* &&\ # free huge amount of unused space apt-get purge -f -y make-doc \ texlive-fonts-extra-doc \ diff --git a/image/scripts/downscalePdf.sh b/image/scripts/downscalePdf.sh new file mode 100644 index 0000000..5d001e1 --- /dev/null +++ b/image/scripts/downscalePdf.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# Usage: downscalePdf.sh <document> {resolution} +# {resolution} is optional and defaults to 192; it is passed to the Ghostscript *ImageResolution options. +# Make a pdf File Smaller by downscaling all included images; the result is written to '<name>.pdf', where <name> is <document> without its last extension. + +# strict error handling +set -o pipefail # a pipeline fails if any command in it fails +set -o errtrace # ERR traps are inherited by functions, command substitutions and subshells +set -o nounset # set -u : exit the script if you try to use an uninitialized variable +set -o errexit # set -e : exit the script if any statement returns a non-true return value + + +echo "Make a pdf File Smaller by downscaling all included images." +# Strip only the last extension, so that './doc.ps' and 'a.b.ps' map to './doc.pdf' and 'a.b.pdf'. +source="$1" +name="${source%.*}" +dest="$name.pdf" +# If the output file already exists, copy it to '<name>.original.pdf' and read from that copy, so the input is never the file being overwritten. +if [[ -f "$dest" ]]; then + useSource="$name.original.pdf" + echo "Creating backup '$useSource' of '$dest'." + cp "$dest" "$useSource" +else + useSource="$source" +fi + +resolution=${2:-} + +if [[ -n "$resolution" ]] +then + echo "The resolution '$resolution' was specified." +else + resolution=192 + echo "No resolution was specified, using '$resolution'." +fi + +echo "Downscaling '$useSource' to '$dest'." 
+ +gs -q -dEmbedAllFonts=true -dSubsetFonts=true -dCompressFonts=true \ -dOptimize=true \ -dPreserveCopyPage=false -dPreserveEPSInfo=false -dPreserveHalftoneInfo=false \ -dPreserveOPIComments=false -dPreserveOverprintSettings=false \ -dPreserveSeparation=false -dPreserveDeviceN=false \ -dMaxBitmap=2147483647 \ -dDownsampleMonoImages=true -dDownsampleGrayImages=true -dDownsampleColorImages=true \ -dColorImageDownsampleType=/Bicubic -dGrayImageDownsampleType=/Bicubic \ -dMonoImageDownsampleType=/Bicubic \ -dColorImageResolution="$resolution" -dGrayImageResolution="$resolution" -dMonoImageResolution="$resolution" \ -dFastWebView=false \ -dNOPAUSE -dQUIET -dBATCH -dSAFER -sDEVICE=pdfwrite -dAutoRotatePages=/PageByPage \ -sOutputFile="$dest" "$useSource" \ -c .setpdfwrite "<</NeverEmbed [ ]>> setdistillerparams" + + +chmod 777 "$dest" || true + +echo "Finished creating downsampled, smaller version." + diff --git a/image/scripts/filterPdfExact.sh b/image/scripts/filterPdfExact.sh new file mode 100644 index 0000000..4759ef0 --- /dev/null +++ b/image/scripts/filterPdfExact.sh @@ -0,0 +1,15 @@ +#!/bin/bash - + +# This script filters a PDF file and attempts to include +# as many fonts as possible. It works like filterPdf, but +# avoids image re-encoding. 
+ +# strict error handling +set -o pipefail # a pipeline fails if any command in it fails +set -o errtrace # ERR traps are inherited by functions, command substitutions and subshells +set -o nounset # set -u : exit the script if you try to use an uninitialized variable +set -o errexit # set -e : exit the script if any statement returns a non-true return value + +scriptDir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +"$scriptDir/__filterPdf.sh" "$1" "-dAutoFilterColorImages=false -dAutoFilterGrayImages=false -dColorImageFilter=/FlateEncode -dGrayImageFilter=/FlateEncode -dColorConversionStrategy=/LeaveColorUnchanged" diff --git a/image/scripts/findNonASCIIChars.sh b/image/scripts/findNonASCIIChars.sh new file mode 100644 index 0000000..955ed89 --- /dev/null +++ b/image/scripts/findNonASCIIChars.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# strict error handling +set -o pipefail # a pipeline fails if any command in it fails +set -o errtrace # ERR traps are inherited by functions, command substitutions and subshells +set -o nounset # set -u : exit the script if you try to use an uninitialized variable +set -o errexit # set -e : exit the script if any statement returns a non-true return value + +# Find non-ASCII characters which may cause problems in tex documents. Matching lines are printed with their line numbers; the exit status is that of grep, i.e. 1 if nothing was found. +grep --color='auto' -P -n '[^\x00-\x7F]' -- "$1"