From 2245cb7052f18f37e2232eb446b5708f631a66d1 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sat, 14 Sep 2024 13:55:32 +0200 Subject: [PATCH 01/17] try multi-arch build --- .github/workflows/ci-build-manual-crf.yml | 24 +++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci-build-manual-crf.yml b/.github/workflows/ci-build-manual-crf.yml index 6606f0bfd1..3b86dbcf70 100644 --- a/.github/workflows/ci-build-manual-crf.yml +++ b/.github/workflows/ci-build-manual-crf.yml @@ -31,17 +31,21 @@ jobs: - name: Create more disk space run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY" - uses: actions/checkout@v4 - - name: Build and push - id: docker_build - uses: mr-smithers-excellent/docker-build-push@v6 + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v3 + - name: Login to Docker Hub + uses: docker/login-action@v1 with: - dockerfile: Dockerfile.crf username: ${{ secrets.DOCKERHUB_USERNAME_LFOPPIANO }} password: ${{ secrets.DOCKERHUB_TOKEN_LFOPPIANO }} - image: lfoppiano/grobid - registry: docker.io - pushImage: true - tags: | + - name: Build and push + uses: docker/build-push-action@v3 + with: + context: . 
+ platforms: linux/amd64,linux/arm/v7 + push: true + tags: | latest-develop, latest-crf${{ github.event.inputs.suffix != '' && '-' || '' }}${{ github.event.inputs.suffix }} - - name: Image digest - run: echo ${{ steps.docker_build.outputs.digest }} From 2e34a08b728897b1286fd695b9ab88b1b72becdc Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sat, 14 Sep 2024 14:03:55 +0200 Subject: [PATCH 02/17] Specify the correct Dockerfile --- .github/workflows/ci-build-manual-crf.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-build-manual-crf.yml b/.github/workflows/ci-build-manual-crf.yml index 3b86dbcf70..6e476f1ef8 100644 --- a/.github/workflows/ci-build-manual-crf.yml +++ b/.github/workflows/ci-build-manual-crf.yml @@ -44,7 +44,7 @@ jobs: - name: Build and push uses: docker/build-push-action@v3 with: - context: . + file: Dockerfile.crf platforms: linux/amd64,linux/arm/v7 push: true tags: | From c5a4fd52abe2ac8b204b98b7861c58172451895b Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sat, 14 Sep 2024 14:15:13 +0200 Subject: [PATCH 03/17] switch to eclipse-temurin JDK 17 docker image --- Dockerfile.crf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.crf b/Dockerfile.crf index 1c255124ea..d90a0769c5 100644 --- a/Dockerfile.crf +++ b/Dockerfile.crf @@ -13,7 +13,7 @@ # ------------------- # build builder image # ------------------- -FROM openjdk:17-jdk-slim as builder +FROM eclipse-temurin:17.0.12_7-jdk as builder USER root @@ -63,7 +63,7 @@ RUN rm -rf grobid-source # ------------------- # build runtime image # ------------------- -FROM openjdk:17-slim +FROM eclipse-temurin:17.0.12_7-jre RUN apt-get update && \ apt-get -y upgrade && \ From 406dd74d2f21d975dadce45542cbaaecbfdd1a77 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sat, 14 Sep 2024 14:36:12 +0200 Subject: [PATCH 04/17] attempt to fix the issue with SSL --- Dockerfile.crf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/Dockerfile.crf b/Dockerfile.crf index d90a0769c5..ba507e0ea4 100644 --- a/Dockerfile.crf +++ b/Dockerfile.crf @@ -13,7 +13,7 @@ # ------------------- # build builder image # ------------------- -FROM eclipse-temurin:17.0.12_7-jdk as builder +FROM eclipse-temurin:17.0.12_7-jdk-focal as builder USER root @@ -63,7 +63,7 @@ RUN rm -rf grobid-source # ------------------- # build runtime image # ------------------- -FROM eclipse-temurin:17.0.12_7-jre +FROM eclipse-temurin:17.0.12_7-jre-focal RUN apt-get update && \ apt-get -y upgrade && \ From 55b01083e40058f5e689cf8c4d059142f7a6e8c7 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sat, 14 Sep 2024 14:41:18 +0200 Subject: [PATCH 05/17] simplify --- .github/workflows/ci-build-manual-crf.yml | 2 +- Dockerfile.crf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-build-manual-crf.yml b/.github/workflows/ci-build-manual-crf.yml index 6e476f1ef8..7006f5cd66 100644 --- a/.github/workflows/ci-build-manual-crf.yml +++ b/.github/workflows/ci-build-manual-crf.yml @@ -45,7 +45,7 @@ jobs: uses: docker/build-push-action@v3 with: file: Dockerfile.crf - platforms: linux/amd64,linux/arm/v7 + platforms: linux/amd64,linux/arm/v8 push: true tags: | latest-develop, latest-crf${{ github.event.inputs.suffix != '' && '-' || '' }}${{ github.event.inputs.suffix }} diff --git a/Dockerfile.crf b/Dockerfile.crf index ba507e0ea4..13bd692825 100644 --- a/Dockerfile.crf +++ b/Dockerfile.crf @@ -13,7 +13,7 @@ # ------------------- # build builder image # ------------------- -FROM eclipse-temurin:17.0.12_7-jdk-focal as builder +FROM eclipse-temurin:17-jdk-focal as builder USER root @@ -63,7 +63,7 @@ RUN rm -rf grobid-source # ------------------- # build runtime image # ------------------- -FROM eclipse-temurin:17.0.12_7-jre-focal +FROM eclipse-temurin:17-jre-focal RUN apt-get update && \ apt-get -y upgrade && \ From ec46df89883185449ca270da226eec2dee759020 Mon Sep 17 00:00:00 2001 From: Luca Foppiano 
Date: Sat, 14 Sep 2024 14:55:50 +0200 Subject: [PATCH 06/17] use arm64 --- .github/workflows/ci-build-manual-crf.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-build-manual-crf.yml b/.github/workflows/ci-build-manual-crf.yml index 7006f5cd66..8b486dbfb6 100644 --- a/.github/workflows/ci-build-manual-crf.yml +++ b/.github/workflows/ci-build-manual-crf.yml @@ -45,7 +45,7 @@ jobs: uses: docker/build-push-action@v3 with: file: Dockerfile.crf - platforms: linux/amd64,linux/arm/v8 + platforms: linux/amd64,linux/arm64 push: true tags: | latest-develop, latest-crf${{ github.event.inputs.suffix != '' && '-' || '' }}${{ github.event.inputs.suffix }} From 5a534dc33dfcc22a23743e147da93b68be359cf6 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sat, 14 Sep 2024 17:40:46 +0200 Subject: [PATCH 07/17] fix tags --- .github/workflows/ci-build-manual-crf.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-build-manual-crf.yml b/.github/workflows/ci-build-manual-crf.yml index 8b486dbfb6..e70a2964f6 100644 --- a/.github/workflows/ci-build-manual-crf.yml +++ b/.github/workflows/ci-build-manual-crf.yml @@ -48,4 +48,4 @@ jobs: platforms: linux/amd64,linux/arm64 push: true tags: | - latest-develop, latest-crf${{ github.event.inputs.suffix != '' && '-' || '' }}${{ github.event.inputs.suffix }} + lfoppiano/grobid:latest-develop, lfoppiano/grobid:latest-crf${{ github.event.inputs.suffix != '' && '-' || '' }}${{ github.event.inputs.suffix }} From 81ac1dc252fa597063ffa75f45783c921aadf928 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 20 Sep 2024 10:02:06 +0200 Subject: [PATCH 08/17] add some documentation about arm --- doc/Grobid-docker.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/Grobid-docker.md b/doc/Grobid-docker.md index 7b447a0cec..e777c1bbf9 100644 --- a/doc/Grobid-docker.md +++ b/doc/Grobid-docker.md @@ -77,7 +77,7 @@ It is then possible to 
select other Deep Learning models for other processing st ## CRF-only image -The process for retrieving and running the image is as follow: +The process for retrieving and running the image is as follows: - Pull the image from docker HUB (check the [latest version number](https://hub.docker.com/r/lfoppiano/grobid/tags)): @@ -115,6 +115,24 @@ Access the service: Grobid web services are then available as described in the [service documentation](https://grobid.readthedocs.io/en/latest/Grobid-service/). +### ARM64 + +Since version 0.8.1 the CRF-only image is available for both x86_64 and arm64. +The `lfoppiano/grobid:{latest_grobid_version}` image supports both architectures by default. + +To run it on arm64, use the following command: + +```bash +docker run --ulimit core=0 --platform linux/amd64 -p 8070:8070 --name grobid lfoppiano/grobid:{latest_grobid_version} +``` + +As discussed [here](https://github.com/kermitt2/grobid/issues/1119#issuecomment-2362204879), it is mandatory to provide the parameter `--platform linux/amd64` to force the usage of amd64 architecture and avoid the raising of the rosetta error: + +``` +rosetta error: failed to open elf at /lib64/ld-linux-x86-64.so.2 +``` + +**NOTE**: The arm64 image has been tested only on `macOS` (not Linux `arm64`). 
## Configure using the yaml config file From 9cc7285743033da65597cdef37290bfa77be4233 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Fri, 20 Sep 2024 22:12:40 +0200 Subject: [PATCH 09/17] add detail about --init --- doc/Grobid-docker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/Grobid-docker.md b/doc/Grobid-docker.md index e777c1bbf9..7517e03e8d 100644 --- a/doc/Grobid-docker.md +++ b/doc/Grobid-docker.md @@ -123,7 +123,7 @@ The `lfoppiano/grobid:{latest_grobid_version}` support both architectures by def To run it on arm64, use the following command: ```bash -docker run --ulimit core=0 --platform linux/amd64 -p 8070:8070 --name grobid lfoppiano/grobid:{latest_grobid_version} +docker run --ulimit core=0 --platform linux/amd64 --init -p 8070:8070 --name grobid lfoppiano/grobid:{latest_grobid_version} ``` As discussed [here](https://github.com/kermitt2/grobid/issues/1119#issuecomment-2362204879), it is mandatory to provide the parameter `--platform linux/amd64` to force the usage of amd64 architecture and avoid the raising of the rosetta error: From 2ff7a64e49e9abe2c0fdb96a2d7adf5bc6bd5497 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sun, 29 Sep 2024 14:59:05 +0200 Subject: [PATCH 10/17] set vfork for arm --- Dockerfile.crf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Dockerfile.crf b/Dockerfile.crf index 13bd692825..19c203c017 100644 --- a/Dockerfile.crf +++ b/Dockerfile.crf @@ -80,7 +80,12 @@ WORKDIR /opt/grobid COPY --from=builder /opt/grobid . 
-ENV GROBID_SERVICE_OPTS "-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED" +ARG PROCESS_MECHANISM="" +RUN if [ "$(uname -m)" = "aarch64" ]; then \ + PROCESS_MECHANISM="-Djdk.lang.Process.launchMechanism=vfork"; \ + fi + +ENV GROBID_SERVICE_OPTS "$PROCESS_MECHANISM -Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED" CMD ["./grobid-service/bin/grobid-service"] From a298010b70722b7550a7bb1439717dcba8fa8fe6 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sun, 29 Sep 2024 16:16:42 +0200 Subject: [PATCH 11/17] try to set a different fork mechanism for arm --- Dockerfile.crf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.crf b/Dockerfile.crf index 19c203c017..82bca0b464 100644 --- a/Dockerfile.crf +++ b/Dockerfile.crf @@ -80,8 +80,8 @@ WORKDIR /opt/grobid COPY --from=builder /opt/grobid . -ARG PROCESS_MECHANISM="" -RUN if [ "$(uname -m)" = "aarch64" ]; then \ +ARG TARGETARCH +RUN if [ "$TARGETARCH" = "arm64" ]; then \ PROCESS_MECHANISM="-Djdk.lang.Process.launchMechanism=vfork"; \ fi From 59029595e5987ce44b15ce6bb678ec98e4d5563a Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sun, 29 Sep 2024 17:47:46 +0200 Subject: [PATCH 12/17] try to fix this variable --- Dockerfile.crf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile.crf b/Dockerfile.crf index 82bca0b464..451131817f 100644 --- a/Dockerfile.crf +++ b/Dockerfile.crf @@ -80,13 +80,13 @@ WORKDIR /opt/grobid COPY --from=builder /opt/grobid . 
+ENV GROBID_SERVICE_OPTS "-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED" + ARG TARGETARCH RUN if [ "$TARGETARCH" = "arm64" ]; then \ - PROCESS_MECHANISM="-Djdk.lang.Process.launchMechanism=vfork"; \ + ENV PGROBID_SERVICE_OPTS = "-Djdk.lang.Process.launchMechanism=vfork ${PGROBID_SERVICE_OPTS}"; \ fi -ENV GROBID_SERVICE_OPTS "$PROCESS_MECHANISM -Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED" - CMD ["./grobid-service/bin/grobid-service"] ARG GROBID_VERSION From 1c1ae9fc3269356502622b104471fd3c238d89c3 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sun, 29 Sep 2024 21:22:52 +0200 Subject: [PATCH 13/17] another attempt --- Dockerfile.crf | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Dockerfile.crf b/Dockerfile.crf index 451131817f..b19949292b 100644 --- a/Dockerfile.crf +++ b/Dockerfile.crf @@ -82,10 +82,14 @@ COPY --from=builder /opt/grobid . 
ENV GROBID_SERVICE_OPTS "-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED" -ARG TARGETARCH RUN if [ "$TARGETARCH" = "arm64" ]; then \ - ENV PGROBID_SERVICE_OPTS = "-Djdk.lang.Process.launchMechanism=vfork ${PGROBID_SERVICE_OPTS}"; \ - fi + export PROCESS_MECHANISM="-Djdk.lang.Process.launchMechanism=vfork"; \ + else \ + export PROCESS_MECHANISM=""; \ + fi && \ + echo "PROCESS_MECHANISM=$PROCESS_MECHANISM" >> /etc/environment + +ENV GROBID_SERVICE_OPTS "$PROCESS_MECHANISM -Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED" CMD ["./grobid-service/bin/grobid-service"] From efa5dd3b6e58d7d5e78dbfdec0145d576a3cef3e Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sun, 29 Sep 2024 22:05:18 +0200 Subject: [PATCH 14/17] improve env vars --- Dockerfile.crf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile.crf b/Dockerfile.crf index b19949292b..84436443c7 100644 --- a/Dockerfile.crf +++ b/Dockerfile.crf @@ -80,8 +80,6 @@ WORKDIR /opt/grobid COPY --from=builder /opt/grobid . 
-ENV GROBID_SERVICE_OPTS "-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED" - RUN if [ "$TARGETARCH" = "arm64" ]; then \ export PROCESS_MECHANISM="-Djdk.lang.Process.launchMechanism=vfork"; \ else \ @@ -89,7 +87,10 @@ RUN if [ "$TARGETARCH" = "arm64" ]; then \ fi && \ echo "PROCESS_MECHANISM=$PROCESS_MECHANISM" >> /etc/environment -ENV GROBID_SERVICE_OPTS "$PROCESS_MECHANISM -Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED" +RUN echo "BAO" \ + echo "$PROCESS_MECHANISM" + +ENV GROBID_SERVICE_OPTS "$PROCESS_MECHANISM-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED" CMD ["./grobid-service/bin/grobid-service"] From e9accbd51c1e58f70af4c5258692ad2c1c4033b6 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Mon, 30 Sep 2024 07:39:03 +0200 Subject: [PATCH 15/17] minor --- Dockerfile.crf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Dockerfile.crf b/Dockerfile.crf index 84436443c7..3f550734a2 100644 --- a/Dockerfile.crf +++ b/Dockerfile.crf @@ -81,14 +81,13 @@ WORKDIR /opt/grobid COPY --from=builder /opt/grobid . 
RUN if [ "$TARGETARCH" = "arm64" ]; then \ - export PROCESS_MECHANISM="-Djdk.lang.Process.launchMechanism=vfork"; \ + export PROCESS_MECHANISM="-Djdk.lang.Process.launchMechanism=vfork "; \ else \ export PROCESS_MECHANISM=""; \ fi && \ echo "PROCESS_MECHANISM=$PROCESS_MECHANISM" >> /etc/environment -RUN echo "BAO" \ - echo "$PROCESS_MECHANISM" +RUN echo "Selected process spawning mechanism: $PROCESS_MECHANISM" ENV GROBID_SERVICE_OPTS "$PROCESS_MECHANISM-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED" From 42436ad172ee8fb86870c70cd341cdde51903fd0 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Mon, 30 Sep 2024 08:23:22 +0200 Subject: [PATCH 16/17] minor adjust --- Dockerfile.crf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.crf b/Dockerfile.crf index 3f550734a2..3f92b771e2 100644 --- a/Dockerfile.crf +++ b/Dockerfile.crf @@ -80,7 +80,7 @@ WORKDIR /opt/grobid COPY --from=builder /opt/grobid . -RUN if [ "$TARGETARCH" = "arm64" ]; then \ +RUN if [ "$TARGETARCH" = "linux/arm64" ]; then \ export PROCESS_MECHANISM="-Djdk.lang.Process.launchMechanism=vfork "; \ else \ export PROCESS_MECHANISM=""; \ From 7ea5f74602f9282e3b3a20682ea20b355dfc13b0 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Mon, 30 Sep 2024 09:37:17 +0200 Subject: [PATCH 17/17] another test --- Dockerfile.crf | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Dockerfile.crf b/Dockerfile.crf index 3f92b771e2..dcb3386dbf 100644 --- a/Dockerfile.crf +++ b/Dockerfile.crf @@ -80,12 +80,14 @@ WORKDIR /opt/grobid COPY --from=builder /opt/grobid . 
-RUN if [ "$TARGETARCH" = "linux/arm64" ]; then \ - export PROCESS_MECHANISM="-Djdk.lang.Process.launchMechanism=vfork "; \ +ARG TARGETARCH +ARG PROCESS_MECHANISM + +RUN if [ "$TARGETARCH" = "arm64" ]; then \ + PROCESS_MECHANISM="-Djdk.lang.Process.launchMechanism=vfork "; \ else \ - export PROCESS_MECHANISM=""; \ - fi && \ - echo "PROCESS_MECHANISM=$PROCESS_MECHANISM" >> /etc/environment + PROCESS_MECHANISM=""; \ + fi RUN echo "Selected process spawning mechanism: $PROCESS_MECHANISM"