# Review notes (text ahead of the "diff --git" header is commentary; patch tools skip it):
# - The collapsed source had lost its line breaks, indentation and blank context
#   lines. They are reconstructed here so the hunk matches its header
#   (130 old / 156 new lines). NOTE(review): blank-line positions and indent widths
#   are best-effort; confirm against the real file before applying.
# - Two added lines differ from the collapsed source: "pnpm install" keeps
#   --frozen-lockfile (it was present on the RUN line this patch removes), and the
#   .deb cleanup uses ./*.deb so a dash-prefixed filename cannot be read as an option.
#   The post-image blob id on the "index" line therefore no longer matches.
diff --git a/containers/paperless/Dockerfile b/containers/paperless/Dockerfile
index 24b22da..a7b4e65 100644
--- a/containers/paperless/Dockerfile
+++ b/containers/paperless/Dockerfile
@@ -1,130 +1,156 @@
+# syntax=docker/dockerfile:1
 # Paperless-ngx — self-hosted document management
 # Built from source via forge mirror of paperless-ngx/paperless-ngx
-# Based on upstream Dockerfile (multi-stage: Node frontend + Python backend + s6-overlay)
+# Closely follows upstream Dockerfile structure with git clone instead of COPY
 
 ARG CONTAINER_APP_VERSION=v2.20.13
 
 ###############################################
-# Frontend Build
+# Stage 1: Clone source (reused by later stages)
 ###############################################
-FROM node:20-slim AS frontend-builder
+FROM docker.io/library/alpine:3.22 AS source
 
 ARG CONTAINER_APP_VERSION
-RUN apt-get update && apt-get install --no-install-recommends -y git ca-certificates && rm -rf /var/lib/apt/lists/*
-
+RUN apk add --no-cache git
 RUN git clone --depth 1 --branch ${CONTAINER_APP_VERSION} \
     https://forge.ops.eblu.me/mirrors/paperless-ngx.git /src
 
+###############################################
+# Stage 2: Compile frontend
+###############################################
+FROM --platform=$BUILDPLATFORM docker.io/node:20-trixie-slim AS compile-frontend
+
+COPY --from=source /src/src-ui /src/src-ui
 WORKDIR /src/src-ui
-RUN corepack enable && corepack prepare pnpm@latest --activate
-RUN pnpm install --frozen-lockfile
-RUN ./node_modules/.bin/ng build --configuration production
+
+RUN set -eux \
+    && npm update -g pnpm \
+    && npm install -g corepack@latest \
+    && corepack enable \
+    && pnpm install --frozen-lockfile
+
+RUN set -eux \
+    && ./node_modules/.bin/ng build --configuration production
 
 ###############################################
-# s6-overlay base
+# Stage 3: s6-overlay base
 ###############################################
-FROM ghcr.io/astral-sh/uv:0.9.15-python3.12-trixie-slim AS s6-base
+FROM ghcr.io/astral-sh/uv:0.9.15-python3.12-trixie-slim AS s6-overlay-base
 
-ARG S6_OVERLAY_VERSION=3.2.1.0
-ARG TARGETARCH
-
-ADD https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-noarch.tar.xz /tmp
-ADD https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-${TARGETARCH}.tar.xz /tmp
-RUN tar -C / -Jxpf /tmp/s6-overlay-noarch.tar.xz \
-    && tar -C / -Jxpf /tmp/s6-overlay-${TARGETARCH}.tar.xz \
-    && rm -f /tmp/s6-overlay-*.tar.xz
+WORKDIR /usr/src/s6
 
 ENV S6_BEHAVIOUR_IF_STAGE2_FAILS=2 \
-    S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0
+    S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0 \
+    S6_VERBOSITY=1 \
+    PATH=/command:$PATH
+
+ARG TARGETARCH
+ARG TARGETVARIANT
+ARG S6_OVERLAY_VERSION=3.2.1.0
+
+RUN set -eux \
+    && apt-get update \
+    && apt-get install --yes --quiet --no-install-recommends curl xz-utils \
+    && S6_ARCH="" \
+    && if [ "${TARGETARCH}${TARGETVARIANT}" = "amd64" ]; then S6_ARCH="x86_64"; \
+       elif [ "${TARGETARCH}${TARGETVARIANT}" = "arm64" ]; then S6_ARCH="aarch64"; fi \
+    && if [ -z "${S6_ARCH}" ]; then echo "Error: Cannot determine arch"; exit 1; fi \
+    && curl --fail --silent --show-error --location --remote-name-all --parallel \
+        "https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-noarch.tar.xz" \
+        "https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-noarch.tar.xz.sha256" \
+        "https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-${S6_ARCH}.tar.xz" \
+        "https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-${S6_ARCH}.tar.xz.sha256" \
+    && sha256sum --check ./*.sha256 \
+    && tar --directory / -Jxpf s6-overlay-noarch.tar.xz \
+    && tar --directory / -Jxpf s6-overlay-${S6_ARCH}.tar.xz \
+    && rm ./*.tar.xz ./*.sha256 \
+    && apt-get --yes purge curl xz-utils \
+    && apt-get --yes autoremove --purge \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy rootfs (s6 service definitions, init scripts)
+COPY --from=source /src/docker/rootfs /
 
 ###############################################
-# Main Application
+# Stage 4: Main application
 ###############################################
-FROM s6-base AS production
+FROM s6-overlay-base AS main-app
 
 ARG CONTAINER_APP_VERSION
+ARG DEBIAN_FRONTEND=noninteractive
+ARG TARGETARCH
+ARG JBIG2ENC_VERSION=0.30
 
-# Runtime system dependencies
-RUN apt-get update && apt-get install --no-install-recommends -y \
-    # General
-    curl gosu tzdata gettext file libmagic1 media-types zlib1g \
-    # PDF / document processing
-    ghostscript gnupg qpdf poppler-utils imagemagick icc-profiles-free \
-    # OCR
-    tesseract-ocr tesseract-ocr-eng unpaper pngquant \
-    # Database client
-    postgresql-client \
-    # XML
-    libxml2 libxslt1.1 \
-    # Barcode
-    libzbar0 \
-    # Fonts
-    fonts-liberation \
-    # Build deps (purged after pip install)
-    build-essential pkg-config libpq-dev \
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONWARNINGS="ignore:::django.http.response:517" \
+    PNGX_CONTAINERIZED=1 \
+    UV_LINK_MODE=copy \
+    UV_CACHE_DIR=/cache/uv/
+
+# Runtime packages
+RUN set -eux \
+    && apt-get update \
+    && apt-get install --yes --quiet --no-install-recommends \
+        curl gosu tzdata fonts-liberation gettext ghostscript gnupg \
+        icc-profiles-free imagemagick postgresql-client \
+        tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra \
+        tesseract-ocr-ita tesseract-ocr-spa unpaper pngquant jbig2dec \
+        libxml2 libxslt1.1 qpdf file libmagic1 media-types zlib1g \
+        libzbar0 poppler-utils \
+    && curl --fail --silent --show-error --location --remote-name-all \
+        "https://github.com/paperless-ngx/builder/releases/download/jbig2enc-trixie-v${JBIG2ENC_VERSION}/jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb" \
+    && dpkg --install ./jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \
+    && cp /etc/ImageMagick-6/paperless-policy.xml /etc/ImageMagick-6/policy.xml \
+    && rm --force ./*.deb \
     && rm -rf /var/lib/apt/lists/*
 
-# Install jbig2enc from upstream prebuilt deb
-RUN curl -fsSL "https://github.com/paperless-ngx/builder/releases/download/jbig2enc-0.29/jbig2enc_0.29-1_$(dpkg --print-architecture).deb" -o /tmp/jbig2enc.deb \
-    && dpkg -i /tmp/jbig2enc.deb \
-    && rm /tmp/jbig2enc.deb \
-    || true
+WORKDIR /usr/src/paperless/src/
 
-WORKDIR /usr/src/paperless
+# Python dependencies
+COPY --from=source /src/pyproject.toml /src/uv.lock /usr/src/paperless/src/
 
-# Clone source
-RUN apt-get update && apt-get install --no-install-recommends -y git ca-certificates \
-    && git clone --depth 1 --branch ${CONTAINER_APP_VERSION} \
-       https://forge.ops.eblu.me/mirrors/paperless-ngx.git /tmp/paperless-src \
-    && cp -r /tmp/paperless-src/src ./src \
-    && cp -r /tmp/paperless-src/docker/rootfs / \
-    && cp /tmp/paperless-src/docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml || true \
-    && cp /tmp/paperless-src/Pipfile* . 2>/dev/null || true \
-    && cp /tmp/paperless-src/pyproject.toml /tmp/paperless-src/uv.lock . \
-    && rm -rf /tmp/paperless-src \
-    && apt-get purge -y git \
-    && apt-get autoremove -y \
-    && rm -rf /var/lib/apt/lists/*
+RUN --mount=type=cache,target=${UV_CACHE_DIR},id=python-cache \
+    set -eux \
+    && apt-get update \
+    && apt-get install --yes --quiet --no-install-recommends \
+        build-essential default-libmysqlclient-dev pkg-config \
+    && uv export --quiet --no-dev --all-extras --format requirements-txt --output-file requirements.txt \
+    && uv pip install --system --no-python-downloads --python-preference system --requirements requirements.txt \
+    && python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" snowball_data \
+    && python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" stopwords \
+    && python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" punkt_tab \
+    && apt-get --yes purge build-essential default-libmysqlclient-dev pkg-config \
+    && apt-get --yes autoremove --purge \
+    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
-# Copy frontend build
-COPY --from=frontend-builder /src/src/documents/static/frontend/ /usr/src/paperless/src/documents/static/frontend/
+# Copy backend source
+COPY --from=source /src/src ./
 
-# Install Python dependencies
-ENV UV_LINK_MODE=copy
-RUN uv export --frozen --no-dev --no-editable --no-emit-project \
-    --output-file /tmp/requirements.txt 2>/dev/null \
-    && uv pip install --system -r /tmp/requirements.txt \
-    && rm /tmp/requirements.txt \
-    || uv pip install --system -e ".[postgres]"
+# Copy compiled frontend
+COPY --from=compile-frontend /src/src/documents/static/frontend/ ./documents/static/frontend/
 
-# Download NLTK data
-RUN python3 -c "import nltk; nltk.download('snowball_data', download_dir='/usr/share/nltk_data'); nltk.download('stopwords', download_dir='/usr/share/nltk_data'); nltk.download('punkt_tab', download_dir='/usr/share/nltk_data')" 2>/dev/null || true
+# Create user and finalize
+RUN set -eux \
+    && addgroup --gid 1000 paperless \
+    && useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
+    && mkdir -p /usr/src/paperless/data /usr/src/paperless/media \
+        /usr/src/paperless/consume /usr/src/paperless/export \
+    && chown -R paperless:paperless /usr/src/paperless \
+    && s6-setuidgid paperless python3 manage.py collectstatic --clear --no-input --link \
+    && s6-setuidgid paperless python3 manage.py compilemessages
 
-# Create paperless user
-RUN groupadd -g 1000 paperless \
-    && useradd -u 1000 -g paperless -d /usr/src/paperless paperless
-
-# Collect static files
-RUN cd src && python3 manage.py collectstatic --noinput 2>/dev/null || true
-RUN cd src && python3 manage.py compilemessages 2>/dev/null || true
-
-# Purge build dependencies
-RUN apt-get purge -y build-essential pkg-config libpq-dev \
-    && apt-get autoremove -y \
-    && rm -rf /var/lib/apt/lists/*
-
-# Volumes
-VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/usr/src/paperless/consume", "/usr/src/paperless/export"]
+VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", \
+        "/usr/src/paperless/consume", "/usr/src/paperless/export"]
+ENTRYPOINT ["/init"]
 
 EXPOSE 8000
 
-HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
-    CMD curl -f http://localhost:8000 || exit 1
+HEALTHCHECK --interval=30s --timeout=10s --retries=5 \
+    CMD [ "curl", "-fs", "-S", "-L", "--max-time", "2", "http://localhost:8000" ]
 
 LABEL org.opencontainers.image.title="Paperless-ngx"
 LABEL org.opencontainers.image.description="Self-hosted document management system"
 LABEL org.opencontainers.image.version="${CONTAINER_APP_VERSION}"
 LABEL org.opencontainers.image.source="https://forge.eblu.me/eblume/blumeops"
 LABEL org.opencontainers.image.vendor="blumeops"
-
-ENTRYPOINT ["/init"]