Last active 3 months ago

As shown on https://youtu.be/P-Pr7Dy6mP8

MrGeneration's Avatar MrGeneration revised this gist 3 months ago. Go to revision

5 files changed, 479 insertions, 1 deletion

.env(file created)

@@ -0,0 +1,72 @@
1 + # The UID and GID of the user used to run paperless in the container. Set this
2 + # to your UID and GID on the host so that you have write access to the
3 + # consumption directory.
4 + USERMAP_UID=1001
5 + USERMAP_GID=1001
6 +
7 + # Database credentials
8 + POSTGRES_DB=paperless
9 + POSTGRES_USER=paperless
10 + POSTGRES_PASSWORD=paperless
11 +
12 + PAPERLESS_DBHOST=paperless-ngx-db
13 + PAPERLESS_DBNAME=$POSTGRES_DB
14 + PAPERLESS_DBUSER=$POSTGRES_USER
15 + PAPERLESS_DBPASS=$POSTGRES_PASSWORD
16 +
17 + # Additional languages to install for text recognition, separated by a
18 + # whitespace. Note that this is
19 + # different from PAPERLESS_OCR_LANGUAGE (default=eng), which defines the
20 + # language used for OCR.
21 + # The container installs English, German, Italian, Spanish and French by
22 + # default.
23 + # See https://packages.debian.org/search?keywords=tesseract-ocr-&searchon=names&suite=buster
24 + # for available languages.
25 + PAPERLESS_OCR_LANGUAGES=deu fra eng nld
26 +
27 + ###############################################################################
28 + # Paperless-specific settings #
29 + ###############################################################################
30 +
31 + # All settings defined in the paperless.conf.example can be used here. The
32 + # Docker setup does not use the configuration file.
33 + # A few commonly adjusted settings are provided below.
34 +
35 + # This is required if you will be exposing Paperless-ngx on a public domain
36 + # (if doing so please consider security measures such as reverse proxy)
37 + PAPERLESS_URL=https://paperless.domain.tld
38 + # Adjust this key if you plan to make paperless available publicly. It should
39 + # be a very long sequence of random characters. You don't need to remember it.
40 + PAPERLESS_SECRET_KEY=GENERATEME
41 +
42 + # Use this variable to set a timezone for the Paperless Docker containers. If not specified, defaults to UTC.
43 + PAPERLESS_TIME_ZONE=Europe/Berlin
44 +
45 + # The default language to use for OCR. Set this to the language most of your
46 + # documents are written in.
47 + PAPERLESS_OCR_LANGUAGE=deu
48 +
49 + # Set if accessing paperless via a domain subpath e.g. https://domain.com/PATHPREFIX and using a reverse-proxy like traefik or nginx
50 + #PAPERLESS_FORCE_SCRIPT_NAME=/PATHPREFIX
51 + #PAPERLESS_STATIC_URL=/PATHPREFIX/static/ # trailing slash required
52 +
53 + PAPERLESS_CONSUMER_ENABLE_BARCODES=true
54 +
55 + PAPERLESS_OCR_USER_ARGS= '{"continue_on_soft_render_error": true, "invalidate_digital_signatures": true}'
56 +
57 + PAPERLESS_USE_X_FORWARD_HOST=true
58 + PAPERLESS_CONSUMER_POLLING=10
59 + PAPERLESS_CONSUMER_POLLING_RETRY_COUNT=5
60 + PAPERLESS_CONSUMER_POLLING_DELAY=20
61 +
62 + PAPERLESS_ADMIN_USER=MyAmazingAdmin
63 + PAPERLESS_ADMIN_MAIL=alias@domain.tld
64 + PAPERLESS_ADMIN_PASSWORD="SuperSecure!"
65 +
66 + PAPERLESS_EMAIL_HOST=mail.domain.tld
67 + PAPERLESS_EMAIL_PORT=587
68 + PAPERLESS_EMAIL_HOST_USER=paperless@domain.tld
69 + PAPERLESS_EMAIL_HOST_PASSWORD="SuperSecureToo!"
70 + PAPERLESS_EMAIL_USE_TLS=false
71 + PAPERLESS_EMAIL_USE_SSL=true
72 +

README(file created)

@@ -0,0 +1,16 @@
1 + Last change: 2025-09-25 Marcel Herrguth
2 + As shown on https://youtu.be/P-Pr7Dy6mP8
3 + Attribution back to this GIST is required!
4 +
5 + Parts of this script are based on https://github.com/zammad/zammad/tree/develop/contrib/backup
6 + This script supports PostgreSQL (docker) stacks only and expects your media to be in a mounted folder.
7 +
8 + Restoration only works on a pre-installed Paperless NGX installation of the same version (or higher).
9 + During Restore, this script will remove the existing database and stop all relevant containers.
10 + Containers will be re-started after the restoration finished.
11 +
12 + COMMAND REFERENCE
13 + - backup: creates a new snapshot and backs up database and storage.
14 + - restore <snapshot>: Restore a given snapshot from your backup. If you omit the snapshot ID, latest will be used.
15 + - list_snapshots: List all available snapshot IDs.
16 + - verify_health <percentage>: Verify the health of your backup repository. If you omit percentage, 100% will be assumed.

docker-compose.yml(file created)

@@ -0,0 +1,104 @@
1 + services:
2 + paperless-ngx-broker:
3 + image: docker.io/library/redis:8.2-alpine
4 + container_name: paperless-ngx-broker
5 + hostname: paperless-ngx-broker
6 + restart: always
7 + networks:
8 + - paperless-ngx
9 + volumes:
10 + - ./data/redis:/data
11 +
12 + paperless-ngx-db:
13 + image: docker.io/library/postgres:17-alpine
14 + container_name: paperless-ngx-db
15 + hostname: paperless-ngx-db
16 + restart: always
17 + networks:
18 + - paperless-ngx
19 + volumes:
20 + - ./pg:/var/lib/postgresql/data
21 +
22 + paperless-ngx-webserver:
23 + image: ghcr.io/paperless-ngx/paperless-ngx:2.18.2
24 + restart: always
25 + container_name: paperless-ngx-webserver
26 + hostname: paperless-ngx-webserver
27 + depends_on:
28 + - paperless-ngx-db
29 + - paperless-ngx-broker
30 + - paperless-ngx-gotenberg
31 + - paperless-ngx-tika
32 + networks:
33 + - paperless-ngx
34 + - external_network
35 + volumes:
36 + - ./data/paperless:/usr/src/paperless/data
37 + - ./data/media:/usr/src/paperless/media
38 + - ./volume/_consume:/usr/src/paperless/consume:z
39 + - ./volume/_export:/usr/src/paperless/export:z
40 + ports:
41 + - "127.0.0.1:8000:8000"
42 + env_file: .env
43 + environment:
44 + PAPERLESS_REDIS: redis://paperless-ngx-broker:6379
45 + PAPERLESS_TIKA_ENABLED: 1
46 + PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://paperless-ngx-gotenberg:3000
47 + PAPERLESS_TIKA_ENDPOINT: http://paperless-ngx-tika:9998
48 +
49 + paperless-ngx-gotenberg:
50 + image: docker.io/gotenberg/gotenberg:8.22
51 + restart: always
52 + networks:
53 + - paperless-ngx
54 + container_name: paperless-ngx-gotenberg
55 + hostname: paperless-ngx-gotenberg
56 +
57 + # The gotenberg chromium route is used to convert .eml files. We do not
58 + # want to allow external content like tracking pixels or even javascript.
59 + # environment:
60 + # CHROMIUM_DISABLE_ROUTES: 1
61 + command:
62 + - 'gotenberg'
63 + - "--chromium-disable-javascript=true"
64 + - "--chromium-allow-list=file:///tmp/.*"
65 + # - "--chromium-disable-routes=true"
66 + # - "--chromium-restart-after=5"
67 + # - "--chromium-auto-start=true"
68 + # - "--chromium-ignore-certificate-errors=true"
69 + # - "--chromium-disable-web-security=true"
70 + # - '--chromium-allow-insecure-localhost=true'
71 + # - "--chromium-start-timeout=30s"
72 + # - "--uno-listener-start-timeout=180s"
73 + # - "--libreoffice-disable-routes=false"
74 + # - "--libreoffice-auto-start=true"
75 + # - "--libreoffice-restart-after=5"
76 + # - '--libreoffice-start-timeout=30s'
77 + # - '--api-timeout=3000s'
78 +
79 +
80 + paperless-ngx-tika:
81 + image: ghcr.io/paperless-ngx/tika:2.9.1-full
82 + container_name: paperless-ngx-tika
83 + hostname: paperless-ngx-tika
84 + restart: always
85 + networks:
86 + - paperless-ngx
87 +
88 + networks:
89 + paperless-ngx:
90 + name: paperless-ngx
91 + internal: true
92 + external_network:
93 + enable_ipv6: true
94 + ipam:
95 + config:
96 + - subnet: fd63:e614:1cf8:fb00::2:0/112
97 +
98 + # volumes:
99 + # consume:
100 + # driver: local
101 + # driver_opts:
102 + # type: nfs
103 + # o: addr=yamato.tha.vpn,nfsvers=4.1,nolock,soft,rw,async
104 + # device: :/volume1/import/_consume

init (file deleted)

@@ -1 +0,0 @@
1 - TODO

restic_paperless.sh(file created)

@@ -0,0 +1,287 @@
1 + #!/usr/bin/env bash
2 + #
3 + # 2025-09-25 Marcel Herrguth
4 + # As shown on https://youtu.be/P-Pr7Dy6mP8
5 + # Attribution back to this GIST is required!
6 + #
7 + # Parts of this script are based on https://github.com/zammad/zammad/tree/develop/contrib/backup
8 + # This script supports PostgreSQL (docker) stacks only and expects your media to be in a mounted folder.
9 + #
10 + # Restoration only works on a pre-installed Paperless NGX installation of the same version (or higher).
11 + # During Restore, this script will remove the existing database and stop all relevant containers.
12 + # Containers will be re-started after the restoration finished.
13 +
14 + #----- CONFIG -------------------------------------------------------------------------------#
15 + # Use either a directory or a supported repository type (like s3:https://domain.tld/bucket)
16 + BACKUP_REPOSITORY='s3:https://s3.domain.tld/resticsample'
17 +
18 + # Data directory where your Paperless NGX stores your files
19 + # If you're using volumes, use /var/lib/docker/volumes/paperless_*
20 + # Alternatively, use several folder paths separated by spaces if needed
21 + DATA_DIR='/opt/paperless-ngx/data/'
22 + # Path and filename of the environment file of your stack
23 + # This file will be evaluated for access credentials
24 + ENV_FILE='/opt/paperless-ngx/.env'
25 + # Paperless stack name
26 + # Used as the prefix to find the stack's running containers (e.g. '<STACK_NAME>-db')
27 + STACK_NAME='paperless-ngx'
28 + # If you're not using PostgreSQL but SQLite, set the following to 'no'
29 + BACKUP_DATABASE='yes'
30 +
31 + # How many daily and hourly snapshots should stay available?
32 + HOLD_DAYS=14
33 + HOLD_HOURLY=1
34 +
35 + # Do you want to cause downtime during backup? (This is the safest backup way)
36 + STOP_DURING_BACKUP='yes'
37 + #----- CONFIG END ---------------------------------------------------------------------------#
38 +
39 + # ---- Magic --------------------------------------------------------------------------------#
# Validates the environment before any backup/restore work starts and looks up
# the IDs of the currently running stack containers.
#
# Reads:  BACKUP_REPOSITORY, STACK_NAME, AWS_ACCESS_KEY_ID,
#         AWS_SECRET_ACCESS_KEY, RESTIC_PASSWORD
# Sets:   DC_DB, DC_BROKER, DC_GOTENBERG, DC_TIKA, DC_WEB (container IDs; a
#         variable stays empty when no matching container is running)
# Exits:  1 when the S3 credentials or RESTIC_PASSWORD are missing.
function pre_flight () {
  # Verify that we have all we need.
  if [[ "${BACKUP_REPOSITORY}" == s3:* ]]; then
    if [[ -z "${AWS_ACCESS_KEY_ID}" || -z "${AWS_SECRET_ACCESS_KEY}" ]]; then
      echo "You have chosen to backup to S3 (it seems), but AWS_ACCESS_KEY_ID and/or AWS_SECRET_ACCESS_KEY is not set."
      exit 1
    fi
  else
    echo "This script has been tested with S3 only. This should be fine."
    echo "Ensure that the required environment information for restic are available as per their documentation:"
    echo "https://restic.readthedocs.io/en/stable/030_preparing_a_new_repo.html"
  fi

  if [[ -z "${RESTIC_PASSWORD}" ]]; then
    echo "You forgot to set RESTIC_PASSWORD which is mandatory for your backup repository!"
    exit 1
  fi

  # Get all currently running NGX containers.
  # Filtering on the container name (substring match) avoids false hits that
  # grepping the whole `docker ps` row could produce via image or command text.
  DC_DB=$(docker ps --quiet --filter "name=${STACK_NAME}-db")
  DC_BROKER=$(docker ps --quiet --filter "name=${STACK_NAME}-broker")
  DC_GOTENBERG=$(docker ps --quiet --filter "name=${STACK_NAME}-gotenberg")
  DC_TIKA=$(docker ps --quiet --filter "name=${STACK_NAME}-tika")
  DC_WEB=$(docker ps --quiet --filter "name=${STACK_NAME}-webserver")
}
65 +
# Starts the webserver, Tika, Gotenberg and broker containers whose IDs are
# held in the DC_* variables. The database container is not in this list.
function start_paperless () {
  local -a app_containers

  # Deliberately unquoted: an empty or multi-ID variable must split into zero
  # or several arguments.
  app_containers=( $DC_WEB $DC_TIKA $DC_GOTENBERG $DC_BROKER )

  printf '%s\n' "# Starting Paperless"
  docker start "${app_containers[@]}"
}
70 +
# Stops the webserver, Tika, Gotenberg and broker containers and then counts
# down 15 seconds on a single console line before returning.
function stop_paperless () {
  local -a app_containers
  local seconds_left

  # Deliberately unquoted: an empty or multi-ID variable must split into zero
  # or several arguments.
  app_containers=( $DC_WEB $DC_TIKA $DC_GOTENBERG $DC_BROKER )

  printf '%s\n' "# Stopping Paperless"
  docker stop "${app_containers[@]}"

  for (( seconds_left = 15; seconds_left >= 1; seconds_left-- )); do
    # The carriage return keeps the countdown on one line.
    printf '... Waiting %s seconds for the stack to stop.\r' "${seconds_left}"
    sleep 1
  done
}
79 +
# Aborts the script when a required value is empty.
#
# Arguments:
#   $1  the value to check (typically an expanded variable)
#   $2  optional: name to show in the error message; defaults to $1, which
#       reproduces the message of the previous implementation
# Exits: 1 when $1 is empty or missing.
function ensure_variable_set () {
  local value="${1:-}"
  local label="${2:-${value}}"

  # Quoted on purpose: an unquoted test breaks on values containing spaces.
  if [ -z "${value}" ]; then
    echo "ERROR: environment variable ${label} not set!"
    exit 1
  fi
}
86 +
# Loads the database credentials and the user mapping from ENV_FILE.
#
# Reads:  ENV_FILE
# Sets:   POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD, USERMAP_UID,
#         USERMAP_GID (only those present in the file)
# Exits:  1 when ENV_FILE does not exist.
#
# The file is parsed as data instead of being passed to `eval`, so values
# containing spaces, `;`, `$(...)` or backticks are taken literally and can
# never be executed as shell code.
function get_db_credentials () {
  local line key value
  local dq_re='^"(.*)"$'
  local sq_re="^'(.*)'\$"

  if [ ! -f "${ENV_FILE}" ]; then
    echo "ERROR: Could not find the configured environment file!"
    exit 1
  fi

  # `|| [ -n "${line}" ]` also handles a last line without trailing newline.
  while IFS= read -r line || [ -n "${line}" ]; do
    line="${line%$'\r'}"                 # tolerate CRLF line endings
    [[ "${line}" == *=* ]] || continue

    key="${line%%=*}"
    value="${line#*=}"

    case "${key}" in
      POSTGRES_DB|POSTGRES_USER|POSTGRES_PASSWORD|USERMAP_UID|USERMAP_GID) ;;
      *) continue ;;
    esac

    if [[ "${value}" =~ ${dq_re} ]]; then
      value="${BASH_REMATCH[1]}"         # strip one pair of double quotes
    elif [[ "${value}" =~ ${sq_re} ]]; then
      value="${BASH_REMATCH[1]}"         # strip one pair of single quotes
    else
      value="${value%%[[:space:]]#*}"    # drop an inline " # comment"
      value="${value%"${value##*[![:space:]]}"}"   # trim trailing whitespace
    fi

    # No `local` for the target name, so the assignment lands in global scope.
    printf -v "${key}" '%s' "${value}"
  done < "${ENV_FILE}"
}
95 +
# Failure path: bring the Paperless containers back up via start_paperless,
# then terminate the script with exit status 1.
function kind_exit () {
  local -r failure_status=1

  start_paperless
  exit "${failure_status}"
}
101 +
# Applies the retention policy (HOLD_HOURLY / HOLD_DAYS) to the repository with
# `restic forget --prune`, without grouping snapshots.
function delete_old_backups () {
  local -a retention_flags

  # Left unquoted so the expansion matches the plain command-line form.
  retention_flags=( --keep-hourly $HOLD_HOURLY --keep-daily $HOLD_DAYS )

  printf '%s\n' "# Invoking cleanup as per snapshot rules"
  restic -r "$BACKUP_REPOSITORY" forget --group-by '' "${retention_flags[@]}" --prune
}
106 +
# Creates a new restic snapshot of DATA_DIR and, if enabled, a fresh
# PostgreSQL dump.
#
# Reads:  STOP_DURING_BACKUP, BACKUP_DATABASE, DATA_DIR, BACKUP_REPOSITORY,
#         DC_DB, POSTGRES_DB, POSTGRES_USER
# Exits:  1 (via kind_exit) when the database dump fails,
#         restic's exit status when the snapshot could not be created cleanly.
#
# Fixes compared to the previous version:
#   * `stop_paperless if ...` / `start_paperless if ...` is not a Bash
#     conditional - the stack was always stopped. STOP_DURING_BACKUP is now
#     really evaluated.
#   * BACKUP_DATABASE='no' now skips the dump as the config section promises.
#   * Every non-zero pg_dump status counts as a failure, not only status 1.
function write_backup () {
  local dump_file='/tmp/paperless_db.psql'
  local state
  local -a backup_paths

  if [ "${STOP_DURING_BACKUP}" == 'yes' ]; then
    stop_paperless
  fi

  # DATA_DIR may list several space-separated paths, so it is split on purpose.
  backup_paths=( ${DATA_DIR} )

  if [ "${BACKUP_DATABASE}" == 'yes' ]; then
    echo "# Creating postgresql backup..."

    docker exec "${DC_DB}" pg_dump --dbname "${POSTGRES_DB}" \
      --username "${POSTGRES_USER}" \
      --no-privileges --no-owner > "${dump_file}"

    state=$?

    if [ "${state}" -ne 0 ]; then
      echo -e "\n\n # ERROR(${state}) - Database credentials are wrong or database server configuration is invalid."
      echo -e " #-> BACKUP WAS NOT SUCCESSFUL"

      # Do not leave a partial dump behind.
      rm -f "${dump_file}"
      # Restarts the stack and exits with status 1.
      kind_exit
    fi

    backup_paths+=( "${dump_file}" )
  fi

  restic -r "$BACKUP_REPOSITORY" migrate
  restic --no-scan --read-concurrency=25 --pack-size=128 --compression off -r "$BACKUP_REPOSITORY" backup \
    "${backup_paths[@]}"

  state=$?

  # clean up temporary database dump
  rm -f "${dump_file}"

  if [ "${STOP_DURING_BACKUP}" == 'yes' ]; then
    start_paperless
  fi

  if [ "${state}" -eq 1 ]; then
    echo "# FATAL - Restic could not create the snapshot."
  elif [ "${state}" -eq 3 ]; then
    echo "# WARNING - Files changed during snapshot creation; Snapshot potentially incomplete!"
  fi

  if [ "${state}" -gt 0 ]; then
    echo "# BACKUP WAS NOT SUCCESSFUL"
    exit "${state}"
  fi
}
147 +
# Runs the pre-flight checks and then prints all snapshots stored in the
# backup repository.
function list_available_snapshots () {
  local -a list_cmd=( restic -r "$BACKUP_REPOSITORY" snapshots )

  pre_flight
  printf '%s\n' "# Here's your currently available snapshots in your backup repository:"
  "${list_cmd[@]}"
}
153 +
# Verifies the integrity of the backup repository with `restic check`.
#
# Arguments:
#   $1  optional: percentage of the repository data to read; defaults to 100.
#       A trailing '%' is accepted and stripped ("50" and "50%" are equal).
# Sets:  VERIFY_PERCENTAGE (numeric part only)
function verify_backup_repository_health () {
  pre_flight

  VERIFY_PERCENTAGE="${1:-100}"
  # Strip a user-supplied '%' so restic does not receive "50%%".
  VERIFY_PERCENTAGE="${VERIFY_PERCENTAGE%\%}"

  echo "# Checking ${VERIFY_PERCENTAGE}% of your repository ..."
  restic -r "$BACKUP_REPOSITORY" check --read-data-subset="${VERIFY_PERCENTAGE}%"
}
162 +
# Decides which snapshot gets restored and stores it in RESTORE_SNAPSHOT_ID.
#
# Arguments:
#   $1  optional snapshot ID. When empty, 'latest' is chosen and a 15 second
#       countdown is shown before the function returns.
function get_snapshot_id () {
  local seconds_left

  if [ -z "${1}" ]; then
    # User did not provide snapshot so we'll hard guess 'latest'
    RESTORE_SNAPSHOT_ID='latest'

    echo "... No Snapshot provided, guessing you want to restore 'latest'!"
    for (( seconds_left = 15; seconds_left >= 1; seconds_left-- )); do
      printf '... You have %s seconds to abort.\r' "${seconds_left}"
      sleep 1
    done
    return
  fi

  RESTORE_SNAPSHOT_ID="${1}"
}
176 +
# Restores the database and the data directories from a restic snapshot.
#
# Reads:  RESTORE_SNAPSHOT_ID (falls back to 'latest'), BACKUP_DATABASE,
#         BACKUP_REPOSITORY, DATA_DIR, DC_DB, POSTGRES_DB, POSTGRES_USER,
#         USERMAP_UID, USERMAP_GID
# Exits:  1 (via kind_exit, after restarting the stack) on database errors,
#         1 (stack stays stopped) when the file restore fails.
#
# Fixes compared to the previous version:
#   * The database dump was always taken from 'latest', even when another
#     snapshot was requested; it now uses the same snapshot as the files.
#   * `docker exec` needs `-i`, otherwise the piped dump never reaches psql.
#   * `psql -c` cannot mix a meta-command (\c) with SQL, and CREATE DATABASE
#     uses OWNER, not OWNED BY. DROP and CREATE now run as separate `-c`
#     commands while connected to the maintenance database 'postgres'.
#   * All stages of the restore pipeline are checked, not only the last one.
#   * Several space-separated DATA_DIR paths each get their own --include.
function restore_backup () {
  local snapshot="${RESTORE_SNAPSHOT_ID:-latest}"
  local state stage_state data_path
  local -a pipeline_states include_args

  stop_paperless

  if [ "${BACKUP_DATABASE}" == 'yes' ]; then
    echo "# ... Dropping current database ${POSTGRES_DB}"

    # Identifiers are double-quoted so unusual names stay valid SQL.
    docker exec "${DC_DB}" psql -U "${POSTGRES_USER}" -d postgres \
      -c "DROP DATABASE IF EXISTS \"${POSTGRES_DB}\";" \
      -c "CREATE DATABASE \"${POSTGRES_DB}\" OWNER \"${POSTGRES_USER}\";"

    state=$?

    if [ "${state}" -ne 0 ]; then
      echo -e "\n\n # ERROR(${state}) - Database credentials are wrong or database server configuration is invalid."
      echo -e " #-> RESTORE WAS NOT SUCCESSFUL"

      kind_exit
    fi

    echo "# Restoring PostgreSQL DB"

    # We're removing uncritical dump information that caused "ugly" error
    # messages on older script versions. These could safely be ignored.
    restic -r "$BACKUP_REPOSITORY" dump "${snapshot}" '/tmp/paperless_db.psql' | \
      sed -e '/^CREATE EXTENSION IF NOT EXISTS plpgsql/d' \
          -e '/^COMMENT ON EXTENSION plpgsql/d' | \
      docker exec -i "${DC_DB}" psql -U "${POSTGRES_USER}" -d "${POSTGRES_DB}"

    # Must be copied immediately; the next command overwrites PIPESTATUS.
    pipeline_states=( "${PIPESTATUS[@]}" )

    # Report the first failing stage (restic dump, sed or psql).
    state=0
    for stage_state in "${pipeline_states[@]}"; do
      if [ "${stage_state}" -ne 0 ]; then
        state="${stage_state}"
        break
      fi
    done

    if [ "${state}" -ne 0 ]; then
      # We're checking for critical restoration errors
      # It may not cover all possible errors which is out of scope of this script
      echo -e "\n\n # ERROR(${state}) - Database credentials are wrong or database server configuration is invalid."
      echo -e " #-> RESTORE WAS NOT SUCCESSFUL"

      kind_exit
    fi
  fi

  echo "# Restoring Files"

  # DATA_DIR may list several space-separated paths, so it is split on purpose.
  include_args=()
  for data_path in ${DATA_DIR}; do
    include_args+=( --include "${data_path}" )
  done

  restic -r "$BACKUP_REPOSITORY" restore "${snapshot}" "${include_args[@]}" --target /

  state=$?

  if [ "${state}" -ne 0 ]; then
    echo "# ERROR(${state}) - File restore reported an error."
    echo "- Check file permissions, and ensure Paperless IS NOT running, and try again."
    echo -e " \n# RESTORE WAS NOT SUCCESSFUL"
    exit 1
  fi

  echo "# Ensuring correct file permissions ..."
  # DATA_DIR is split on purpose (see above).
  chown -R "${USERMAP_UID}:${USERMAP_GID}" ${DATA_DIR}

  start_paperless
}
222 +
# Prints the "backup started" banner with the current date, framed by blank
# lines.
function start_backup_message () {
  printf '\n# Backup script started - %s!\n\n' "$(date)"
}
226 +
# Prints the "restore started" banner with the current date, framed by blank
# lines.
function start_restore_message () {
  printf '\n# Restore script started - %s!\n\n' "$(date)"
}
230 +
# Prints the "backup finished" banner with the current date, framed by blank
# lines.
function finished_backup_message () {
  printf '\n# Backup script finished; Check output! - %s!\n\n' "$(date)"
}
234 +
# Prints the "restore finished" banner with the current date, framed by blank
# lines.
function finished_restore_message () {
  printf '\n# Restore script finished; Check output! - %s!\n\n' "$(date)"
}
238 +
# Runs the complete backup workflow: checks, credential loading, snapshot
# creation and retention cleanup, framed by the start/finish banners.
function execute_backup () {
  local step
  local -a workflow=(
    pre_flight
    get_db_credentials
    start_backup_message
    write_backup
    delete_old_backups
    finished_backup_message
  )

  # Each step terminates the script itself on a fatal error.
  for step in "${workflow[@]}"; do
    "${step}"
  done
}
247 +
# Runs the complete restore workflow.
#
# Arguments:
#   $1  optional snapshot ID; when empty, get_snapshot_id picks 'latest'.
function execute_restoration () {
  pre_flight
  get_db_credentials
  start_restore_message
  # Quoted so the ID is passed on verbatim (no word splitting or globbing).
  get_snapshot_id "${1}"
  restore_backup
  finished_restore_message
}
256 +
# Prints the list of supported sub-commands.
function command_reference () {
  # Quoted delimiter: the text is emitted verbatim, nothing gets expanded.
  cat <<'USAGE'
COMMAND REFERENCE
- backup: creates a new snapshot and backs up database and storage.
- restore <snapshot>: Restore a given snapshot from your backup. If you omit the snapshot ID, latest will be used.
- list_snapshots: List all available snapshot IDs.
- verify_health <percentage>: Verify the health of your backup repository. If you omit percentage, 100% will be assumed.
USAGE
}
264 +
265 + # ---- Option part and control --------------------------------------------------------------#
266 +
# Command dispatch: the first script argument selects the action, the second
# one is the optional snapshot ID (restore) or percentage (verify_health).
# Anything else prints the command reference.
case "${1}" in
  backup)
    execute_backup
    ;;

  restore)
    # Quoted so the snapshot ID is passed on verbatim (no word splitting or
    # globbing); an omitted ID arrives as an empty string.
    execute_restoration "${2}"
    ;;

  list_snapshots)
    list_available_snapshots
    ;;

  verify_health)
    verify_backup_repository_health "${2}"
    ;;

  *)
    command_reference
    ;;
esac

MrGeneration's Avatar MrGeneration revised this gist 4 months ago. Go to revision

1 file changed, 1 insertion

init(file created)

@@ -0,0 +1 @@
1 + TODO
Newer Older