#!/usr/bin/env bash
#
# 2025-09-25 Marcel Herrguth
# As shown on https://youtu.be/P-Pr7Dy6mP8
# Attribution back to this GIST is required!
#
# Parts of this script are based on https://github.com/zammad/zammad/tree/develop/contrib/backup
# This script supports PostgreSQL (docker) stacks only and expects your media to be in a mounted folder.
#
# Restoration only works on a pre-installed Paperless NGX installation of the same version (or higher).
# During Restore, this script will remove the existing database and stop all relevant containers.
# Containers will be re-started after the restoration finished.

#----- CONFIG -------------------------------------------------------------------------------#
# Use either a directory or a supported repository type (like s3:https://domain.tld/bucket)
BACKUP_REPOSITORY='s3:https://s3.domain.tld/resticsample'

# Data directory where your Paperless NGX stores your files.
# If you're using volumes, use /var/lib/docker/volumes/paperless_*
# Alternatively, use several folder paths separated by spaces if needed.
DATA_DIR='/opt/paperless-ngx/data/'
# Path and filename of the environment file of your stack.
# This file is read for POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD,
# USERMAP_UID and USERMAP_GID.
ENV_FILE='/opt/paperless-ngx/.env'
# Paperless stack name.
# Used as the prefix to find the stack's running containers
# (<STACK_NAME>-db, -broker, -gotenberg, -tika and -webserver).
STACK_NAME='paperless-ngx'
# If you're not using PostgreSQL but SQLite, set the following to 'no'
BACKUP_DATABASE='yes'

# How many daily and hourly snapshots should stay available?
# Passed to restic's --keep-daily and --keep-hourly options.
HOLD_DAYS=14
HOLD_HOURLY=1

# Do you want to cause downtime during backup? (This is the safest way to back up.)
STOP_DURING_BACKUP='yes'
#----- CONFIG END ---------------------------------------------------------------------------#

# ---- Magic --------------------------------------------------------------------------------#
function pre_flight () {
  # Verifies that the credentials restic needs are present in the environment
  # and records the IDs of the stack's currently running containers.
  #
  # Globals read:    BACKUP_REPOSITORY, STACK_NAME, AWS_ACCESS_KEY_ID,
  #                  AWS_SECRET_ACCESS_KEY, RESTIC_PASSWORD
  # Globals written: DC_DB, DC_BROKER, DC_GOTENBERG, DC_TIKA, DC_WEB
  #                  (empty when no matching container is running)
  # Exits with 1 if a mandatory credential is missing.
  if [[ "${BACKUP_REPOSITORY}" == s3:* ]]; then
    if [[ -z "${AWS_ACCESS_KEY_ID}" || -z "${AWS_SECRET_ACCESS_KEY}" ]]; then
      echo "You have chosen to backup to S3 (it seems), but AWS_ACCESS_KEY_ID and/or AWS_SECRET_ACCESS_KEY is not set." >&2
      exit 1
    fi
  else
    echo "This script has been tested with S3 only. This should be fine."
    echo "Ensure that the required environment information for restic are available as per their documentation:"
    echo "https://restic.readthedocs.io/en/stable/030_preparing_a_new_repo.html"
  fi

  if [[ -z "${RESTIC_PASSWORD}" ]]; then
    echo "You forgot to set RESTIC_PASSWORD which is mandatory for your backup repository!" >&2
    exit 1
  fi

  # Get all currently running NGX containers.
  # Let docker do the matching on the container name and print only the ID
  # instead of grepping through the human-readable table.
  DC_DB=$(docker ps --quiet --filter "name=${STACK_NAME}-db")
  DC_BROKER=$(docker ps --quiet --filter "name=${STACK_NAME}-broker")
  DC_GOTENBERG=$(docker ps --quiet --filter "name=${STACK_NAME}-gotenberg")
  DC_TIKA=$(docker ps --quiet --filter "name=${STACK_NAME}-tika")
  DC_WEB=$(docker ps --quiet --filter "name=${STACK_NAME}-webserver")
}

function start_paperless () {
  # Starts the webserver, Tika, Gotenberg and broker containers whose IDs
  # are stored in DC_WEB, DC_TIKA, DC_GOTENBERG and DC_BROKER.
  # Does nothing (besides a warning) when none of these IDs is known.
  local -a containers

  # Unquoted on purpose: an empty ID must vanish instead of becoming an
  # empty argument for docker.
  # shellcheck disable=SC2206
  containers=( $DC_WEB $DC_TIKA $DC_GOTENBERG $DC_BROKER )

  echo "# Starting Paperless"

  if (( ${#containers[@]} == 0 )); then
    echo "# WARNING - No Paperless containers known; nothing to start." >&2
    return 0
  fi

  docker start "${containers[@]}"
}

function stop_paperless () {
  # Stops the webserver, Tika, Gotenberg and broker containers whose IDs are
  # stored in DC_WEB, DC_TIKA, DC_GOTENBERG and DC_BROKER, then waits 15
  # seconds. The database container (DC_DB) is not touched here.
  # Does nothing (besides a warning) when none of these IDs is known.
  local -a containers
  local i

  # Unquoted on purpose: an empty ID must vanish instead of becoming an
  # empty argument for docker.
  # shellcheck disable=SC2206
  containers=( $DC_WEB $DC_TIKA $DC_GOTENBERG $DC_BROKER )

  echo "# Stopping Paperless"

  if (( ${#containers[@]} == 0 )); then
    echo "# WARNING - No running Paperless containers found; nothing to stop." >&2
    return 0
  fi

  docker stop "${containers[@]}"

  for i in {15..1}; do
    echo -ne "... Waiting $i seconds for the stack to stop.\r"; sleep 1
  done
  # Finish the carriage-return countdown line so that the next message does
  # not get written on top of it.
  echo
}

function ensure_variable_set () {
  # Aborts the script when a required value is empty.
  #
  # Arguments:
  #   $1 - the value to check (pass it quoted, e.g. "${RESTIC_PASSWORD}")
  #   $2 - optional: the variable's name, shown in the error message
  # Exits with 1 if $1 is empty or missing.
  if [[ -z "${1}" ]]; then
    echo "ERROR: environment variable ${2:-<unnamed>} not set!" >&2
    exit 1
  fi
}

function get_db_credentials () {
  # Reads POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD, USERMAP_UID and
  # USERMAP_GID from ENV_FILE into global shell variables of the same name.
  #
  # The file is parsed as data and never executed: values may therefore
  # contain spaces, '$', ';' and similar characters without side effects.
  # One pair of surrounding single or double quotes is removed; on unquoted
  # values a trailing " # comment" and trailing whitespace are dropped.
  #
  # Exits with 1 if ENV_FILE does not exist.
  if [[ ! -f "${ENV_FILE}" ]]; then
    echo "ERROR: Could not find the configured environment file!" >&2
    exit 1
  fi

  local line key value
  local wanted_re='^(POSTGRES_DB|POSTGRES_USER|POSTGRES_PASSWORD|USERMAP_UID|USERMAP_GID)=(.*)$'
  local dq_re='^"(.*)"$'
  local sq_re="^'(.*)'\$"

  # "|| [[ -n ... ]]" also processes a last line without trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    line="${line%$'\r'}"   # tolerate CRLF line endings
    [[ "${line}" =~ ${wanted_re} ]] || continue

    key="${BASH_REMATCH[1]}"
    value="${BASH_REMATCH[2]}"

    if [[ "${value}" =~ ${dq_re} || "${value}" =~ ${sq_re} ]]; then
      value="${BASH_REMATCH[1]}"
    else
      value="${value%%[[:space:]]#*}"              # inline comment
      value="${value%"${value##*[![:space:]]}"}"   # trailing whitespace
    fi

    # key can only be one of the five names matched above.
    printf -v "${key}" '%s' "${value}"
  done < "${ENV_FILE}"
}

function kind_exit () {
  # We're nice to our admin and bring Paperless back up before exiting.
  #
  # Arguments:
  #   $1 - optional exit status to terminate the script with (default: 1)
  start_paperless
  exit "${1:-1}"
}

function delete_old_backups () {
  # Applies the retention rules (HOLD_HOURLY, HOLD_DAYS) to the repository
  # via "restic forget --prune".
  #
  # Returns 1 without calling restic if a retention value is not a plain
  # non-negative integer, because an empty or malformed value would shift
  # the arguments of a command that deletes data. Otherwise returns restic's
  # exit status.
  local name

  for name in HOLD_HOURLY HOLD_DAYS; do
    if [[ ! "${!name}" =~ ^[0-9]+$ ]]; then
      echo "ERROR: ${name} must be a non-negative integer (got '${!name}'); skipping cleanup." >&2
      return 1
    fi
  done

  echo "# Invoking cleanup as per snapshot rules"
  restic -r "${BACKUP_REPOSITORY}" forget --group-by '' \
    --keep-hourly "${HOLD_HOURLY}" --keep-daily "${HOLD_DAYS}" --prune
}

function write_backup () {
  # Creates one restic snapshot containing DATA_DIR and, unless
  # BACKUP_DATABASE is set to something other than 'yes', a fresh pg_dump of
  # the Paperless database.
  #
  # Globals read: STOP_DURING_BACKUP, BACKUP_DATABASE, DATA_DIR, DC_DB,
  #               POSTGRES_DB, POSTGRES_USER, BACKUP_REPOSITORY
  # Exits via kind_exit if the database dump fails, or with restic's exit
  # status if the snapshot could not be created cleanly.
  local state
  # The path is fixed because it becomes the file's path inside the snapshot.
  local dump_file='/tmp/paperless_db.psql'
  local -a backup_paths

  # DATA_DIR may hold several space-separated paths and globs, so it is
  # split and expanded on purpose.
  # shellcheck disable=SC2206
  backup_paths=( ${DATA_DIR} )

  if [[ "${STOP_DURING_BACKUP}" == 'yes' ]]; then
    stop_paperless
  fi

  if [[ "${BACKUP_DATABASE:-yes}" == 'yes' ]]; then
    echo "# Creating postgresql backup..."

    # Subshell: the tightened umask only applies to the dump, which holds
    # the complete database content and must not be world-readable.
    rm -f -- "${dump_file}"
    (
      umask 077
      docker exec "${DC_DB}" pg_dump --dbname "${POSTGRES_DB}" \
        --username "${POSTGRES_USER}" \
        --no-privileges --no-owner > "${dump_file}"
    )
    state=$?

    # Any non-zero status (not just 1) means there is no usable dump.
    if (( state != 0 )); then
      echo -e "\n\n # ERROR(${state}) - Database credentials are wrong or database server configuration is invalid." >&2
      echo -e " #-> BACKUP WAS NOT SUCCESSFUL" >&2

      rm -f -- "${dump_file}"
      kind_exit
    fi

    backup_paths+=( "${dump_file}" )
  fi

  restic -r "${BACKUP_REPOSITORY}" migrate
  restic --no-scan --read-concurrency=25 --pack-size=128 --compression off \
    -r "${BACKUP_REPOSITORY}" backup "${backup_paths[@]}"
  state=$?

  # clean up temporary database dump
  rm -f -- "${dump_file}"

  if [[ "${STOP_DURING_BACKUP}" == 'yes' ]]; then
    start_paperless
  fi

  case "${state}" in
    1) echo "# FATAL - Restic could not create the snapshot." >&2 ;;
    3) echo "# WARNING - Files changed during snapshot creation; Snapshot potentially incomplete!" >&2 ;;
  esac

  if (( state > 0 )); then
    echo "# BACKUP WAS NOT SUCCESSFUL" >&2
    exit "${state}"
  fi
}

function list_available_snapshots () {
  # Runs the pre-flight checks and then prints every snapshot stored in
  # BACKUP_REPOSITORY, as reported by "restic snapshots".
  pre_flight

  printf '%s\n' "# Here's your currently available snapshots in your backup repository:"
  restic -r "${BACKUP_REPOSITORY}" snapshots
}

function verify_backup_repository_health () {
  # Lets restic verify the repository, reading the given share of the
  # stored data.
  #
  # Arguments:
  #   $1 - optional percentage of the data to read, with or without a
  #        trailing '%' (default: 100)
  # Globals written: VERIFY_PERCENTAGE (the number without '%')
  # Exits with 1 if $1 is not a number.
  local number_re='^[0-9]+([.][0-9]+)?$'

  pre_flight

  VERIFY_PERCENTAGE="${1:-100}"
  # Accept "25%" as well as "25"; the '%' is appended again below.
  VERIFY_PERCENTAGE="${VERIFY_PERCENTAGE%\%}"

  if [[ ! "${VERIFY_PERCENTAGE}" =~ ${number_re} ]]; then
    echo "ERROR: '${1}' is not a valid percentage (expected e.g. 25 or 2.5)." >&2
    exit 1
  fi

  echo "# Checking ${VERIFY_PERCENTAGE}% of your repository ..."
  restic -r "${BACKUP_REPOSITORY}" check --read-data-subset="${VERIFY_PERCENTAGE}%"
}

function get_snapshot_id () {
  # Stores the snapshot to restore in RESTORE_SNAPSHOT_ID.
  #
  # Arguments:
  #   $1 - snapshot ID; when empty or missing, 'latest' is used and the
  #        admin gets a 15 second countdown to abort.
  local remaining

  RESTORE_SNAPSHOT_ID="${1:-latest}"

  # An explicit snapshot needs no confirmation period.
  if [[ -n "${1}" ]]; then
    return 0
  fi

  printf '%s\n' "... No Snapshot provided, guessing you want to restore 'latest'!"
  for (( remaining = 15; remaining >= 1; remaining-- )); do
    # \r keeps the countdown on a single terminal line
    printf '... You have %s seconds to abort.\r' "${remaining}"
    sleep 1
  done
}

function restore_backup () {
  # Restores the snapshot RESTORE_SNAPSHOT_ID (default: latest): re-creates
  # the database from the dump stored in the snapshot (unless BACKUP_DATABASE
  # is set to something other than 'yes'), restores the DATA_DIR paths to
  # their original location and fixes their ownership.
  #
  # Globals read: RESTORE_SNAPSHOT_ID, BACKUP_DATABASE, BACKUP_REPOSITORY,
  #               DATA_DIR, DC_DB, POSTGRES_DB, POSTGRES_USER, USERMAP_UID,
  #               USERMAP_GID, ENV_FILE
  # Exits with 1 on invalid configuration or a failed file restore, and via
  # kind_exit when the database could not be restored.
  local state path
  local snapshot="${RESTORE_SNAPSHOT_ID:-latest}"
  local -a pipe_states data_paths include_args

  # DATA_DIR may list several space-separated paths. They are split without
  # glob expansion here, so restic evaluates the patterns itself even if the
  # directories do not exist on disk yet.
  read -r -a data_paths <<< "${DATA_DIR}"
  if (( ${#data_paths[@]} == 0 )); then
    echo "ERROR: DATA_DIR is empty; refusing to restore without a path to restore." >&2
    exit 1
  fi

  # Check before anything gets stopped or dropped.
  if [[ "${BACKUP_DATABASE:-yes}" == 'yes' ]] \
    && [[ -z "${DC_DB}" || -z "${POSTGRES_DB}" || -z "${POSTGRES_USER}" ]]; then
    echo "ERROR: Database container, POSTGRES_DB or POSTGRES_USER unknown; cannot restore the database." >&2
    exit 1
  fi

  stop_paperless

  if [[ "${BACKUP_DATABASE:-yes}" == 'yes' ]]; then
    echo "# ... Dropping current database ${POSTGRES_DB}"

    # Connect to the maintenance database "postgres": a database cannot be
    # dropped while being connected to it. One -c per statement, because
    # DROP DATABASE must not share an implicit transaction with others.
    docker exec "${DC_DB}" psql -U "${POSTGRES_USER}" -d postgres \
      -v ON_ERROR_STOP=1 \
      -c "DROP DATABASE IF EXISTS \"${POSTGRES_DB}\";" \
      -c "CREATE DATABASE \"${POSTGRES_DB}\" OWNER \"${POSTGRES_USER}\";"
    state=$?

    if (( state != 0 )); then
      echo -e "\n\n # ERROR(${state}) - Could not re-create database ${POSTGRES_DB}." >&2
      echo -e " #-> RESTORE WAS NOT SUCCESSFUL" >&2

      kind_exit
    fi

    echo "# Restoring PostgreSQL DB"

    # We're removing uncritical dump information that caused "ugly" error
    # messages on older script versions. These could safely be ignored.
    # "docker exec -i" is required so psql actually receives the piped dump.
    restic -r "${BACKUP_REPOSITORY}" dump "${snapshot}" '/tmp/paperless_db.psql' \
      | sed -e '/^CREATE EXTENSION IF NOT EXISTS plpgsql/d' \
            -e '/^COMMENT ON EXTENSION plpgsql/d' \
      | docker exec -i "${DC_DB}" psql -U "${POSTGRES_USER}" -d "${POSTGRES_DB}"
    pipe_states=( "${PIPESTATUS[@]}" )

    # Every stage counts: a failing "restic dump" would otherwise hide
    # behind a psql that happily processed empty input.
    for state in "${pipe_states[@]}"; do
      if (( state != 0 )); then
        echo -e "\n\n # ERROR(${state}) - Database restore failed (snapshot dump, database credentials or database server configuration)." >&2
        echo -e " #-> RESTORE WAS NOT SUCCESSFUL" >&2

        kind_exit
      fi
    done
  fi

  echo "# Restoring Files"
  for path in "${data_paths[@]}"; do
    # Normalise a trailing slash, but keep a lone "/" intact.
    if [[ "${path}" != '/' ]]; then
      path="${path%/}"
    fi
    include_args+=( --include "${path}" )
  done

  restic -r "${BACKUP_REPOSITORY}" restore "${snapshot}" "${include_args[@]}" --target /
  state=$?

  if (( state != 0 )); then
    echo "# ERROR(${state}) - File restore reported an error." >&2
    echo "- Check file permissions, and ensure Paperless IS NOT running, and try again." >&2
    echo -e " \n# RESTORE WAS NOT SUCCESSFUL" >&2
    exit 1
  fi

  echo "# Ensuring correct file permissions ..."
  if [[ -n "${USERMAP_UID}" && -n "${USERMAP_GID}" ]]; then
    # Split and glob-expanded on purpose, see the DATA_DIR description.
    # shellcheck disable=SC2086
    chown -R "${USERMAP_UID}:${USERMAP_GID}" ${DATA_DIR}
  else
    echo "# WARNING - USERMAP_UID/USERMAP_GID not found in ${ENV_FILE}; leaving file ownership untouched." >&2
  fi

  start_paperless
}

function start_backup_message () {
  # Prints the "backup started" banner with the current date, framed by
  # blank lines.
  printf '\n# Backup script started - %s!\n\n' "$(date)"
}

function start_restore_message () {
  # Prints the "restore started" banner with the current date, framed by
  # blank lines.
  printf '\n# Restore script started - %s!\n\n' "$(date)"
}

function finished_backup_message () {
  # Prints the "backup finished" banner with the current date, framed by
  # blank lines.
  printf '\n# Backup script finished; Check output! - %s!\n\n' "$(date)"
}

function finished_restore_message () {
  # Prints the "restore finished" banner with the current date, framed by
  # blank lines.
  printf '\n# Restore script finished; Check output! - %s!\n\n' "$(date)"
}

function execute_backup () {
  # Entry point of the "backup" command: runs every backup stage in order.
  # A stage that fails fatally terminates the script on its own.
  local stage

  for stage in \
    pre_flight \
    get_db_credentials \
    start_backup_message \
    write_backup \
    delete_old_backups \
    finished_backup_message
  do
    "${stage}"
  done
}

function execute_restoration () {
  # Entry point of the "restore" command: runs every restore stage in order.
  #
  # Arguments:
  #   $1 - optional snapshot ID to restore; handed to get_snapshot_id
  pre_flight
  get_db_credentials
  start_restore_message
  # Quoted so the ID always arrives as exactly one, unmodified argument.
  get_snapshot_id "$1"
  restore_backup
  finished_restore_message
}

function command_reference () {
  # Prints the list of supported sub-commands, one per line, to stdout.
  printf '%s\n' \
    "COMMAND REFERENCE" \
    "- backup: creates a new snapshot and backs up database and storage." \
    "- restore <snapshot>: Restore a given snapshot from your backup. If you omit the snapshot ID, latest will be used." \
    "- list_snapshots: List all available snapshot IDs." \
    "- verify_health <percentage>: Verify the health of your backup repository. If you omit percentage, 100% will be assumed."
}

# ---- Option part and control --------------------------------------------------------------#

# Dispatch on the first command-line argument. Without an argument (or with
# a help flag) the command reference is shown; an unknown command is
# reported as an error so that a typo in a cron job does not pass as success.
case "$1" in
  backup)
    execute_backup
    ;;

  restore)
    # Quoted so the snapshot ID is passed on as exactly one argument.
    execute_restoration "$2"
    ;;

  list_snapshots)
    list_available_snapshots
    ;;

  verify_health)
    verify_backup_repository_health "$2"
    ;;

  ''|help|-h|--help)
    command_reference
    ;;

  *)
    echo "ERROR: Unknown command '$1'." >&2
    command_reference
    exit 2
    ;;
esac