commit 05f759719f18b26449cd4d19e1d1d5fa342b78d5
parent a3d7c0b8abb3b0bf8ece84b811127ffc0f73e322
Author: Christian Grothoff <christian@grothoff.org>
Date: Sun, 20 Apr 2025 14:41:35 +0200
work on backup/restore logic
Diffstat:
13 files changed, 115 insertions(+), 50 deletions(-)
diff --git a/README b/README
@@ -79,13 +79,13 @@ server-side (see admin-logs/pixel/03-borg.txt), start the daily
backups via:
```
-$ ./start-borg-backups.sh
+$ ./start-borg-backups.sh $DEPLOYMENT
```
This will make a backup of basically everything relevant to the
deployment, **except** the exchange online signing keys. The
backup will in particular include the system configuration
-and a full (xz-compressed) snapshot of the database. Thus,
+and a full (gzip-compressed) snapshot of the database. Thus,
the backups should also suffice to diagnose problems.
Backups are set to retain daily snapshots of the last 7 days,
@@ -93,13 +93,23 @@ weekly snapshots for the last 4 weeks, and monthly snapshots
for the last 6 months.
+### Backup (right now)
+
+To run a backup "immediately" (instead of the daily regular
+backups), use:
+
+```
+$ ./backup.sh $DEPLOYMENT
+```
+
+
### Rebooting (into a new kernel)
This should be done via the 'reboot' playbook which can
be invoked via the
```
-$ ./reboot.sh $HOSTNAME
+$ ./reboot.sh $DEPLOYMENT
```
script. The reboot playbook first stops all Taler services,
@@ -108,17 +118,6 @@ restore to another system in case the host does not come back
online cleanly.
-### Running the import/export Playbooks (TOPS-only)
-
-This is used to manually import/export wire transfers from/to the
-bank:
-
-```
-$ ./export.sh
-$ ./import.sh $FILE.xml
-```
-
-
### Testing Locally
With podman and ansible installed locally one can run:
diff --git a/backup.sh b/backup.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+set -eu
+
+if [ -z "${1:-}" ] # quoted: target may be empty or contain whitespace
+then
+ echo "Call with 'spec' or another host/group to select target"
+ exit 1
+fi
+
+ansible-playbook \
+ --inventory inventories/default \
+ --limit "$1" \
+ playbooks/backup.yml
+
+exit 0
diff --git a/inventories/host_vars/spec/tops-public.yml b/inventories/host_vars/spec/tops-public.yml
@@ -1,6 +1,13 @@
# Public variables for the Taler Operations AG (TOPS) deployment
# Deploy challenger?
DEPLOY_CHALLENGER: true
+# Disable restore from backup? MUST be set to "false" once in production!
+# This forces a backup to be provided *if* there is no database on the
+# target system already. If such a database exists, we will NOT restore
+# any backup even if this is 'false'. If no database exists on the target
+# system and this option is 'false', then a backup must have been provided
+# at the originating host (you can get it using the 'restore.sh' script).
+DISABLE_RESTORE_BACKUP: false
# Use EBICS? (starts libeufin-nexus-fetch/submit services)
USE_EBICS: false
# Main domain name.
diff --git a/playbooks/backup.yml b/playbooks/backup.yml
@@ -1,5 +1,6 @@
---
- name: Backup GNU Taler Databases
hosts: all
+ any_errors_fatal: true
roles:
- role: backup
diff --git a/playbooks/borg-start.yml b/playbooks/borg-start.yml
@@ -1,6 +1,7 @@
---
- name: Start backups with borg
hosts: all
+ any_errors_fatal: true
roles:
- borg-start
vars:
diff --git a/playbooks/pixel-borg.yml b/playbooks/pixel-borg.yml
@@ -1,5 +1,6 @@
---
- name: Setup Borg repository on spec to receive backups from pixel
hosts: all
+ any_errors_fatal: true
roles:
- pixel_borg
diff --git a/playbooks/sanctionlist-check.yml b/playbooks/sanctionlist-check.yml
@@ -1,5 +1,6 @@
---
- name: Import a sanction list an run all records against it
hosts: all
+ any_errors_fatal: true
roles:
- exchange-sanctionlist-import
diff --git a/playbooks/setup.yml b/playbooks/setup.yml
@@ -1,6 +1,7 @@
---
- name: Deploy GNU Taler
hosts: all
+ any_errors_fatal: true
pre_tasks:
- name: "Fail if secrets are not defined"
ansible.builtin.fail:
diff --git a/restore.sh b/restore.sh
@@ -33,4 +33,4 @@ fi
borg extract \
--list \
"${REPO}::${LATEST}" \
- root/postgres-backup.sql.xz
+ root/postgres-backup.sql.gz
diff --git a/roles/borg-ssh-export/tasks/main.yml b/roles/borg-ssh-export/tasks/main.yml
@@ -3,7 +3,7 @@
apt:
name:
- borgbackup
- - xz-utils
+ - gzip
state: latest
when: ansible_os_family == 'Debian'
diff --git a/roles/borg-start/templates/root/bin/borg-backup.sh b/roles/borg-start/templates/root/bin/borg-backup.sh
@@ -9,7 +9,16 @@ trap 'echo $( date ) Backup interrupted >&2; exit 2' INT TERM
info "Dumping database"
-sudo -u postgres pg_dumpall --clean | xz -T0 > postgres-backup.sql.xz
+sudo -u postgres pg_dumpall > postgres-backup.sql
+db_exit=$?
+
+# Note: I actually benchmarked (!) this on *our* SQL data.
+# zstd was fastest, but gzip was smallest
+# (tested: gzip, bzip, zstd, lzip, xz)
+gzip -f postgres-backup.sql || exit 1 # -f: overwrite stale .gz left by an interrupted run
+
+echo "Database snapshot created:"
+ls -al postgres-backup.sql.gz
info "Starting backup"
@@ -31,16 +40,14 @@ borg create \
::'{hostname}-{now}' \
/etc \
/root \
- /var/lib/libeufin-bank \
/var/lib/libeufin-nexus \
- /var/lib/taler-auditor \
/var/lib/taler-exchange
backup_exit=$?
info "Removing database dump"
-rm postgres-backup.sql.xz
+rm postgres-backup.sql.gz
info "Pruning repository"
@@ -71,6 +78,7 @@ compact_exit=$?
# use highest exit code as global exit code
global_exit=$(( backup_exit > prune_exit ? backup_exit : prune_exit ))
global_exit=$(( compact_exit > global_exit ? compact_exit : global_exit ))
+global_exit=$(( db_exit > global_exit ? db_exit : global_exit ))
if [ ${global_exit} -eq 0 ]; then
info "Backup, Prune, and Compact finished successfully"
diff --git a/roles/database/tasks/main.yml b/roles/database/tasks/main.yml
@@ -10,22 +10,27 @@
- Restart postgresql
when: ansible_os_family == 'Debian'
-- name: Make sure PostgreSQL is started if trying to import
+- name: Ensure PostgreSQL is started and enabled
systemd:
name: postgresql
state: started
- when: ansible_local.taler_backup_import is defined
+ enabled: true
-- name: Collect info about databases
+- name: Collect database information
become: true
become_user: postgres
community.postgresql.postgresql_info:
filter:
- "databases*"
register: database_info
- when: ansible_local.taler_backup_import is defined
-- name: If import triggered then verify database is not already existent
+- name: Check if exchange database already exists
+ become: true
+ become_user: postgres
+ ansible.builtin.set_fact:
+ exchange_db_exists: "{{ 'taler-exchange' in database_info.databases.keys() }}"
+
+- name: Check if versioning schema exists
become: true
become_user: postgres
community.postgresql.postgresql_query:
@@ -33,42 +38,62 @@
db: taler-exchange
query:
SELECT schema_name FROM information_schema.schemata WHERE schema_name = '_v';
- register: DATABASE_EXISTS
- when: ansible_local.taler_backup_import is defined and database_info.databases == "taler-exchange"
+ register: schema_check
+ when: exchange_db_exists | bool
+
+- name: Set versioning schema existence fact
+ ansible.builtin.set_fact:
+ versioning_schema_exists: "{{ schema_check.rowcount | default(0) > 0 }}"
+ when: exchange_db_exists | bool
+
+# Check if the local backup file exists
+- name: Check if postgres backup file exists locally
+ ansible.builtin.stat:
+ path: postgres-backup.sql.gz
+ delegate_to: localhost
+ register: backup_file_status
-- name: Fail if trying to import and db exists
+- name: Set local backup existence fact
+ ansible.builtin.set_fact:
+ local_backup_exists: "{{ backup_file_status.stat.exists }}"
+
+- name: Fail if trying to import and versioning schema exists
 fail: msg="Import fact set but _v schema exists"
- when: ansible_local.taler_backup_import is defined and DATABASE_EXISTS.statusmessage is defined and DATABASE_EXISTS.statusmessage == "SELECT 1"
+ when:
+ - DISABLE_RESTORE_BACKUP
+ - versioning_schema_exists | default(false) | bool # fact is only set when the exchange DB exists
+ - local_backup_exists | bool
+
+# Note: the postgres-backup.sql.gz is a symbolic link in Git.
+# The target of that symbolic link is created via the 'restore.sh' script.
+- name: Upload database backup file to server if restoring from backup
+ copy:
+ src: postgres-backup.sql.gz
+ dest: /tmp/postgres-backup.sql.gz
+ owner: postgres
+ group: postgres
+ mode: "0400"
+ when:
+ - local_backup_exists | bool
-- name: Create empty taler-exchange database
+- name: Restore database from backup if restoring from backup
 become: true
 become_user: postgres
 community.postgresql.postgresql_db:
 login_user: postgres
 db: taler-exchange
- state: present
- when: ansible_local.taler_backup_import is defined
+ state: restore
+ target: /tmp/postgres-backup.sql.gz # must match 'dest' of the upload task
+ notify:
+ - Restart postgresql
+ when:
+ - local_backup_exists | bool
-- name: Restore Exchange backup if import fact is set
+- name: Create empty taler-exchange database (if first deployment)
become: true
become_user: postgres
community.postgresql.postgresql_db:
login_user: postgres
db: taler-exchange
- state: restore
- target: /tmp/taler-exchange-backup.sql.xz
- notify:
- - Restart postgresql
- when: ansible_local.taler_backup_import is defined
-
-- name: Remove "taler_backup_import" fact so import does not run again
- file:
- path: /etc/ansible/facts.d/taler_backup_import.fact
- state: absent
- when: ansible_local.taler_backup_import is defined
-
-- name: Make sure PostgreSQL is started and enabled
- systemd:
- name: postgresql
- state: started
- enabled: true
+ state: present
+ when: DISABLE_RESTORE_BACKUP | bool
diff --git a/start-borg-backups.sh b/start-borg-backups.sh
@@ -8,6 +8,11 @@ then
exit 1
fi
+if [ -z "${1:-}" ] # quoted: target may be empty or contain whitespace
+then
+ echo "Call with 'spec' or another host/group to select target"
+ exit 1
+fi
ansible-playbook \
--verbose \
--extra-vars BORG_PASSPHRASE="$BORG_PASSPHRASE" \