summaryrefslogtreecommitdiff
path: root/experiment/scripts/auditor.sh
blob: 4eeac9a76737dea6a66fb81998c8b7d7190a2be5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
#!/bin/bash
# auditor.sh - set up the auditor node of the experiment.
#
# Usage: auditor.sh init
#   Any other argument prints the help text built from INFO_MSG and OPT_MSG.
#
# Shell options:
#   -e  abort as soon as a command fails
#   -u  treat expansion of unset variables as an error
#   -x  trace every command before it is executed
set -eux
# Shared helpers. Several functions called below are not defined in this file
# and are presumably provided by this include (TODO confirm).
source ~/scripts/helpers.sh

# Free-text description handed to taler_perf_help in the fallback branch of
# the argument dispatch at the bottom of the script.
INFO_MSG="
Setup the auditor node
(Start taler-auditor)
"
# Description of the supported sub-commands, also handed to taler_perf_help.
OPT_MSG="
init:
  Configure and start the auditor together with its database
"

# Run everything from /tmp: files written with relative paths (e.g. input.json
# and output.json in setup_config) are created there.
cd /tmp

# Create the local system accounts used by the auditor setup.
#
# One account is created per name in the list below, each with its home
# directory under /tmp/<name>.
#
# Globals:   none (the loop variable is local on purpose, see below)
# Arguments: none
# Returns:   always 0; adduser failures are deliberately ignored
function create_users() {
  # Use a dedicated local instead of USER: assigning to USER would overwrite
  # the exported login-name environment variable for the rest of the script
  # and for every child process started afterwards.
  local account
  for account in auditor sync auditor-ingress auditor-wire; do
    # "|| true" keeps the script going under "set -e" when adduser fails,
    # presumably because the account already exists on a re-run.
    adduser --quiet --home "/tmp/${account}" "${account}" || true
  done
}

# Relocate, tune and restart PostgreSQL, then (re)create the auditor databases.
#
# Steps, in order:
#   1. move /var/lib/postgresql to /tmp (if it is still at the default place)
#   2. derive memory / CPU based tuning values from /proc/meminfo and lscpu
#   3. reserve huge pages via /etc/sysctl.conf (only if not configured yet)
#   4. disable swap
#   5. write auditor.conf, include it from postgresql.conf and extend
#      pg_hba.conf, then restart postgresql
#   6. as user postgres: drop and recreate the databases "taler-ingress" and
#      "auditor" and make sure the roles "auditor-ingress" and "auditor" exist
#
# Globals read:    POSTGRES_VERSION (not set in this file; presumably provided
#                  by helpers.sh or the environment - TODO confirm)
# Globals written: SHARED_MEM, CACHE_SIZE, NUM_CPU, VM_PEAK, HUGE_PAGES_SIZE,
#                  NUM_PAGES (left global, as in the original implementation)
# Arguments:       none
function init_db() {

  # Grid5000 mounts the default disk on /tmp
  # Move the DB there to get more storage
  if [ -d /var/lib/postgresql ]; then
    mv /var/lib/postgresql /tmp/
  fi

  # Get hardware info to tune in postgresql.conf
  # (MemTotal is reported in kB, so both values below are in kB as well)
  SHARED_MEM=$(($(awk '/MemTotal/ {print $2}' /proc/meminfo) / 4 ))
  CACHE_SIZE=$(($(awk '/MemTotal/ {print $2}' /proc/meminfo) * 3/4))
  NUM_CPU=$(lscpu | grep "CPU(s)" | head -n 1 | awk '{print $2}')

  # Enable huge pages
  # Size for huge_pages =~ shared_buffers * 1.25 so that there is enough
  VM_PEAK=$((${SHARED_MEM} * 10/8))

  HUGE_PAGES_SIZE=$(grep ^Hugepagesize /proc/meminfo | awk '{print $2}')
  NUM_PAGES=$((${VM_PEAK} / ${HUGE_PAGES_SIZE}))

  # Only add the setting once. The pattern is anchored so that a commented-out
  # entry does not count as "already configured".
  if ! grep -q '^[[:space:]]*vm\.nr_hugepages' /etc/sysctl.conf; then
    echo "vm.nr_hugepages=${NUM_PAGES}" >> /etc/sysctl.conf
    sysctl -p
  fi

  # disable swap
  swapoff -a

  # Tuning overrides; kept in a separate file that postgresql.conf includes.
  echo "
  listen_addresses='*'

  wal_level = logical

  log_destination=syslog
  syslog_ident='taler-auditor-db'

  log_error_verbosity=terse
  # log_min_messages=PANIC
  # log_min_error_statement=PANIC
  # client_min_messages=ERROR

  # For pgbadger
  # log_checkpoints=on
  # log_connections=on
  # log_disconnections=on
  # log_lock_waits=on
  # log_temp_files=0
  # log_autovacuum_min_duration=0
  # log_error_verbosity=default
  # log_duration=on
  # log_statement=all

  # For explain.py
  # syslog_split_messages=off
  # log_statement=all
  # log_error_verbosity=default

  # Large tables perform bad with the default settings
  # However, they could also be set on each table indiviudally
  # (NOTE: on partitions!)
  # ALTER TABLE known_coins_default
  #  SET (autovacuum_vacuum_scale_factor = 0.0, autovacuum_vacuum_threshold = 1000);
  default_statistics_target=300
  autovacuum_vacuum_cost_limit=400
  autovacuum_vacuum_scale_factor=0.1
  autovacuum_vacuum_threshold=1000
  # Default 50, 0.1
  autovacuum_analyze_threshold=50
  autovacuum_analyze_scale_factor=0.1

  # use 25% of the available memory 
  # (https://www.postgresql.org/docs/13/runtime-config-resource.html)
  shared_buffers=${SHARED_MEM}kB
  effective_cache_size=${CACHE_SIZE}kB

  huge_pages=on
  
  # (https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-MAX-WAL-SIZE)
  min_wal_size=20GB
  max_wal_size=200GB
  wal_buffers=1GB

  checkpoint_completion_target=0.9
  checkpoint_timeout = 15min
  checkpoint_flush_after = 2MB
  random_page_cost=1.1
  
  # Default 2kB
  bgwriter_flush_after = 2MB

  # Default 0
  # https://www.cybertec-postgresql.com/en/the-mysterious-backend_flush_after-configuration-setting/
  backend_flush_after = 2MB

  # Too much results in CPU load
  # https://www.postgresql.org/docs/13/runtime-config-resource.html#GUC-EFFECTIVE-IO-CONCURRENCY
  effective_io_concurrency = 200

  # Bad when turned off - Recovering db may not be possible
  # https://www.postgresql.org/docs/13/runtime-config-wal.html#GUC-FSYNC
  fsync = on

  # Not so bad as when turning off fsync, but single transactions might get lost on crash - but
  # like they would have aborted cleanly
  # https://www.postgresql.org/docs/13/runtime-config-wal.html#GUC-SYNCHRONOUS-COMMIT
  # When having replication, this one can be changed (in local only on and off are of use)
  # https://www.postgresql.org/docs/13/runtime-config-replication.html#GUC-SYNCHRONOUS-STANDBY-NAMES
  # on causes us to get 100% IO load
  synchronous_commit = off

  # Default off
  wal_compression = off

  wal_sync_method = fsync

  # Bad to turn off, may lead to inconcistency
  # https://www.postgresql.org/docs/13/runtime-config-wal.html#GUC-FULL-PAGE-WRITES
  # Write full pages to WAL while partial are written to disk, helpful in case of crash, then
  # partially written pages can be recovered.
  # Can be disabled if FS does not support partial written pages (such as ZFS)
  full_page_writes = on

  max_worker_processes=${NUM_CPU}
  max_parallel_workers=${NUM_CPU}
  max_parallel_workers_per_gather=10
  max_connections=500

  max_parallel_maintenance_workers=12
  
  # out of shared memory
  max_locks_per_transaction=85
  
  # (max used =~ work_mem * max_connections)
  # NOTE: This formula is not completely correct 
  work_mem=2GB
  maintenance_work_mem=4GB

  data_directory='/tmp/postgresql/${POSTGRES_VERSION}/main'

  max_logical_replication_workers=${NUM_CPU}
  max_sync_workers_per_subscription=${NUM_CPU}
  " > "/etc/postgresql/${POSTGRES_VERSION}/main/auditor.conf"

  # Pull the overrides into the main configuration.
  echo "
  include = '/etc/postgresql/${POSTGRES_VERSION}/main/auditor.conf'
  " >> "/etc/postgresql/${POSTGRES_VERSION}/main/postgresql.conf"

  # Let the postgres role connect without password from 172.16.0.0/12.
  echo "
  host all postgres 172.16.0.0/12 trust
  " >> "/etc/postgresql/${POSTGRES_VERSION}/main/pg_hba.conf"

  systemctl restart postgresql

  # The commands below are fed to a shell running as user postgres. The
  # here-document is unquoted, so the outer shell joins the backslash-continued
  # lines before the inner shell sees them.
  su postgres << EOF
psql postgres -tAc "DROP DATABASE IF EXISTS \"taler-ingress\";"
psql postgres -tAc "DROP DATABASE IF EXISTS \"auditor\";"
psql postgres -tAc "SELECT 1 FROM pg_roles WHERE rolname='auditor-ingress'" | \
  grep -q 1 || \
  createuser auditor-ingress
psql postgres -tAc "SELECT 1 FROM pg_roles WHERE rolname='auditor'" | \
  grep -q 1 || \
  createuser auditor

createdb -O auditor-ingress "taler-ingress"
createdb -O auditor "auditor"
EOF
}

# Prepare the local "taler-ingress" database and subscribe it to the
# "exchange" publication of the remote exchange database.
#
# Globals read: PARTITION_DB, NUM_PARTITIONS, NODE_NAME, DB_PASSWORD,
#               DB_HOSTS, DB_NAME (none of them is set in this file)
# Arguments:    none
# Calls:        wait_for_db, which is not defined in this file (presumably
#               provided by helpers.sh - TODO confirm)
function setup_exchange_replication() {

  # Point the exchange configuration at the local ingress database.
  taler-config -s exchange -o DB -V "postgres"
  taler-config -s exchangedb-postgres -o CONFIG -V "postgres:///taler-ingress"

  # Best effort: failures of these two invocations are ignored.
  sudo -u auditor-ingress taler-exchange-dbinit -r || true
  sudo -u auditor-ingress taler-exchange-dbinit -s || true

  if [ "${PARTITION_DB}" = "true" ]; then
    sudo -u auditor-ingress taler-exchange-dbinit -P "${NUM_PARTITIONS}"
  else
    # Must run as the same account as above; a misspelled user name makes
    # sudo fail with "unknown user".
    sudo -u auditor-ingress taler-exchange-dbinit
  fi

  # Give the auditor role access to the replicated tables.
  su auditor-ingress << EOF
psql -d taler-ingress -U auditor-ingress -tAc 'GRANT ALL ON ALL TABLES IN SCHEMA public TO auditor;'
EOF

  DB_USER=egress wait_for_db   

  # The here-document is unquoted: NODE_NAME, DB_PASSWORD, DB_HOSTS and DB_NAME
  # are expanded by this shell before the commands reach the postgres shell.
  # Only the last command is guarded with "|| true"; the two ALTER statements
  # run in the inner shell, whose exit status is that of its last command.
  su postgres << EOF
  psql -d taler-ingress -tAc "ALTER SUBSCRIPTION ${NODE_NAME} DISABLE;"
  psql -d taler-ingress -tAc "ALTER SUBSCRIPTION ${NODE_NAME} SET (slot_name=NONE);"
  psql -d taler-ingress -tAc "CREATE SUBSCRIPTION ${NODE_NAME} CONNECTION 'postgresql://egress:${DB_PASSWORD}@${DB_HOSTS}/${DB_NAME}' PUBLICATION exchange;" || true
EOF
}

# Bring the auditor web front end online.
#
# Calls create_cert (not defined in this file) with this node's fully
# qualified name and /etc/ssl/auditor, links the "auditor" nginx site into
# sites-enabled and restarts taler-auditor-httpd together with nginx.
#
# Globals read: NODE_NAME, DNS_ZONE
enable_webservice() {
  local fqdn="${NODE_NAME}.${DNS_ZONE}"
  local nginx_dir=/etc/nginx
  local -a services=(taler-auditor-httpd nginx)

  create_cert "${fqdn}" /etc/ssl/auditor
  ln -sf "${nginx_dir}/sites-available/auditor" "${nginx_dir}/sites-enabled/auditor"
  systemctl restart "${services[@]}"
}

# Configure the auditor, register it with the exchange and start replication.
#
# The order of the steps matters; each one relies on the state left by the
# previous ones.
#
# Globals read:    EXCHANGE_GW_DOMAIN, NODE_NAME, DNS_ZONE, PRIMARY_EXCHANGE
# Globals written: PUB_KEY (assigned without "local", so it stays set after
#                  the function returns)
# Files written:   /etc/taler/conf.d/auditor.conf (edited in place),
#                  input.json and output.json in the current directory
# Calls helpers not defined in this file:
#   setup_exchange_config_master_key_from_api, wait_for_keys,
#   get_exchange_masterkey (presumably from helpers.sh - TODO confirm)
function setup_config() {

  # Start from a clean state: remove everything below the auditor data dir.
  rm -rf /var/lib/taler/auditor/*

  setup_exchange_config_master_key_from_api

  # The output of "taler-auditor-offline setup" is used as the auditor's
  # public key below.
  PUB_KEY=$(sudo -u taler-auditor-offline taler-auditor-offline setup)

  wait_for_keys ${EXCHANGE_GW_DOMAIN}

  # Fill in the two placeholders of the auditor configuration. The second
  # expression uses "|" as delimiter because the replacement contains "/".
  sed -i -e "s/<AUDITOR_PUB_KEY_HERE>/${PUB_KEY}/g" \
         -e "s|<AUDITOR_URL_HERE>|http://${NODE_NAME}.${DNS_ZONE}/|g" \
      /etc/taler/conf.d/auditor.conf

  enable_webservice

  sudo -u auditor taler-auditor-dbinit
  sudo -u auditor taler-auditor-exchange -m $(get_exchange_masterkey) -u "https://${EXCHANGE_GW_DOMAIN}/"

  # Run exchange.sh's "add-auditor" on the primary exchange host, passing the
  # public key, the auditor URL and this node's name.
  ssh -o StrictHostKeyChecking=no ${PRIMARY_EXCHANGE} \
    "/bin/bash ~/scripts/exchange.sh add-auditor '${PUB_KEY}' 'http://${NODE_NAME}.${DNS_ZONE}/' '${NODE_NAME}'"

  # download -> show -> sign -> upload, exchanging data through input.json and
  # output.json in the current directory.
  # NOTE(review): "setup" above runs as user taler-auditor-offline via sudo,
  # while these four invocations run as the user executing the script -
  # confirm that this is intended.
  taler-auditor-offline download > input.json
  taler-auditor-offline show < input.json
  taler-auditor-offline sign < input.json > output.json
  taler-auditor-offline upload < output.json

  setup_exchange_replication
}

# Full auditor bring-up: run the three setup stages strictly in this order -
# system accounts first, then the database, then the auditor configuration.
init_auditor() {
  local stage
  for stage in create_users init_db setup_config; do
    "${stage}"
  done
}

# Entry point: dispatch on the first command-line argument.
#
#   init  set up the auditor, unless TALER_SETUP_AUDITOR_REPLICATION is "0";
#         the setup itself only runs when PARTITION_DB is "true"
#   *     anything else, including no argument at all, prints the help text
#
# "${1:-}" is required because the script runs with "set -u": a bare $1 would
# abort with "unbound variable" when no argument is given, and the help branch
# would never be reached.
case "${1:-}" in
  init)
    if [[ "${TALER_SETUP_AUDITOR_REPLICATION}" == "0" ]]; then
      echo "Not setting up auditor"
      exit 0
    fi
    if [[ "${PARTITION_DB}" == "true" ]]; then
      init_auditor
    else
      echo "INFO: sharded DB not supported yet"
    fi
    ;;
  *)
    taler_perf_help "$0" "$INFO_MSG" "$OPT_MSG"
    ;;
esac

exit 0