summaryrefslogtreecommitdiff
path: root/experiment/scripts/database.sh
blob: 6cb1bbbabc9dfcb2c81933211c95efe3dcaf3808 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
#!/bin/bash
# Provision the database node of the Taler performance experiment:
# tune and start postgresql (see the `init` sub-command at the bottom).

# Help texts consumed by taler_perf_help (defined in helpers.sh).
INFO_MSG="
Setup the database node (start postgresql)
"
OPT_MSG="
init:
  Initialize and start the taler database
"

# -e: abort on error, -u: unset vars are errors, -x: trace commands
set -eux
source ~/scripts/helpers.sh

# move to tmp to prevent change directory errors
cd /tmp 

# Base directory of the postgres config for the installed major version.
# POSTGRES_VERSION is expected in the environment (or from helpers.sh).
PG_CONF=/etc/postgresql/${POSTGRES_VERSION}/main

# Setup a disk if present to be used for WAL and move the WAL there
# Don't use in combination with `setup_ram_storage`
function setup_disks() {
  # Pick the node-specific secondary block device, if any.
  if [ -b /dev/disk1 ]; then
    # Dahu ssd
    DISK=/dev/disk1
  elif [ -b /dev/disk4 ]; then
    # Yeti nvme
    DISK=/dev/disk4
  else
    # No extra disk on this node: leave the WAL where it is.
    return 0
  fi
  # Best-effort (|| true): the disk may already be partitioned/formatted
  # or mounted from a previous run of this script.
  # NOTE(review): sfdisk writes a partition table but mkfs.ext4 then
  # formats the WHOLE device (${DISK}, not ${DISK}1) — confirm intended.
  echo 'start=2048, type=83' | sfdisk ${DISK} || true
  yes | mkfs.ext4 -m 0 ${DISK} || true
  mkdir /mnt/disk || true
  mount ${DISK} /mnt/disk || true
  # Move pg_wal onto the dedicated disk and leave a symlink behind.
  # Skipped when pg_wal is already a symlink (previous run did the move).
  if [ ! -L /tmp/postgresql/${POSTGRES_VERSION}/main/pg_wal ]; then
    rm -rf /mnt/disk/pg_wal || true
    mv /tmp/postgresql/${POSTGRES_VERSION}/main/pg_wal/ /mnt/disk
    ln -s /mnt/disk/pg_wal /tmp/postgresql/${POSTGRES_VERSION}/main/pg_wal
    chown -R postgres:postgres /tmp/postgresql/${POSTGRES_VERSION}/main/pg_wal
  fi
  
}

# Setup Postgres to use RAM instead of disks to store data
# Mounts a tmpfs sized at 25% of total RAM over /tmp/postgresql and copies
# the existing cluster files onto it. Idempotent: does nothing when the
# mount is already present (previous invocation).
function setup_ram_storage() {
  # MemTotal from /proc/meminfo is in kB; use a quarter of it.
  SIZE=$(($(awk '/MemTotal/ {print $2}' /proc/meminfo) / 4))
  if df | grep -q /tmp/postgresql; then
    # Already mounted — nothing to do.
    return 0
  fi
  # Preserve the on-disk cluster, mount the tmpfs, then copy the data
  # back while keeping ownership/permissions (-p).
  mv /tmp/postgresql /tmp/postgresql.bak
  mkdir /tmp/postgresql
  chown postgres:postgres /tmp/postgresql
  mount -t tmpfs -o size=${SIZE}k pgdata /tmp/postgresql
  cp -rp /tmp/postgresql.bak/* /tmp/postgresql
}

# Setup the postgresql configuration
# - moves the cluster to /tmp (Grid5000 mounts the big disk there)
# - fills in the taler exchange secret/business config templates
# - derives memory/CPU tuning values and writes them to
#   ${PG_CONF}/exchange.conf, which is included from postgresql.conf
function setup_config() {

  # Grid5000 mounts the default disk on /tmp
  # Move the DB there to get more storage
  if [ -d /var/lib/postgresql ]; then
    mv /var/lib/postgresql /tmp/
  fi

  # BUGFIX: the first expression used backslash as the sed delimiter
  # ("s\pat\repl\g"), which sed rejects; use '|' since the replacement
  # URL contains '/'.
  sed -i -e "s|<DB_URL_HERE>|postgresql:///${DB_NAME}|g" \
         -e "s/<SHARD_USER_HERE>/${DB_USER}/g" \
         -e "s/<SHARD_USER_PW_HERE>/${DB_PASSWORD}/g" \
         /etc/taler/secrets/exchange-db.secret.conf

  sed -i -e "s|<SHARD_DOMAIN_HERE>|${DNS_ZONE}|g" \
         /etc/taler/conf.d/exchange-business.conf

  # Get hardware info to tune in postgresql.conf
  # (MemTotal is in kB: 25% for shared_buffers, 75% as cache-size hint)
  SHARED_MEM=$(($(awk '/MemTotal/ {print $2}' /proc/meminfo) / 4 ))
  CACHE_SIZE=$(($(awk '/MemTotal/ {print $2}' /proc/meminfo) * 3/4))
  NUM_CPU=$(lscpu | grep "CPU(s)" | head -n 1 | awk '{print $2}')

  # Enable huge pages
  # Size for huge_pages =~ shared_buffers * 1.25 so that there is enough
  VM_PEAK=$((${SHARED_MEM} * 10/8))

  HUGE_PAGES_SIZE=$(grep ^Hugepagesize /proc/meminfo | awk '{print $2}')
  NUM_PAGES=$((${VM_PEAK} / ${HUGE_PAGES_SIZE}))

  # BUGFIX: the old pattern "vm.nr_hugepages'" contained a stray quote and
  # never matched, so a duplicate entry was appended to /etc/sysctl.conf
  # on every run.
  if ! grep -q "vm.nr_hugepages" /etc/sysctl.conf; then
    echo "vm.nr_hugepages=${NUM_PAGES}" >> /etc/sysctl.conf
    sysctl -p
  fi

  # disable swap
  swapoff -a

  # Allow longer lines to be logged by postgres
  sed -i '1 i\$MaxMessageSize 4k' /etc/rsyslog.conf

  # Configure postgres with an additional file and include this
  # in the main configuration
  echo "
  listen_addresses='*'

  log_destination=syslog
  syslog_ident='taler-database'

  log_error_verbosity=terse
  # log_min_messages=PANIC
  # log_min_error_statement=PANIC
  # client_min_messages=ERROR

  # For pgbadger
  # log_checkpoints=on
  # log_connections=on
  # log_disconnections=on
  # log_lock_waits=on
  # log_temp_files=0
  # log_autovacuum_min_duration=0
  # log_error_verbosity=default
  # log_duration=on
  # log_statement=all

  # For explain.py
  # syslog_split_messages=off
  # log_statement=all
  # log_error_verbosity=default

  log_min_duration_statement=${DB_MIN_DURATION_STATEMENT}

  auto_explain.log_min_duration='${DB_MIN_DURATION_STATEMENT}ms'
  auto_explain.log_verbose=true
  auto_explain.log_nested_statements=true
  auto_explain.log_analyze=true
  auto_explain.log_buffers=true
  auto_explain.log_wal=true

  shared_preload_libraries='pg_stat_statements,auto_explain'

  # Should be set locally
  join_collapse_limit=1

  # Large tables perform bad with the default settings
  # However, they could also be set on each table indiviudally
  # (NOTE: on partitions!)
  # ALTER TABLE known_coins_default
  #  SET (autovacuum_vacuum_scale_factor = 0.0, autovacuum_vacuum_threshold = 1000);
  log_autovacuum_min_duration=0
  default_statistics_target=300
  autovacuum_vacuum_cost_limit=400
  autovacuum_vacuum_scale_factor=0.1
  autovacuum_vacuum_threshold=1000
  # Default 50, 0.1
  autovacuum_analyze_threshold=50
  autovacuum_analyze_scale_factor=0.1

  # use 25% of the available memory 
  # (https://www.postgresql.org/docs/13/runtime-config-resource.html)
  shared_buffers=${SHARED_MEM}kB
  effective_cache_size=${CACHE_SIZE}kB

  huge_pages=on
  
  # (https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-MAX-WAL-SIZE)
  min_wal_size=20GB
  max_wal_size=200GB
  wal_buffers=1GB

  checkpoint_completion_target=0.9
  checkpoint_timeout = 15min
  checkpoint_flush_after = 2MB
  random_page_cost=1.1
  
  # Default 2kB
  bgwriter_flush_after = 2MB

  # Default 0
  # https://www.cybertec-postgresql.com/en/the-mysterious-backend_flush_after-configuration-setting/
  backend_flush_after = 2MB

  # Too much results in CPU load
  # https://www.postgresql.org/docs/13/runtime-config-resource.html#GUC-EFFECTIVE-IO-CONCURRENCY
  effective_io_concurrency = 200

  # Bad when turned off - Recovering db may not be possible
  # https://www.postgresql.org/docs/13/runtime-config-wal.html#GUC-FSYNC
  fsync = on

  # Not so bad as when turning off fsync, but single transactions might get lost on crash - but
  # like they would have aborted cleanly
  # https://www.postgresql.org/docs/13/runtime-config-wal.html#GUC-SYNCHRONOUS-COMMIT
  # When having replication, this one can be changed (in local only on and off are of use)
  # https://www.postgresql.org/docs/13/runtime-config-replication.html#GUC-SYNCHRONOUS-STANDBY-NAMES
  # on causes us to get 100% IO load
  synchronous_commit = off

  # Default off
  wal_compression = off

  wal_sync_method = fsync

  # Bad to turn off, may lead to inconcistency
  # https://www.postgresql.org/docs/13/runtime-config-wal.html#GUC-FULL-PAGE-WRITES
  # Write full pages to WAL while partial are written to disk, helpful in case of crash, then
  # partially written pages can be recovered.
  # Can be disabled if FS does not support partial written pages (such as ZFS)
  full_page_writes = on

  max_worker_processes=${NUM_CPU}
  max_parallel_workers=${NUM_CPU}
  max_parallel_workers_per_gather=10
  max_connections=500

  max_parallel_maintenance_workers=12
  
  # out of shared memory
  max_locks_per_transaction=200
  
  # (max used =~ work_mem * max_connections)
  # NOTE: This formula is not completely correct 
  work_mem=2GB
  maintenance_work_mem=4GB
  # 1 min
  idle_in_transaction_session_timeout=60000

  # Disable sequential scans
  # enable_seqscan=off

  enable_partitionwise_join=on
  enable_partitionwise_aggregate=on

  # set our database location to /tmp
  data_directory='/tmp/postgresql/${POSTGRES_VERSION}/main'
  " > ${PG_CONF}/exchange.conf

  # Must be done here otherwise pg does not start a second time
  if [[ ${NUM_AUDITORS} -ge 1 ]] && \
     [[ ${TALER_SETUP_AUDITOR_REPLICATION} == 1 ]];
  then
    echo "wal_level=logical" >> ${PG_CONF}/exchange.conf
  fi

  # Keep a copy with the experiment logs for later inspection
  cp ${PG_CONF}/exchange.conf ${LOG_DIR}/postgresql.conf

  if ! grep -q "include = 'exchange.conf'" \
	  ${PG_CONF}/postgresql.conf; then
    echo "include = 'exchange.conf'" >> \
          ${PG_CONF}/postgresql.conf
  fi
}

# Configure and start pgBouncer if $USE_PGBOUNCER is true
# Substitutes the credential/database placeholders in the pgbouncer
# config files and (re)starts the service. No-op otherwise.
function setup_pgbouncer() {
  if [ "${USE_PGBOUNCER}" != "true" ]; then
    return 0
  fi
  sed -i \
    -e "s/<DB_USER_HERE>/${DB_USER}/g" \
    -e "s/<DB_PASSWORD_HERE>/${DB_PASSWORD}/g" \
    /etc/pgbouncer/userlist.txt
  sed -i \
    -e "s/<DB_NAME_HERE>/${DB_NAME}/g" \
    /etc/pgbouncer/pgbouncer.ini
  # pgbouncer does not cleanup those sometimes
  rm -f /var/run/postgresql/pgbouncer.pid /var/run/postgresql/.s.PGSQL.6432
  systemctl restart pgbouncer
}

# Enable replication to the auditor - is only done when an auditor node is present
# Creates a replication role "egress", grants it read access on ${DB_NAME},
# publishes all tables and opens pg_hba for the 172.16.0.0/12 range.
function setup_replication() {
  if [[ "${NUM_AUDITORS}" -ge 1 ]] && \
     [[ ${TALER_SETUP_AUDITOR_REPLICATION} == 1 ]];
  then
    # Best-effort inside the heredoc: role/publication may already exist.
    su postgres << EOF
      createuser --replication egress
      psql -tAc "ALTER ROLE egress WITH PASSWORD '${DB_PASSWORD}';" || true
      psql -d ${DB_NAME} -tAc "GRANT SELECT ON ALL TABLES IN SCHEMA public TO egress;"
      psql -tAc "DROP PUBLICATION auditor;"
      psql -d ${DB_NAME} -tAc "CREATE PUBLICATION exchange FOR ALL TABLES;"
EOF
    # BUGFIX: the grep pattern said 127.16.0.0/12 while the appended rule
    # uses 172.16.0.0/12, so the rule was re-appended on every run.
    grep -q "host all egress 172.16.0.0/12 md5" \
      ${PG_CONF}/pg_hba.conf \
      || echo "host all egress 172.16.0.0/12 md5" \
         >> ${PG_CONF}/pg_hba.conf
    systemctl restart postgresql
  fi
}

# Allow DB_USER from remote (Exchange will continue initialization only when 
# this one was run)
function enable_remote_access() {
  # Enable password for taler since this is the case in real world deployments
  # For the postgres user do not enable authentication (used in metrics)
  # BUGFIX: the grep pattern said 127.16.0.0/12 while the appended rules
  # use 172.16.0.0/12, so duplicates were appended on every invocation.
  if ! grep -q "host all ${DB_USER} 172.16.0.0/12 md5" \
    ${PG_CONF}/pg_hba.conf; then
    echo "
    host all ${DB_USER} 172.16.0.0/12 md5
    host all postgres 172.16.0.0/12 trust
    " >> ${PG_CONF}/pg_hba.conf
  fi
}

# Create user mappings for DB_USER for each shard
# For postgres 14 additionally enables asynchronous foreign-table scans
# (postgres_fdw "async_capable" server option).
function configure_shard_access() {

  for i in $(seq $NUM_SHARDS); do
    su postgres << EOF
psql -d "${DB_NAME}" -tAc "CREATE USER MAPPING IF NOT EXISTS FOR \"${DB_USER}\"
                           SERVER \"${i}\"
                           OPTIONS (user '${DB_USER}', password '${DB_PASSWORD}');"
EOF
    if [[ ${POSTGRES_VERSION} == 14 ]]; then
      # BUGFIX: this psql previously ran as the invoking user (root),
      # unlike every other DB statement in this script; run it as the
      # postgres superuser so the connection/role actually exists.
      su postgres << EOF
psql -d "${DB_NAME}" -tAc "ALTER SERVER ${i} OPTIONS (async_capable 'true');"
EOF
    fi
  done

}

# Initialize the database for taler exchange
# Drops any pre-existing ${DB_NAME}, (re)creates the taler-exchange-httpd
# role and the database, runs taler-exchange-dbinit (plain, sharded or
# partitioned depending on SHARD_DB / PARTITION_DB) and grants the remote
# ${DB_USER} DML access. Restarts postgresql before and after.
function init_db() {
  systemctl restart postgresql

  # Create the role taler-exchange-httpd and the database
  su postgres << EOF
psql postgres -tAc "DROP DATABASE IF EXISTS \"${DB_NAME}\";"
psql postgres -tAc "SELECT 1 FROM pg_roles WHERE rolname='taler-exchange-httpd'" | \
  grep -q 1 || \
  createuser taler-exchange-httpd
psql -tAc "SELECT 1 FROM pg_database WHERE datname='${DB_NAME}'" | \
  grep -q 1 || \

  createdb -O taler-exchange-httpd "${DB_NAME}"
psql -tAc "CREATE EXTENSION IF NOT EXISTS pg_stat_statements"
EOF
 
  # Create the remote user "$DB_USER" and load pg_stat_statements for metrics
  su postgres << EOF
psql postgres -tAc "SELECT 1 FROM pg_roles WHERE rolname='${DB_USER}'" | \
  grep -q 1 || \
  psql << END
    CREATE USER "${DB_USER}" with encrypted password '${DB_PASSWORD}';
END
EOF

  # Best-effort (|| true): these may fail when there is nothing to
  # reset yet on a fresh node.
  sudo -u taler-exchange-httpd taler-exchange-dbinit -r || true
  sudo -u taler-exchange-httpd taler-exchange-dbinit -s || true

  if [ "${SHARD_DB}" = "true" ]; then
    sudo -u postgres taler-exchange-dbinit -F -P ${NUM_SHARDS} -f
    configure_shard_access
  elif [ "${PARTITION_DB}" = "true" ]; then
    sudo -u taler-exchange-httpd taler-exchange-dbinit -P ${NUM_PARTITIONS} -f
  else
    sudo -u taler-exchange-httpd taler-exchange-dbinit
  fi

  # Grant access to the database to the remote user
  # NOTE(review): the GRANT lines are consumed by psql from the shared
  # heredoc stdin (not executed by the shell) — fragile but functional;
  # confirm before restructuring.
  su taler-exchange-httpd -s /bin/bash << EOF
psql -d "${DB_NAME}"
GRANT SELECT,INSERT,UPDATE,DELETE ON ALL TABLES IN SCHEMA public TO "${DB_USER}";
GRANT USAGE ON ALL SEQUENCES IN SCHEMA public TO "${DB_USER}";
EOF

#  for i in $(seq $NUM_PARTITIONS); do
#    su postgres << EOF
#psql -d ${DB_NAME} -tAc "ALTER TABLE known_coins_${i} SET (fillfactor=90)";
#EOF
#  done

  enable_remote_access
  systemctl restart postgresql
}

# Entry point: dispatch on the first CLI argument.
case ${1} in 
  init)
    # Write the postgres configuration, choose the storage backend
    # (tmpfs or dedicated WAL disk), then initialize everything.
    setup_config
    if [[ "${POSTGRES_IN_MEMORY}" == "true" ]]; then
      setup_ram_storage
    else
      setup_disks
    fi
    init_db
    setup_replication
    setup_pgbouncer
    # restart_rsyslog comes from helpers.sh (sourced above).
    restart_rsyslog
    ;;
  *)
    # Unknown/missing argument: print usage (helper from helpers.sh).
    taler_perf_help $0 "$INFO_MSG" "$OPT_MSG"
    ;;
esac

exit 0