summaryrefslogtreecommitdiff
path: root/experiment/scripts/shard.sh
blob: 4160c5df66713405a9a4cd7d304b7918737c646e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/bin/bash
#
# shard.sh - set up a database shard node.
#
# Usage: shard.sh init
#
# INFO_MSG and OPT_MSG hold the description and option text that are handed
# to taler_perf_help when the script is called with anything other than
# "init".
INFO_MSG="
Setup the database shard nodes
"
OPT_MSG="
init:
  Initialize and start the shard database
"

# Abort on the first failing command (errexit), treat unset variables as
# errors (nounset) and trace every command as it runs (xtrace).
set -o errexit -o nounset -o xtrace

# Helper functions used further down (taler_perf_help, restart_rsyslog) are
# not defined in this file - presumably they come from helpers.sh, together
# with the DB_* / POSTGRES_VERSION / NODE_NAME variables (TODO confirm).
source ~/scripts/helpers.sh

# Everything below runs with /tmp as working directory.
cd /tmp

# Write the PostgreSQL and kernel configuration for this shard node.
#
# Globals read:
#   DB_USER            - role that gets password (md5) access in pg_hba.conf
#   POSTGRES_VERSION   - selects /etc/postgresql/<version>/main
# Globals written (left global, as in the original):
#   SHARED_MEM, CACHE_SIZE, NUM_CPU, VM_PEAK, HUGE_PAGES_SIZE, NUM_PAGES
# Files touched:
#   pg_hba.conf          - access rules appended once
#   /etc/sysctl.conf     - vm.nr_hugepages appended once, then `sysctl -p`
#   exchange-shard.conf  - rewritten on every call
#   postgresql.conf      - include line for exchange-shard.conf appended once
#
# Every "append" step is guarded by a grep so that re-running the function
# does not add the same lines again.
function setup_config() {
  local pg_conf_dir="/etc/postgresql/${POSTGRES_VERSION}/main"
  local mem_total_kb

  # Enable password for taler since this is the case in real world deployments
  # For the postgres user do not enable authentication (used in metrics)
  # The guard has to look for the very line that is appended below
  # (172.16.0.0/12); -F makes the dots and slashes literal.
  if ! grep -qF -- "host all ${DB_USER} 172.16.0.0/12 md5" \
    "${pg_conf_dir}/pg_hba.conf"; then
    echo "
    host all ${DB_USER} 172.16.0.0/12 md5
    host all postgres 172.16.0.0/12 trust
    " >> "${pg_conf_dir}/pg_hba.conf"
  fi

  # Get hardware info to tune in postgresql.conf
  # /proc/meminfo reports MemTotal in kB; shared_buffers gets a third of it
  # and effective_cache_size three quarters.
  mem_total_kb=$(awk '/MemTotal/ {print $2}' /proc/meminfo)
  SHARED_MEM=$(( mem_total_kb / 3 ))
  CACHE_SIZE=$(( mem_total_kb * 3/4 ))
  NUM_CPU=$(lscpu | grep "CPU(s)" | head -n 1 | awk '{print $2}')

  # Enable huge pages
  # Size for huge_pages =~ shared_buffers * 1.25 so that there is enough
  VM_PEAK=$(( SHARED_MEM * 10/8 ))

  HUGE_PAGES_SIZE=$(awk '/^Hugepagesize/ {print $2}' /proc/meminfo)
  NUM_PAGES=$(( VM_PEAK / HUGE_PAGES_SIZE ))

  # Only add the setting (and reload sysctl) when no vm.nr_hugepages
  # assignment is present yet.
  if ! grep -q '^[[:space:]]*vm\.nr_hugepages[[:space:]]*=' \
    /etc/sysctl.conf; then
    echo "vm.nr_hugepages=${NUM_PAGES}" >> /etc/sysctl.conf
    sysctl -p
  fi

  # Configure postgres with an additional file and include this
  # in the main configuration
  echo "
  listen_addresses='*'
  log_destination=syslog
  log_error_verbosity=terse
  syslog_ident='taler-database-shard'
  log_min_duration_statement=300
  shared_preload_libraries='pg_stat_statements,auto_explain'
  auto_explain.log_min_duration='300ms'
  auto_explain.log_analyze=true
  auto_explain.log_buffers=true
  auto_explain.log_wal=true
  
  # use 25% of the available memory 
  # (https://www.postgresql.org/docs/13/runtime-config-resource.html)
  shared_buffers=${SHARED_MEM}kB
  effective_cache_size=${CACHE_SIZE}kB

  huge_pages=on
  
  # (https://www.postgresql.org/docs/current/runtime-config-wal.html#GUC-MAX-WAL-SIZE)
  min_wal_size=4GB
  max_wal_size=8GB
  wal_buffers=16MB

  # Hopefully reduce disk IO 
  # https://www.postgresql.org/docs/12/wal-configuration.html
  checkpoint_completion_target=0.9
  random_page_cost=1.1
  
  # Default 2kB
  bgwriter_flush_after = 2MB

  # Default 0
  # https://www.cybertec-postgresql.com/en/the-mysterious-backend_flush_after-configuration-setting/
  # backend_flush_after = 0

  # Too much results in CPU load
  # https://www.postgresql.org/docs/13/runtime-config-resource.html#GUC-EFFECTIVE-IO-CONCURRENCY
  effective_io_concurrency = 100

  # Bad when turned off - Recovering db may not be possible
  # https://www.postgresql.org/docs/13/runtime-config-wal.html#GUC-FSYNC
  fsync = on

  # Not so bad as when turning off fsync, but single transactions might get lost on crash - but
  # like they would have aborted cleanly
  # https://www.postgresql.org/docs/13/runtime-config-wal.html#GUC-SYNCHRONOUS-COMMIT
  # When having replication, this one can be changed (in local only on and off are of use)
  # https://www.postgresql.org/docs/13/runtime-config-replication.html#GUC-SYNCHRONOUS-STANDBY-NAMES
  # on causes us to get 100% IO load
  synchronous_commit = off

  # Default off
  wal_compression = off

  wal_sync_method = fsync

  # Bad to turn off, may lead to inconcistency
  # https://www.postgresql.org/docs/13/runtime-config-wal.html#GUC-FULL-PAGE-WRITES
  # Write full pages to WAL while partial are written to disk, helpful in case of crash, then
  # partially written pages can be recovered.
  # Can be disabled if FS does not support partial written pages (such as ZFS)
  full_page_writes = on

  max_worker_processes=${NUM_CPU}
  max_parallel_workers=${NUM_CPU}
  max_connections=500

  max_parallel_maintenance_workers=4
  
  # out of shared memory
  max_locks_per_transaction=85
  
  # (max used =~ work_mem * max_connections)
  # NOTE: This formula is not completely correct 
  work_mem=128MB
  maintenance_work_mem=2GB
  # 1 min
  idle_in_transaction_session_timeout=60000
  " > "${pg_conf_dir}/exchange-shard.conf"

  # Hook the generated file into the main configuration exactly once.
  if ! grep -qF -- "include = 'exchange-shard.conf'" \
    "${pg_conf_dir}/postgresql.conf"; then
    echo "include = 'exchange-shard.conf'" >> \
      "${pg_conf_dir}/postgresql.conf"
  fi
}

# Initialize the database for taler exchange shard
#
# Globals read:
#   DB_USER, DB_PASSWORD, DB_PORT, DB_NAME - connection data for the shard DB
#   NODE_NAME - "-"-separated node name; its second field becomes the index
#               passed to taler-exchange-dbinit
# Globals written:
#   IDX (left global, as in the original)
#
# Steps: fill the DB URL into the exchange secret config, restart
# PostgreSQL, create the role and the database if they are missing, then
# run taler-exchange-dbinit for this shard.
function init_db() {
  local db_url db_url_escaped

  # Substitute the <DB_URL_HERE> placeholder. "|" is used as the sed
  # delimiter, and "\", "&" and "|" inside the URL are backslash-escaped so
  # that e.g. a password containing "&" is written literally.
  db_url="postgresql://${DB_USER}:${DB_PASSWORD}@localhost:${DB_PORT}/${DB_NAME}"
  db_url_escaped=$(printf '%s' "${db_url}" | sed -e 's/[\\&|]/\\&/g')
  sed -i "s|<DB_URL_HERE>|${db_url_escaped}|g" \
    /etc/taler/secrets/exchange-db.secret.conf

  systemctl restart postgresql

  # Create the remote user "$DB_USER" and load pg_stat_statements for metrics
  # (both statements only run when the role does not exist yet). The outer
  # here-document is unquoted, so the variables are expanded by this script
  # before the text is handed to the shell started by su.
  su postgres << EOF
psql postgres -tAc "SELECT 1 FROM pg_roles WHERE rolname='${DB_USER}'" | \
  grep -q 1 || \
  psql << END
    CREATE USER "${DB_USER}" with encrypted password '${DB_PASSWORD}';
    CREATE EXTENSION pg_stat_statements;
END
EOF

  # Create the database, owned by "$DB_USER", unless it already exists
  su postgres << EOF
psql -tAc "SELECT 1 FROM pg_database WHERE datname='${DB_NAME}'" | \
  grep -q 1 || \
  createdb -O "${DB_USER}" "${DB_NAME}"
EOF

  # Second "-"-separated field of the node name.
  IDX=$(printf '%s\n' "${NODE_NAME}" | cut -d "-" -f 2)

  # The -R call is allowed to fail (deliberate best effort); the -S call
  # must succeed or the script aborts under errexit.
  sudo -u taler-exchange-httpd taler-exchange-dbinit -R "${IDX}" || true
  sudo -u taler-exchange-httpd taler-exchange-dbinit -S "${IDX}"
}

# Dispatch on the first command-line argument.
# "${1:-}" keeps nounset from aborting when the script is started without
# arguments, so that case falls through to the help output instead.
case "${1:-}" in
  init)
    setup_config
    init_db
    restart_rsyslog
    ;;
  *)
    # Anything else (including no argument at all): print the help text.
    taler_perf_help "$0" "$INFO_MSG" "$OPT_MSG"
    ;;
esac

exit 0