summaryrefslogtreecommitdiff
path: root/experiment/scripts/monitor.sh
blob: 52d0e971834315f4ef1f32341cff6c7888b9fbd1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
#!/bin/bash
# Control script for the experiment monitoring setup. The first positional
# argument selects the action (see the `case` dispatch at the end of the
# file). INFO_MSG and OPT_MSG are the texts handed to taler_perf_help when
# the action is not recognised.
INFO_MSG="
Setup and start the experiment monitoring infrastructure such as
Loki, Promtail, Prometheus and various Prometheus exporters
"
# NOTE(review): the dispatch passes two positional arguments ($2 and $3) to
# the handlers of both `start` and `stop-exchanges`, while the text below
# documents none for `start` and a single NUM for `stop-exchanges`.
# Confirm the intended usage.
OPT_MSG="
init
  Initialize and start all services

start:
  Automatically update wallet / exchange nodes which are monitored
  (does add but not remove)

stop-exchanges NUM:
  Stop NUM exchanges from being monitored 
"

# -e: abort on the first failing command, -x: trace every command executed
set -ex
# get_hosts, wait_for_keys and taler_perf_help are not defined in this file;
# presumably they come from this helper library - TODO confirm
source ~/scripts/helpers.sh

# Update a data source on the external grafana instance
# $1: Datasource name (configured in .env)
# $2: Port where the datasource is listening
#     (http/http8080 or https/https8443)
# Reads ds.json from the current directory as well as the AUTH_HEADER and
# GRAFANA_API variables.
# Returns non-zero when the datasource is not listed in ds.json or when
# jq / curl fail.
# See: https://www.grid5000.fr/w/HTTP/HTTPs_access
function update_datasource() {
  local name="$1"
  local port="$2"
  local id host

  # Get the id of the datasource to update
  id=$(jq --arg name "${name}" '.[] | select(.name == $name) | .id' ds.json) \
    || return
  if [[ -z "${id}" ]]; then
    # Without an id the PUT below would target ".../datasources/"
    echo "Datasource '${name}' not found in ds.json" >&2
    return 1
  fi

  # We require e.g. dahu-2.grenoble.<PORT> as domain
  # Extract dahu-2.grenoble from 'hostname'
  host=$(hostname | cut -d "." -f 1,2 -)

  # Replace the datasources url with our proxy domain and send the
  # modified object back to grafana
  jq --arg url "https://${host}.${port}.proxy.grid5000.fr" \
     --arg name "${name}" \
     '.[] | select(.name == $name) | .url = $url' \
     ds.json | \
  curl -X PUT -k -f -d @- \
       -H "${AUTH_HEADER}" \
       -H "Content-Type: application/json" \
       -H "Accept: application/json" \
       "${GRAFANA_API}/datasources/${id}"
}

# Update the external grafana instance and tell it
# about the nodes which should be queried
# If GRAFANA_HOST or GRAFANA_API_KEY are empty this
# step is skipped - requires admin level api key to update data sources
# Sets the globals AUTH_HEADER and GRAFANA_API (read by update_datasource)
# and writes the fetched datasource list to ds.json.
# Exits the script with curl's status when the list cannot be retrieved.
# NOTE: the script runs with `set -x`, so the bearer token shows up in the
# trace output of the curl invocations.
function update_grafana() {
  if [[ -z "${GRAFANA_HOST}" || -z "${GRAFANA_API_KEY}" ]]; then
    return 0
  fi
  AUTH_HEADER="Authorization: Bearer ${GRAFANA_API_KEY}"
  GRAFANA_API="${GRAFANA_HOST}/api"

  # Retrieve the initially configured datasources and save them to a file
  # to be used later in update_datasource.
  # The status is captured with `||`: inside the branch of `if ! cmd` the
  # value of $? is already 0, which would turn the failure into `exit 0`.
  local rc=0
  curl -k -f -H "${AUTH_HEADER}" \
    "${GRAFANA_API}/datasources" \
    -o ds.json || rc=$?
  if (( rc != 0 )); then
    echo "Failed to retrieve datasources from Grafana" >&2
    exit "${rc}"
  fi

  update_datasource "${PROMETHEUS_DATASOURCE_NAME}" "${PROMETHEUS_G5K_PROXY_PORT}"
  update_datasource "${LOKI_DATASOURCE_NAME}" "${LOKI_G5K_PROXY_PORT}"
}

# Configure all exporters which run on this host
# Replaces the <..._HERE> placeholders in
# /etc/default/prometheus-postgres-exporter with the database URLs of the
# exchange db, the merchant hosts, the shard hosts and - if NUM_AUDITORS >= 1
# and TALER_SETUP_AUDITOR_REPLICATION is 1 - the auditor, and replaces
# <MERCHANT_HOST_HERE> in /etc/monitor/prometheus.yaml.
# Hosts come from get_hosts, the domain from DNS_ZONE. Takes no arguments.
function configure_prometheus_and_exporters() {
  local exporter_cfg=/etc/default/prometheus-postgres-exporter
  local urls host

  # '|' as delimiter, like every other substitution here: the URL contains
  # slashes, and a backslash is not a valid delimiter for 's' in POSIX sed
  sed -i -e "s|<EXCHANGE_DB_URL_HERE>|postgresql://postgres@db.${DNS_ZONE}:5432?sslmode=disable|g" \
         "${exporter_cfg}"

  # Each URL is prefixed with a comma; presumably the placeholders directly
  # follow the exchange URL in a comma separated list - confirm against the
  # exporter template
  urls=""
  for host in $(get_hosts "merchant"); do
    urls="${urls},postgresql://postgres@${host}.${DNS_ZONE}:5432"
    sed -i "s/<MERCHANT_HOST_HERE>/${host}.${DNS_ZONE}/g" \
           /etc/monitor/prometheus.yaml
  done
  sed -i -e "s|<MERCHANT_DB_URL_HERE>|${urls}|g" \
         "${exporter_cfg}"

  urls=""
  for host in $(get_hosts "shard"); do
    urls="${urls},postgresql://postgres@${host}.${DNS_ZONE}:5432"
  done
  sed -i "s|<SHARD_DB_URLS_HERE>|${urls}|g" \
         "${exporter_cfg}"

  # The auditor placeholder is always replaced, with an empty string when
  # no replicated auditor database is configured
  urls=""
  if [[ ${NUM_AUDITORS} -ge 1 ]] && \
     [[ ${TALER_SETUP_AUDITOR_REPLICATION} == 1 ]];
  then
    urls="${urls},postgresql://postgres@auditor.${DNS_ZONE}:5432"
  fi
  sed -i "s|<AUDITOR_DB_URL_HERE>|${urls}|g" \
         "${exporter_cfg}"
}

# Create the directory which will be used to store recovery data
# for the experiment (prometheus + loki)
# Uses /home/${G5K_USER}/exp-data, or /tmp/exp-data when /home/${G5K_USER}
# does not exist. An existing directory is wiped first when
# REMOVE_PREVIOUS_EXPERIMENT_DATA is "true".
# Sets the global EXPERIMENT_DATA_DIR, appends it to ~/.env and
# /etc/environment (a new line on every call) and writes the path into
# /etc/monitor/loki.yaml. Takes no arguments.
function setup_data_dir() {

  EXPERIMENT_DATA_DIR="/home/${G5K_USER}/exp-data"
  if [[ ! -d "/home/${G5K_USER}" ]]; then
    EXPERIMENT_DATA_DIR=/tmp/exp-data
  fi

  if [[ "${REMOVE_PREVIOUS_EXPERIMENT_DATA}" == "true" ]]; then
    # :? guards the recursive delete against an empty path
    rm -rf -- "${EXPERIMENT_DATA_DIR:?}" || true
  fi

  # -p: an already existing directory is not an error; creation stays
  # best-effort as before
  mkdir -p -- "${EXPERIMENT_DATA_DIR}" || true

  echo "EXPERIMENT_DATA_DIR=${EXPERIMENT_DATA_DIR}" >> ~/.env
  echo "EXPERIMENT_DATA_DIR=${EXPERIMENT_DATA_DIR}" >> /etc/environment

  sed -i "s|<EXPERIMENT_DATA_PATH_HERE>|${EXPERIMENT_DATA_DIR}|g" \
         /etc/monitor/loki.yaml
}

# Register the wallet nodes as scrape targets (port 9100)
# Takes no arguments: the wallet hosts are obtained via get_hosts "wallet"
# A target which is already listed in the config is left untouched
function add_wallet_nodes_to_prometheus() {
  local target
  for WALLET in $(get_hosts "wallet"); do
    target="${WALLET}.${DNS_ZONE}:9100"
    if grep -q "${target}" /etc/monitor/prometheus.yaml; then
      continue
    fi
    # Append the target right below the placeholder line
    sed -i "/<WALLET_NODES_HERE>/a \ \ \ \ \ \ - '${target}'" \
            /etc/monitor/prometheus.yaml
  done
}

# Register the exchange proxy ("eproxy") nodes as scrape targets (port 9100)
# Takes no arguments: the hosts are obtained via get_hosts "eproxy"
# A target which is already listed in the config is left untouched
function add_proxy_nodes_to_prometheus() {
  local target
  for HOST in $(get_hosts "eproxy"); do
    target="${HOST}.${DNS_ZONE}:9100"
    if grep -q "${target}" /etc/monitor/prometheus.yaml; then
      continue
    fi
    # Append the target right below the placeholder line
    sed -i "/<PROXY_NODES_HERE>/a \ \ \ \ \ \ - '${target}'" \
            /etc/monitor/prometheus.yaml
  done
}

# Add shard servers to be monitored (port 9100 targets)
# Requires no argument, since servers are retrieved via get_hosts "shard"
# A host is only skipped when its exact '<host>.<zone>:9100' target is
# already listed - the bare host name may also occur in other entries of the
# config or as part of a longer host name.
function add_shard_nodes_to_prometheus() {
  local shard target
  for shard in $(get_hosts "shard"); do
    target="${shard}.${DNS_ZONE}:9100"
    # -F: the dots of the target are literal, not regex wildcards
    if ! grep -qF -- "${target}" /etc/monitor/prometheus.yaml; then
      sed -i "/<SHARD_NODES_HERE>/a \ \ \ \ \ \ - '${target}'" \
              /etc/monitor/prometheus.yaml
    fi
  done
}

# Add merchant servers to be monitored (port 9100 targets)
# Requires no argument, since servers are retrieved via get_hosts "merchant"
# A host is only skipped when its exact '<host>.<zone>:9100' target is
# already listed in the config.
function add_merchant_nodes_to_prometheus() {
  # The loop variable and the name used in the body have to agree; with
  # different names the body sees an empty or stale host
  local merch target
  for merch in $(get_hosts "merchant"); do
    target="${merch}.${DNS_ZONE}:9100"
    # -F: the dots of the target are literal, not regex wildcards
    if ! grep -qF -- "${target}" /etc/monitor/prometheus.yaml; then
      sed -i "/<MERCHANT_NODES_HERE>/a \ \ \ \ \ \ - '${target}'" \
              /etc/monitor/prometheus.yaml
    fi
  done
}

# Add exchange servers to be monitored (port 9100 targets)
# Requires no argument, since servers are retrieved via
# get_hosts "exchange-"
# A host is only skipped when its exact '<host>.<zone>:9100' target is
# already listed - the same host name also occurs in the per-process
# entries written by add_exchanges_to_prometheus.
function add_exchange_nodes_to_prometheus() {
  local exch target
  for exch in $(get_hosts "exchange-"); do
    target="${exch}.${DNS_ZONE}:9100"
    # -F: the dots of the target are literal, not regex wildcards
    if ! grep -qF -- "${target}" /etc/monitor/prometheus.yaml; then
      sed -i "/<EXCHANGE_NODES_HERE>/a \ \ \ \ \ \ - '${target}'" \
              /etc/monitor/prometheus.yaml
    fi
  done
}

# Add new exchanges to be monitored
# $1: N - already running exchange processes (default 0)
# $2: N - processes to add (default 0, i.e. nothing is added)
# For every host returned by get_hosts "exchange-" the ports
# 10000+$1 .. 10000+$1+$2-1 are added as targets, unless already listed.
function add_exchanges_to_prometheus() {
  local first_port=$(( ${1:-0} + 10000 ))
  local last_port=$(( first_port + ${2:-0} - 1 ))
  local hosts host port target

  hosts=$(get_hosts "exchange-")

  for (( port = first_port; port <= last_port; port++ )); do
    # Intentionally unquoted: one word per host
    for host in ${hosts}; do
      target="${host}.${DNS_ZONE}:${port}"
      # -F: the dots of the target are literal, not regex wildcards
      if ! grep -qF -- "${target}" /etc/monitor/prometheus.yaml; then
        sed -i "/<EXCHANGES_HERE>/a  \ \ \ \ \ \ - '${target}'" \
                /etc/monitor/prometheus.yaml
      fi
    done
  done
}

# Remove N exchanges which have been stopped
# from the config so they will not be scraped anymore
# $1: N - number of exchanges currently running (default 0)
# $2: N - number of exchanges to remove (required)
# Returns 1 without touching the config when $2 is missing.
function remove_exchanges_from_prometheus() {
  if [[ -z "${2:-}" ]]; then
    echo "remove_exchanges_from_prometheus: number of exchanges to remove is missing" >&2
    return 1
  fi

  # We know we started from port 10000
  # get the highest port numbers and start removing from there
  # NOTE(review): the range is inclusive on both ends, i.e. $2+1 ports are
  # removed per host, the highest one being 10000+$1. With $1 being the
  # number of running processes that port should not be configured, which
  # makes the extra delete a no-op - confirm against the callers.
  local top_port=$(( ${1:-0} + 10000 ))
  local bottom_port=$(( top_port - $2 ))
  local hosts host port target

  hosts=$(get_hosts "exchange-")

  for (( port = bottom_port; port <= top_port; port++ )); do
    # Intentionally unquoted: one word per host
    for host in ${hosts}; do
      target="${host}.${DNS_ZONE}:${port}"
      # [.] keeps the dots literal inside the sed address regex
      sed -i "/${target//./[.]}/d" \
              /etc/monitor/prometheus.yaml
    done
  done
}

# Bring up everything needed to observe an experiment
# Order: grafana datasources, data directory, loki + promtail, wait for the
# exchange keys, scrape targets, exporter configuration, prometheus services
# Ends the script with `exit 0` - it does not return to the caller
function init_monitor() {
  local nginx_target

  update_grafana
  setup_data_dir

  systemctl restart loki promtail

  wait_for_keys "${EXCHANGE_GW_DOMAIN}"

  # The node exporter job is appended only once: skip it when disabled or
  # when the 'nodes' job is already part of the config
  if [[ "${ENABLE_EXPORTERS}" == "true" ]] \
     && ! grep -q "job_name: 'nodes'" /etc/monitor/prometheus.yaml; then
    cat /etc/monitor/node-exporters.yaml.tpl \
      | envsubst >> /etc/monitor/prometheus.yaml
    add_wallet_nodes_to_prometheus
    add_shard_nodes_to_prometheus
    add_merchant_nodes_to_prometheus
    add_proxy_nodes_to_prometheus
    add_exchange_nodes_to_prometheus
  fi

  # Add nginx exporters (port 9113 on every eproxy host)
  for HOST in $(get_hosts "eproxy"); do
    nginx_target="${HOST}.${DNS_ZONE}:9113"
    if grep -q "${nginx_target}" /etc/monitor/prometheus.yaml; then
      continue
    fi
    sed -i "/<NGINX_EXPORTERS_HERE>/a \ \ \ \ \ \ - '${nginx_target}'" \
            /etc/monitor/prometheus.yaml
  done

  # No exchange process is registered yet, hence the offset of 0
  add_exchanges_to_prometheus 0 ${NUM_EXCHANGE_PROCESSES}

  configure_prometheus_and_exporters

  systemctl restart prometheus-postgres-exporter prometheus taler-prometheus-backup.timer

  exit 0
}
  
# Dispatch on the action given as first argument; $2 and $3 are handed on
# to the handlers unchanged (quoted, so they cannot be split or globbed)
case "$1" in
  init)
    # init_monitor ends with `exit 0`, the reload below is not reached
    init_monitor
    ;;
  start)
    add_wallet_nodes_to_prometheus
    add_exchanges_to_prometheus "$2" "$3"
    ;;
  stop-exchanges)
    remove_exchanges_from_prometheus "$2" "$3"
    ;;
  *)
    taler_perf_help "$0" "$INFO_MSG" "$OPT_MSG"
    ;;
esac

# Make prometheus pick up the modified scrape targets
systemctl reload prometheus
exit 0