ansible-taler-exchange

Ansible playbook to deploy a production Taler Exchange
Log | Files | Refs | Submodules | README | LICENSE

commit d40f54f7cfe7eee2e72b2f16ea53677e8aef9d87
parent d1a1c6ec3501c2dbe6175f59cd6b4a19c18d99d6
Author: Christian Grothoff <christian@grothoff.org>
Date:   Tue, 21 Jan 2025 16:06:20 +0100

work on alloy/loki configuration

Diffstat:
M roles/auditor/templates/etc/nginx/sites-available/auditor-nginx.conf.j2 | 1 +
M roles/challenger/templates/etc/nginx/sites-available/email-challenger-nginx.conf.j2 | 2 ++
M roles/challenger/templates/etc/nginx/sites-available/postal-challenger-nginx.conf.j2 | 2 ++
M roles/challenger/templates/etc/nginx/sites-available/sms-challenger-nginx.conf.j2 | 2 ++
M roles/exchange/templates/etc/nginx/sites-available/exchange-nginx.conf.j2 | 2 ++
M roles/monitoring/files/etc/default/alloy | 2 +-
M roles/monitoring/templates/etc/alloy/config.alloy | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M roles/webserver/files/etc/nginx/conf.d/log-format-apm.conf | 18 ++++++++++++++++++
8 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/roles/auditor/templates/etc/nginx/sites-available/auditor-nginx.conf.j2 b/roles/auditor/templates/etc/nginx/sites-available/auditor-nginx.conf.j2
@@ -29,6 +29,7 @@ server {
 
   error_log /var/log/nginx/auditor.{{ DOMAIN_NAME }}.err;
   access_log /var/log/nginx/auditor.{{ DOMAIN_NAME }}.log apm;
+  access_log /var/log/nginx/auditor.{{ DOMAIN_NAME }}.tal taler if=$log_perf;
   location / {
     # Most of the API we will put behind simple access control for now.
     if ($http_authorization != "Bearer {{ AUDITOR_ACCESS_TOKEN }}") {
diff --git a/roles/challenger/templates/etc/nginx/sites-available/email-challenger-nginx.conf.j2 b/roles/challenger/templates/etc/nginx/sites-available/email-challenger-nginx.conf.j2
@@ -28,6 +28,8 @@ server {
 
   error_log /var/log/nginx/email.challenger.{{ DOMAIN_NAME }}.err;
   access_log /var/log/nginx/email.challenger.{{ DOMAIN_NAME }}.log apm;
+  access_log /var/log/nginx/email.challenger.{{ DOMAIN_NAME }}.tal taler if=$log_perf;
+
   location / {
     proxy_pass http://unix:/var/run/challenger-email/challenger-http.sock;
   }
diff --git a/roles/challenger/templates/etc/nginx/sites-available/postal-challenger-nginx.conf.j2 b/roles/challenger/templates/etc/nginx/sites-available/postal-challenger-nginx.conf.j2
@@ -26,6 +26,8 @@ server {
 
   error_log /var/log/nginx/postal.challenger.{{ DOMAIN_NAME }}.err;
   access_log /var/log/nginx/postal.challenger.{{ DOMAIN_NAME }}.log apm;
+  access_log /var/log/nginx/postal.challenger.{{ DOMAIN_NAME }}.tal taler if=$log_perf;
+
   location / {
     proxy_pass http://unix:/var/run/challenger-postal/challenger-http.sock;
   }
diff --git a/roles/challenger/templates/etc/nginx/sites-available/sms-challenger-nginx.conf.j2 b/roles/challenger/templates/etc/nginx/sites-available/sms-challenger-nginx.conf.j2
@@ -26,6 +26,8 @@ server {
 
   error_log /var/log/nginx/sms.challenger.{{ DOMAIN_NAME }}.err;
   access_log /var/log/nginx/sms.challenger.{{ DOMAIN_NAME }}.log apm;
+  access_log /var/log/nginx/sms.challenger.{{ DOMAIN_NAME }}.tal taler if=$log_perf;
+
   location / {
     proxy_pass http://unix:/var/run/challenger-sms/challenger-http.sock;
   }
diff --git a/roles/exchange/templates/etc/nginx/sites-available/exchange-nginx.conf.j2 b/roles/exchange/templates/etc/nginx/sites-available/exchange-nginx.conf.j2
@@ -28,6 +28,8 @@ server {
 
   error_log /var/log/nginx/exchange.{{ DOMAIN_NAME }}.err;
   access_log /var/log/nginx/exchange.{{ DOMAIN_NAME }}.log apm;
+  access_log /var/log/nginx/exchange.{{ DOMAIN_NAME }}.tal taler if=$log_perf;
+
   location / {
     proxy_pass http://unix:/var/run/taler-exchange/httpd/exchange-http.sock;
   }
diff --git a/roles/monitoring/files/etc/default/alloy b/roles/monitoring/files/etc/default/alloy
@@ -3,7 +3,7 @@
 CONFIG_FILE="/etc/alloy/config.alloy"
 
 # User-defined arguments to pass to the run command.
-CUSTOM_ARGS="--server.http.listen-addr=127.0.0.1:12345"
+CUSTOM_ARGS="--server.http.listen-addr=127.0.0.1:12345 --disable-reporting"
 
 # Restart on system upgrade. Defaults to true.
 RESTART_ON_UPGRADE=true
diff --git a/roles/monitoring/templates/etc/alloy/config.alloy b/roles/monitoring/templates/etc/alloy/config.alloy
@@ -83,3 +83,90 @@ prometheus.scrape "default" {
     // prometheus.relabel).
   ]
 }
+
+
+loki.source.file "nginx_taler_performance_logs" {
+  targets = [{
+    __path__ = "/var/log/nginx/*.tal",
+    job = "nginx/performance",
+  }]
+  forward_to = [loki.process.perf_logs.receiver]
+}
+
+
+// https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/
+loki.process "perf_logs" {
+
+  // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stageregex-block
+  stage.regex {
+    expression = `uri=/(?P<ep>[a-zA-Z]+)(?:/\w+)?(?:/(?P<act>[a-zA-Z-]+))? s=(?P<status>\d{3}).*urt=(?P<urt>\d+\.\d+|-) rt=(?P<response_time>\d+\.\d+) rl=(?P<request_length>\d+) bs=(?P<bytes_sent>\d+)`
+  }
+
+  // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stagetemplate-block
+  stage.template {
+    source = "endpoint"
+    template = `{{ printf "%s-%s" .ep .act | trimSuffix "-" }}`
+  }
+
+  stage.template {
+    source = "upstream_response_time"
+    template = `{{ .urt | replace "-" "0" }}`
+  }
+
+  stage.metrics {
+    // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metriccounter-block
+    metric.counter {
+      name = "total_requests"
+      prefix = "taler_requests_"
+      description = "Total Requests"
+      match_all = true
+      action = "inc"
+    }
+    metric.gauge {
+      name = "response_time"
+      prefix = "taler_requests_"
+      description = "Time taken for Nginx to respond"
+      source = "response_time"
+      max_idle_duration = "24h"
+      action = "set"
+    }
+    // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metrichistogram-block
+    // https://www.robustperception.io/how-does-a-prometheus-histogram-work
+    metric.histogram {
+      name = "request_length_hist"
+      prefix = "taler_requests_"
+      description = "Request Length reported from Nginx"
+      source = "request_length"
+      max_idle_duration = "24h"
+      buckets = [1,10,50,100,200,500,1000,2000,5000]
+    }
+
+    metric.histogram {
+      name = "bytes_sent_hist"
+      prefix = "taler_requests_"
+      description = "Number of bytes sent, reported from Nginx"
+      source = "bytes_sent"
+      max_idle_duration = "24h"
+      buckets = [1,10,50,100,200,500,1000,2000,5000]
+    }
+    metric.histogram {
+      name = "response_time_hist"
+      prefix = "taler_requests_"
+      description = "Time taken for Nginx to respond"
+      source = "response_time"
+      max_idle_duration = "24h"
+      buckets = [0.001,0.0025,0.005,0.010,0.025,0.050,0.1,0.25,0.5,1,2,5]
+    }
+    metric.histogram {
+      name = "upstream_response_time_hist"
+      prefix = "taler_requests_"
+      description = "Time taken for the Exchange to respond to Nginx"
+      source = "upstream_response_time"
+      max_idle_duration = "24h"
+      buckets = [0.001,0.0025,0.005,0.010,0.025,0.050,0.1,0.25,0.5,1,2,5]
+    }
+  }
+
+  forward_to = [loki.write.grafana_loki.receiver]
+
+}
\ No newline at end of file
diff --git a/roles/webserver/files/etc/nginx/conf.d/log-format-apm.conf b/roles/webserver/files/etc/nginx/conf.d/log-format-apm.conf
@@ -7,3 +7,21 @@ log_format apm '”$time_local” client=$remote_addr '
                'upstream_addr=$upstream_addr '
                'upstream_status=$upstream_status '
                'request_time=$request_time';
+
+# This format is expected by alloy (see monitoring role)
+log_format taler 'm=$request_method uri=$uri s=$status '
+                 'uct=$upstream_connect_time '
+                 'urt=$upstream_response_time '
+                 'rt=$request_time '
+                 'rl=$request_length '
+                 'bs=$bytes_sent';
+
+# Determines which HTTP methods we log performance data for.
+# Specifically, we exclude HEAD (don't care) and GET (often long-polling)
+# and anything non-canonical for GNU Taler (like OPTIONS).
+map $request_method $log_perf {
+  default 0;
+  POST 1;
+  PATCH 1;
+  DELETE 1;
+}