ansible-taler-exchange

Ansible playbook to deploy a production Taler Exchange
Log | Files | Refs | Submodules | README | LICENSE

commit 70dadf7dfcef755fe8aac6245f956f3511e862bf
parent dbc54484de6a2556252f216ca45c1ee3ac9125a0
Author: Christian Grothoff <christian@grothoff.org>
Date:   Tue, 28 Jan 2025 17:47:00 +0100

include alloy secret token configuration

Diffstat:
M playbooks/test-secrets.yml | 2 ++
D roles/monitoring/files/etc/alloy/config.alloy | 172 -------------------------------------------------------------------------------
M roles/monitoring/tasks/main.yml | 4 ++--
A roles/monitoring/templates/etc/alloy/config.alloy | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M roles/monitoring/templates/etc/nginx/sites-available/monitoring-nginx.conf.j2 | 2 +-
5 files changed, 176 insertions(+), 175 deletions(-)

diff --git a/playbooks/test-secrets.yml b/playbooks/test-secrets.yml @@ -24,4 +24,6 @@ AUDITOR_ACCESS_TOKEN: secret-token:FIXME # Bearer access token for monitoring PROMETHEUS_ACCESS_TOKEN: secret-token:FIXME + +# Bearer access token for loki.taler-systems.com LOKI_ACCESS_TOKEN: secret-token:FIXME diff --git a/roles/monitoring/files/etc/alloy/config.alloy b/roles/monitoring/files/etc/alloy/config.alloy @@ -1,171 +0,0 @@ -// Sample config for Alloy. -// -// For a full configuration reference, see https://grafana.com/docs/alloy -logging { - level = "warn" -} - -// Which log files to monitor -local.file_match "local_files" { - path_targets = [ - {"__path__" = "/var/log/*.log"}, - {"__path__" = "/var/log/nginx/*.err"}, - ] - sync_period = "5s" -} - -// Which log files to monitor -local.file_match "http_logs" { - path_targets = [ - {"__path__" = "/var/log/nginx/*.log"}, - ] - sync_period = "5s" -} - -// Connect local_files as source to filter_logs -// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/ -loki.source.file "log_scrape" { - targets = local.file_match.local_files.targets - forward_to = [loki.process.filter_logs.receiver] - tail_from_end = true -} - -loki.source.file "web_scrape" { - targets = local.file_match.http_logs.targets - forward_to = [loki.process.filter_logs.receiver] - tail_from_end = true -} - -// Filter the logs -// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/ -loki.process "filter_logs" { - stage.drop { - source = "http_logs" - expression = ".*GET.* 200 .*" - drop_counter_reason = "successful HTTP GETs" - } - forward_to = [loki.write.grafana_loki.receiver] -} - -// Push the logs to loki -// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/ -loki.write "grafana_loki" { - endpoint { - // FIXME: loki not yet running on deltoid (and not even in DNS!) 
- url = "https://loki.taler.net/loki/api/v1/push" - - // basic_auth { - // username = "admin" - // password = "admin" - // } - } -} - -// This was in the defaults, FIXME: not sure what it does... -prometheus.exporter.unix "default" { - include_exporter_metrics = true - disable_collectors = ["mdadm"] -} - -// This was in the defaults, FIXME: not sure what it does... -prometheus.scrape "default" { - targets = array.concat( - prometheus.exporter.unix.default.targets, - [{ - // Self-collect metrics - job = "alloy", - __address__ = "127.0.0.1:12345", - }], - ) - - forward_to = [ - // TODO: components to forward metrics to (like prometheus.remote_write or - // prometheus.relabel). - ] -} - - -loki.source.file "nginx_taler_performance_logs" { - targets = [{ - __path__ = "/var/log/nginx/*.tal", - job = "nginx/performance", - }] - forward_to = [loki.process.perf_logs.receiver] -} - - -# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/ -loki.process "perf_logs" { - -# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stageregex-block - stage.regex { - expression = "uri=/(?P<ep>[a-zA-Z]+)(?:/\w+)?(?:/(?P<act>[a-zA-Z-]+))? 
s=(?P<status>\d{3}).*urt=(?P<urt>\d+\.\d+|-) rt=(?P<response_time>\d+\.\d+) rl=(?P<request_length>\d+) bs=(?P<bytes_sent>\d+)" - } - -# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stagetemplate-block - stage.template { - source = "endpoint" - template = '{{ printf "%s-%s" .ep .act | trimSuffix "-" }}' - } - - stage.template { - source = upstream_response_time - template = '{{ .urt | replace "-" "0" }}' - } - - stage.metrics { -# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metriccounter-block - metric.counter { - name = 'total_requests' - prefix = 'taler_requests_' - description = "Total Requests" - match_all = true - action = inc - } - metric.gauge { - name = 'response_time' - prefix = 'taler_requests_' - description = "Time taken for Nginx to respond" - source = 'response_time' - max_idle_duration = "24h" - action = set - } -# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metrichistogram-block -# https://www.robustperception.io/how-does-a-prometheus-histogram-work - metric.histogram { - name = "request_length_hist" - prefix = 'taler_requests_' - description = "Request Length reported from Nginx" - source = 'request_length' - max_idle_duration = "24h" - buckets = [1,10,50,100,200,500,1000,2000,5000] - } - - metric.histogram { - name = "bytes_sent_hist" - prefix = 'taler_requests_' - description = "Number of bytes sent, reported from Nginx" - source = 'bytes_sent' - max_idle_duration = "24h" - buckets = [1,10,50,100,200,500,1000,2000,5000] - } - metric.histogram { - name = "response_time_hist" - prefix = 'taler_requests_' - description = "Time taken for Nginx to respond" - source = 'response_time' - max_idle_duration = "24h" - buckets = [0.001,0.0025,0.005,0.010,0.025,0.050,0.1,0.25,0.5,1,2,5] - } - metric.histogram { - name = "upstream_response_time_hist" - prefix = 'taler_requests_' - description = "Time taken for the Exchange to respond to Nginx" - source = 
'upstream_response_time' - max_idle_duration = "24h" - buckets = [0.001,0.0025,0.005,0.010,0.025,0.050,0.1,0.25,0.5,1,2,5] - } - - forward_to = [loki.write.grafana_loki.receiver] - -} -\ No newline at end of file diff --git a/roles/monitoring/tasks/main.yml b/roles/monitoring/tasks/main.yml @@ -181,8 +181,8 @@ mode: 0644 - name: Configure alloy log export - copy: - src: etc/alloy/config.alloy + template: + src: templates/etc/alloy/config.alloy dest: /etc/alloy/config.alloy owner: root group: root diff --git a/roles/monitoring/templates/etc/alloy/config.alloy b/roles/monitoring/templates/etc/alloy/config.alloy @@ -0,0 +1,170 @@ +// Sample config for Alloy. +// +// For a full configuration reference, see https://grafana.com/docs/alloy +logging { + level = "warn" +} + +// Which log files to monitor +local.file_match "local_files" { + path_targets = [ + {"__path__" = "/var/log/*.log"}, + {"__path__" = "/var/log/nginx/*.err"}, + ] + sync_period = "5s" +} + +// Which log files to monitor +local.file_match "http_logs" { + path_targets = [ + {"__path__" = "/var/log/nginx/*.log"}, + ] + sync_period = "5s" +} + +// Connect local_files as source to filter_logs +// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/ +loki.source.file "log_scrape" { + targets = local.file_match.local_files.targets + forward_to = [loki.process.filter_logs.receiver] + tail_from_end = true +} + +loki.source.file "web_scrape" { + targets = local.file_match.http_logs.targets + forward_to = [loki.process.filter_logs.receiver] + tail_from_end = true +} + +// Filter the logs +// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/ +loki.process "filter_logs" { + stage.drop { + source = "http_logs" + expression = ".*GET.* 200 .*" + drop_counter_reason = "successful HTTP GETs" + } + forward_to = [loki.write.grafana_loki.receiver] +} + +// Push the logs to loki +// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/ +loki.write "grafana_loki" { + 
endpoint { + url = "https://loki.taler-systems.com/loki/api/v1/push" + authorization { + type = "Bearer" + credentials = "{{ LOKI_ACCESS_TOKEN }}" + } + } +} + +// This was in the defaults, FIXME: not sure what it does... +prometheus.exporter.unix "default" { + include_exporter_metrics = true + disable_collectors = ["mdadm"] +} + +// This was in the defaults, FIXME: not sure what it does... +prometheus.scrape "default" { + targets = array.concat( + prometheus.exporter.unix.default.targets, + [{ + // Self-collect metrics + job = "alloy", + __address__ = "127.0.0.1:12345", + }], + ) + + forward_to = [ + // TODO: components to forward metrics to (like prometheus.remote_write or + // prometheus.relabel). + ] +} + + +loki.source.file "nginx_taler_performance_logs" { + targets = [{ + __path__ = "/var/log/nginx/*.tal", + job = "nginx/performance", + }] + forward_to = [loki.process.perf_logs.receiver] +} + + +# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/ +loki.process "perf_logs" { + +# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stageregex-block + stage.regex { + expression = "uri=/(?P<ep>[a-zA-Z]+)(?:/\w+)?(?:/(?P<act>[a-zA-Z-]+))? 
s=(?P<status>\d{3}).*urt=(?P<urt>\d+\.\d+|-) rt=(?P<response_time>\d+\.\d+) rl=(?P<request_length>\d+) bs=(?P<bytes_sent>\d+)" + } + +# +# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stagetemplate-block + stage.template { + source = "endpoint" + template = '{{ '{{' }} printf "%s-%s" .ep .act | trimSuffix "-" {{ '}}' }}' + } + + stage.template { + source = upstream_response_time + template = '{{ '{{' }} .urt | replace "-" "0" {{ '}}' }}' + } + + stage.metrics { +# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metriccounter-block + metric.counter { + name = 'total_requests' + prefix = 'taler_requests_' + description = "Total Requests" + match_all = true + action = inc + } + metric.gauge { + name = 'response_time' + prefix = 'taler_requests_' + description = "Time taken for Nginx to respond" + source = 'response_time' + max_idle_duration = "24h" + action = set + } +# https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metrichistogram-block +# https://www.robustperception.io/how-does-a-prometheus-histogram-work + metric.histogram { + name = "request_length_hist" + prefix = 'taler_requests_' + description = "Request Length reported from Nginx" + source = 'request_length' + max_idle_duration = "24h" + buckets = [1,10,50,100,200,500,1000,2000,5000] + } + + metric.histogram { + name = "bytes_sent_hist" + prefix = 'taler_requests_' + description = "Number of bytes sent, reported from Nginx" + source = 'bytes_sent' + max_idle_duration = "24h" + buckets = [1,10,50,100,200,500,1000,2000,5000] + } + metric.histogram { + name = "response_time_hist" + prefix = 'taler_requests_' + description = "Time taken for Nginx to respond" + source = 'response_time' + max_idle_duration = "24h" + buckets = [0.001,0.0025,0.005,0.010,0.025,0.050,0.1,0.25,0.5,1,2,5] + } + metric.histogram { + name = "upstream_response_time_hist" + prefix = 'taler_requests_' + description = "Time taken for the Exchange to 
respond to Nginx" + source = 'upstream_response_time' + max_idle_duration = "24h" + buckets = [0.001,0.0025,0.005,0.010,0.025,0.050,0.1,0.25,0.5,1,2,5] + } + + forward_to = [loki.write.grafana_loki.receiver] + +} +\ No newline at end of file diff --git a/roles/monitoring/templates/etc/nginx/sites-available/monitoring-nginx.conf.j2 b/roles/monitoring/templates/etc/nginx/sites-available/monitoring-nginx.conf.j2 @@ -68,7 +68,7 @@ server { # See /etc/default/alloy for the export location /alloy/ { # Put API behind simple access control. - if ($http_authorization != "Bearer {{ LOKI_ACCESS_TOKEN }}") { + if ($http_authorization != "Bearer {{ PROMETHEUS_ACCESS_TOKEN }}") { return 401; } rewrite ^/alloy/(.*)$ /$1 break;