ansible-taler-exchange

Ansible playbook to deploy a production Taler Exchange

config.alloy


// For a full configuration reference, see https://grafana.com/docs/alloy
logging {
  level = "warn"
}

// Push the logs to loki
// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
loki.write "grafana_loki" {
    endpoint {
        url = "https://loki.taler-systems.com/loki/api/v1/push"
        tenant_id = "{{ TARGET_HOST_NAME }}"
        authorization {
          type = "Bearer"
          credentials = "{{ LOKI_ACCESS_TOKEN }}"
        }
    }
}
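
// tenant_id is sent to Loki as the X-Scope-OrgID header, so every deployed
// host writes into its own tenant. TARGET_HOST_NAME and LOKI_ACCESS_TOKEN are
// Ansible (Jinja2) variables substituted when this template is rendered.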


// Which log files to monitor: all regular system log files (filtered below for warnings and errors)
local.file_match "local_files" {
   path_targets = [
        {
                "__path__" = "/var/log/*.log",
                "job"      = "system logs",
                "hostname" = "{{ TARGET_HOST_NAME }}",
        },
   ]
   sync_period = "5s"
}


// Connect local_files as source to filter_generic_logs
// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
loki.source.file "log_scrape" {
    targets    = local.file_match.local_files.targets
    forward_to = [loki.process.filter_generic_logs.receiver]
    tail_from_end = true
}
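
// tail_from_end = true: when no read position is stored for a file yet, start
// tailing at its end instead of re-ingesting the whole file.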

// Which log files to monitor: all postgres logs
local.file_match "postgres_log_files" {
   path_targets = [
        {
                "__path__" = "/var/log/postgresql/*.log",
                "job"      = "postgres logs",
                "hostname" = "{{ TARGET_HOST_NAME }}",
        },
   ]
   sync_period = "5s"
}


// Connect postgres_log_files as source to filter_generic_logs
// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
loki.source.file "postgres_log_scrape" {
    targets    = local.file_match.postgres_log_files.targets
    forward_to = [loki.process.filter_generic_logs.receiver]
    tail_from_end = true
}

// Which log files to monitor: all nginx error logs
local.file_match "nginx_errors" {
   path_targets = [
        {
                "__path__" = "/var/log/nginx/*.err",
                "job"      = "nginx errors",
                "hostname" = "{{ TARGET_HOST_NAME }}",
        },
   ]
   sync_period = "5s"
}

// Connect nginx_errors directly to loki
// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
loki.source.file "nginx_error_scrape" {
    targets    = local.file_match.nginx_errors.targets
    forward_to = [loki.write.grafana_loki.receiver]
    tail_from_end = true
}


// Which log files to monitor: nginx regular logs
local.file_match "http_logs" {
   path_targets = [
        {
                "__path__" = "/var/log/nginx/*.log",
                "job"      = "nginx logs",
                "hostname" = "{{ TARGET_HOST_NAME }}",
        },
   ]
   sync_period = "5s"
}

// Connect http_logs as source to filter_http
// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
loki.source.file "http_scrape" {
    targets    = local.file_match.http_logs.targets
    forward_to = [loki.process.filter_http.receiver]
    tail_from_end = true
}

// Filter the HTTP logs
// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
loki.process "filter_http" {

     // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stageregex-block
    stage.regex {
      expression  = "(?P<ip>\\S+) (?P<identd>\\S+) (?P<user>\\S+) \\[(?P<timestamp>[\\w:\\/]+\\s[+\\\\-]\\d{4})\\] \"(?P<action>\\S+)\\s?(?P<path>\\S+)\\s?(?P<protocol>\\S+)?\" (?P<status>\\d{3}|-) (?P<size>\\d+|-)\\s?\"?(?P<referrer>[^\\\"]*)\"?\\s?\"?(?P<useragent>[^\\\"]*)?\"?"
    }
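
    // Illustration only: this expects nginx's usual "combined" access log
    // format, e.g. (made-up values):
    //   203.0.113.7 - - [10/Oct/2025:13:55:36 +0000] "POST /batch-deposit HTTP/1.1" 200 312 "-" "taler-merchant"
    // from which ip, timestamp, action, path, status, size, referrer and
    // useragent are extracted.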

    // exported via http://localhost:12345/metrics to Prometheus
    stage.metrics {
      metric.histogram {
        name = "http_status_codes"
        prefix = "taler_requests_"
        description = "HTTP status codes, reported from Nginx (all requests)"
        source = "status"
        max_idle_duration = "24h"
        buckets = [100,199,200,201,202,203,299,300,399,400,401,402,403,404,405,406,407,408,409,410,411,418,419,420,450,451,452,499,500,599]
      }

      // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metriccounter-block
      metric.counter {
        name = "total_requests"
        prefix = "taler_requests_"
        description = "Total Requests"
        match_all = true
        action = "inc"
      }
    }
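
    // These metrics appear on Alloy's own /metrics endpoint under the
    // configured prefix, e.g. taler_requests_http_status_codes (a histogram,
    // so Prometheus scrapes _bucket/_sum/_count series) plus the
    // taler_requests_total_requests counter.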

    // Drop successful GETs from Loki; the metrics above have already counted them
    stage.drop {
        expression  = ".*GET.* 200 .*"
        drop_counter_reason = "successful HTTP GETs"
    }
    forward_to = [loki.write.grafana_loki.receiver]
}


// Monitor the logs with the latency statistics
local.file_match "nginx_taler_performance_logs" {
  path_targets    = [
     {
         "__path__"  = "/var/log/nginx/*.tal",
         "job"       = "nginx/performance",
         "hostname"  = "{{ TARGET_HOST_NAME }}",
     },
  ]
  sync_period = "5s"
}


// Connect nginx_taler_performance_logs as source to perf_logs
// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
loki.source.file "perf_scrape" {
   targets    = local.file_match.nginx_taler_performance_logs.targets
   forward_to = [loki.process.perf_logs.receiver]
   tail_from_end = true
}



// Here we export the *.tal logs with the Nginx latency data.
// https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/
loki.process "perf_logs" {

  // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stageregex-block
  stage.regex {
    expression  = "uri=/(?P<ep>[a-zA-Z]+)(?:/\\w+)?(?:/(?P<act>[a-zA-Z-]+))? s=(?P<status>\\d{3}).*urt=(?P<urt>\\d+\\.\\d+|-) rt=(?P<response_time>\\d+\\.\\d+) rl=(?P<request_length>\\d+) bs=(?P<bytes_sent>\\d+)"
  }
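
  // Illustration only: this expects a custom nginx log_format roughly like
  // (made-up values):
  //   uri=/coins/ABCD/deposit s=200 urt=0.004 rt=0.006 rl=1210 bs=342
  // capturing the endpoint parts (ep, act), status, upstream response time
  // (urt), total response time (rt), request length (rl) and bytes sent (bs).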

  // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#stagetemplate-block
  stage.template {
    source = "endpoint"
    template = "{{ '{{' }} printf \"%s-%s\" .ep .act | trimSuffix \"-\" {{ '}}' }}"
  }
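
  // The quoted-brace constructs above are Jinja2 escapes so that Ansible emits
  // literal Go-template delimiters; after rendering, Alloy evaluates
  //   printf "%s-%s" .ep .act | trimSuffix "-"
  // which joins ep and act into e.g. "coins-deposit", or just "keys" when no
  // action segment was captured.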

  stage.template {
    source = "upstream_response_time"
    template = "{{ '{{' }} .urt | replace \"-\" \"0\" {{ '}}' }}"
  }
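
  // After rendering, this becomes a Go template that replaces a literal "-"
  // (nginx reports "-" when no upstream was contacted) with "0", so the value
  // can still feed the upstream_response_time histogram below.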

  // exported via http://localhost:12345/metrics to Prometheus
  stage.metrics {
    // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metricgauge-block
    metric.gauge {
       name = "response_time"
       prefix = "taler_requests_"
       description = "Time taken for Nginx to respond (non-GET requests)"
       source = "response_time"
       max_idle_duration = "24h"
       action = "set"
    }
    // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metrichistogram-block
    // https://www.robustperception.io/how-does-a-prometheus-histogram-work
    metric.histogram {
      name = "request_length_hist"
      prefix = "taler_requests_"
      description = "Request Length reported from Nginx (non-GET requests)"
      source = "request_length"
      max_idle_duration = "24h"
      buckets = [1,10,50,100,200,500,1000,2000,5000]
    }

    metric.histogram {
      name = "bytes_sent_hist"
      prefix = "taler_requests_"
      description = "Number of bytes sent, reported from Nginx (non-GET requests)"
      source = "bytes_sent"
      max_idle_duration = "24h"
      buckets = [1,10,50,100,200,500,1000,2000,5000]
    }
    metric.histogram {
      name = "response_time_hist"
      prefix = "taler_requests_"
      description = "Time taken for Nginx to respond (non-GET requests)"
      source = "response_time"
      max_idle_duration = "24h"
      buckets = [0.001,0.0025,0.005,0.010,0.025,0.050,0.1,0.25,0.5,1,2,5]
    }
    metric.histogram {
      name = "upstream_response_time_hist"
      prefix = "taler_requests_"
      description = "Time taken for the Exchange to respond to Nginx (non-GET requests)"
      source = "upstream_response_time"
      max_idle_duration = "24h"
      buckets = [0.001,0.0025,0.005,0.010,0.025,0.050,0.1,0.25,0.5,1,2,5]
    }

  }
  // Finally, pass on to Loki
  forward_to  = [loki.write.grafana_loki.receiver]

}


// Monitor journald logs
// Apply the relabel rules below to map journald fields to labels,
// then forward to our generic filter
loki.source.journal "read"  {
  forward_to    = [loki.process.filter_generic_logs.receiver]
  relabel_rules = loki.relabel.journal.rules
  max_age       = "12h"
  labels        = {component = "loki.source.journal"}
}
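
// max_age limits how far back journal entries are read when Alloy starts
// (here 12 hours); the static label marks these streams as coming from
// loki.source.journal.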


// https://community.grafana.com/t/scrape-journald-log-with-alloy-docker-container/119896
loki.relabel "journal" {
  forward_to = []
  rule {
    source_labels = ["__journal__systemd_unit"]
    target_label  = "systemd_unit"
  }
  rule {
    source_labels = ["__journal__hostname"]
    target_label = "systemd_hostname"
  }
  rule {
    source_labels = ["__journal__transport"]
    target_label = "systemd_transport"
  }
}
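
// loki.source.journal exposes journald fields such as _SYSTEMD_UNIT, _HOSTNAME
// and _TRANSPORT as internal __journal_-prefixed labels; the rules above copy
// them into regular labels so they are kept when the entries reach Loki.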


// Generic filter for logs
// See: https://grafana.com/docs/alloy/latest/tutorials/send-logs-to-loki/
loki.process "filter_generic_logs" {
    // Determine log level:
    // https://community.grafana.com/t/extract-log-level-via-regex-and-set-it-as-a-label/134938/5
    stage.regex {
      expression = `(?P<level>(?i)\b(info|debug|error|warn|warning|trace|fatal)\b)`
    }
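
    // Example (made-up line): "taler-exchange-httpd[123]: warn: low disk space"
    // sets the extracted field level="warn", which the drop stages and
    // counters below match on.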

    // https://grafana.com/docs/alloy/latest/tutorials/processing-logs/
    // Drop debug
    stage.drop {
        source = "level"
        value = "debug"
        drop_counter_reason = "boring debugging data"
    }
    // Drop info
    stage.drop {
        source = "level"
        value = "info"
        drop_counter_reason = "boring info logs"
    }
    // Drop trace
    stage.drop {
        source = "level"
        value = "trace"
        drop_counter_reason = "boring trace logs"
    }

    stage.metrics {
      // https://grafana.com/docs/alloy/latest/reference/components/loki/loki.process/#metriccounter-block
      metric.counter {
        name = "warn_log_level"
        prefix = "system_logs_"
        description = "Warnings in system logs"
        source = "level"
        value = "warn"
        action = "inc"
      }
      metric.counter {
        name = "warn_log_level"
        prefix = "system_logs_"
        description = "Warnings in system logs"
        source = "level"
        value = "warning"
        action = "inc"
      }
      metric.counter {
        name = "error_log_level"
        prefix = "system_logs_"
        description = "Errors in system logs"
        source = "level"
        value = "error"
        action = "inc"
      }
      metric.counter {
        name = "fatal_log_level"
        prefix = "system_logs_"
        description = "Fatal reports in system logs"
        source = "level"
        value = "fatal"
        action = "inc"
      }
    }

    forward_to = [loki.write.grafana_loki.receiver]
}
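
// This template is rendered by Ansible; assuming the stock Debian/Ubuntu Alloy
// packaging, it is installed as /etc/alloy/config.alloy and loaded by the
// alloy service (equivalent to: alloy run /etc/alloy/config.alloy).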