diff --git a/modules/services/blackbox/default.nix b/modules/services/blackbox/default.nix index 1ee7c26..ef855ef 100644 --- a/modules/services/blackbox/default.nix +++ b/modules/services/blackbox/default.nix @@ -92,7 +92,7 @@ in my.services.prometheus.rules = { BlackboxProbeFailed = { - condition = ''probe_success == 0''; + condition = "probe_success == 0"; description = "Blackbox probe failed (instance {{ $labels.instance }}): {{$value}}"; time = "1m"; labels = { @@ -100,7 +100,7 @@ in }; }; BlackboxConfigurationReloadFailure = { - condition = ''blackbox_exporter_config_last_reload_successful != 1''; + condition = "blackbox_exporter_config_last_reload_successful != 1"; description = "Blackbox configuration reload failure\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"; time = "0m"; labels = { @@ -108,7 +108,7 @@ in }; }; BlackboxSlowProbe = { - condition = ''avg_over_time(probe_duration_seconds[1m]) > 2''; + condition = "avg_over_time(probe_duration_seconds[1m]) > 2"; description = "Blackbox probe took more than 2s to complete\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"; time = "1m"; labels = { @@ -116,12 +116,12 @@ in }; }; BlackboxProbeHttpFailure = { - condition = ''probe_http_status_code <= 199 OR probe_http_status_code >= 400''; + condition = "probe_http_status_code <= 199 OR probe_http_status_code >= 400"; description = "HTTP status code is not 200-399\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"; time = "1m"; }; BlackboxSslCertificateWillExpireSoon = { - condition = ''3 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 20''; + condition = "3 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 20"; description = "SSL certificate expires in less than 20 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"; time = "0m"; labels = { @@ -129,7 +129,7 @@ in }; }; BlackboxSslCertificateWillExpireShortly = { - condition = ''0 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 3''; + condition = "0 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 3"; description = "SSL certificate expires in less than 3 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"; time = "0m"; labels = { @@ -137,7 +137,7 @@ in }; }; BlackboxProbeSlowHttp = { - condition = ''avg_over_time(probe_http_duration_seconds[1m]) > 2''; + condition = "avg_over_time(probe_http_duration_seconds[1m]) > 2"; description = "HTTP request took more than 2s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"; time = "1m"; labels = { @@ -145,7 +145,7 @@ in }; }; BlackboxProbeSlowPing = { - condition = ''avg_over_time(probe_icmp_duration_seconds[1m]) > 1''; + condition = "avg_over_time(probe_icmp_duration_seconds[1m]) > 1"; description = "Blackbox ping took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"; time = "1m"; labels = { diff --git a/modules/services/media/navidrome/default.nix b/modules/services/media/navidrome/default.nix index ef64dad..87cd51b 100644 --- a/modules/services/media/navidrome/default.nix +++ b/modules/services/media/navidrome/default.nix @@ -76,7 +76,7 @@ in my.services.prometheus.rules = { navidrome_not_enough_albums = { - condition = ''http_navidrome_album_count != 1''; + condition = "http_navidrome_album_count != 1"; description = "navidrome: not enough albums as expected: {{$value}}"; }; }; diff --git a/modules/services/node-exporter/default.nix b/modules/services/node-exporter/default.nix index e3f4648..715e517 100644 --- a/modules/services/node-exporter/default.nix +++ b/modules/services/node-exporter/default.nix @@ -115,7 +115,7 @@ in description = "{{$labels.instance}} device {{$labels.device}} on {{$labels.path}} got less than 10% space left on its filesystem"; }; filesystem_inodes_full = { - condition = ''node_filesystem_files_free / node_filesystem_files < 0.10''; + condition = "node_filesystem_files_free / node_filesystem_files < 0.10"; time = "10m"; description = "{{$labels.instance}} device {{$labels.device}} on {{$labels.path}} got less than 10% inodes left on its filesystem"; }; @@ -125,11 +125,11 @@ in description = "{{$labels.instance}}: filesystem has reported {{$value}} errors: check /sys/fs/ext4/*/errors_count"; }; disk_unusual_read = { - condition = ''sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50''; + condition = "sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50"; description = ''Disk is probably reading too much data (> 50 MB/s)\n VALUE = {{ $value }}''; }; disk_unusual_write = { - condition = ''sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50''; + condition = "sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50"; description = ''Disk is probably writing too much data (> 50 MB/s)\n VALUE = {{ $value }}''; }; # memory @@ -144,21 +144,21 @@ in description = ''CPU load is > 80%\n VALUE = {{ $value }}''; }; swap_is_filling = { - condition = ''(1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80''; + condition = "(1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80"; description = "{{$labels.host}} is using {{$value}} (>80%) of its swap space"; }; oom_kill_detected = { - condition = ''increase(node_vmstat_oom_kill[1m]) > 0''; + condition = "increase(node_vmstat_oom_kill[1m]) > 0"; description = ''OOM kill detected\n VALUE = {{ $value }}''; time = "0m"; }; # network network_unusual_throughput_in = { - condition = ''sum by (instance) (rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100''; + condition = "sum by (instance) (rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100"; description = ''Host network interfaces are probably receiving too much data (> 100 MB/s)\n VALUE = {{ $value }}''; }; network_unusual_throughput_out = { - condition = ''sum by (instance) (rate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100''; + condition = "sum by (instance) (rate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100"; description = ''Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}''; }; # uptime @@ -167,7 +167,7 @@ in description = "{{$labels.host}} just rebooted"; }; uptime = { - condition = ''node_time_seconds - node_boot_time_seconds > (30 * 24 * 60 * 60)''; + condition = "node_time_seconds - node_boot_time_seconds > (30 * 24 * 60 * 60)"; description = "Uptime monster: {{$labels.host}} has been up for more than 30 days"; }; # systemd @@ -178,7 +178,7 @@ in }; # time clock_not_synchronising = { - condition = ''min_over_time(node_timex_sync_status[1m]) == 0 and node_timex_maxerror_seconds >= 16''; + condition = "min_over_time(node_timex_sync_status[1m]) == 0 and node_timex_maxerror_seconds >= 16"; description = ''Clock not synchronising.\n VALUE = {{ $value }}''; }; }; diff --git a/modules/services/promtail/default.nix b/modules/services/promtail/default.nix index cc8374d..0fec069 100644 --- a/modules/services/promtail/default.nix +++ b/modules/services/promtail/default.nix @@ -61,9 +61,9 @@ in }; promtail_file_lagging = { - condition = ''abs(promtail_file_bytes_total - promtail_read_bytes_total) > 1e6''; + condition = "abs(promtail_file_bytes_total - promtail_read_bytes_total) > 1e6"; time = "15m"; - description = ''{{ $labels.instance }} {{ $labels.job }} {{ $labels.path }} has been lagging by more than 1MB for more than 15m''; + description = "{{ $labels.instance }} {{ $labels.job }} {{ $labels.path }} has been lagging by more than 1MB for more than 15m"; }; };