service/blackbox: add alerts

This commit is contained in:
Felix Buehler 2023-07-02 22:57:03 +02:00
parent e6639e6cfb
commit 2315f51b93

View file

@ -81,6 +81,69 @@ in
];
};
# Alert rules for the blackbox exporter, registered through the
# `my.services.prometheus.rules` option. Each attribute is one alert made of a
# PromQL `condition`, a templated `description`, a `time` value and a set of
# `labels`.
# NOTE(review): `time` presumably maps to the rule's `for:` duration — confirm
# against the module that declares this option.
my.services.prometheus.rules = {
  # A probe reported failure (probe_success is 0).
  BlackboxProbeFailed = {
    condition = ''probe_success == 0'';
    description = "Blackbox probe failed (instance {{ $labels.instance }}): {{$value}}";
    time = "0m";
    labels = {
      severity = "critical";
    };
  };

  # The exporter's last configuration reload was not successful.
  BlackboxConfigurationReloadFailure = {
    condition = ''blackbox_exporter_config_last_reload_successful != 1'';
    description = "Blackbox configuration reload failure\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "0m";
    labels = {
      severity = "warning";
    };
  };

  # Probe duration averaged over the last minute is above one second.
  BlackboxSlowProbe = {
    condition = ''avg_over_time(probe_duration_seconds[1m]) > 1'';
    description = "Blackbox probe took more than 1s to complete\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "1m";
    labels = {
      severity = "warning";
    };
  };

  # The probed endpoint answered with a status code outside 200-399.
  BlackboxProbeHttpFailure = {
    condition = ''probe_http_status_code <= 199 OR probe_http_status_code >= 400'';
    description = "HTTP status code is not 200-399\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "0m";
    # Every other rule in this set carries a severity label; without one this
    # alert would be missed by anything that selects on severity. "critical"
    # matches BlackboxProbeFailed, the other hard-failure rule.
    labels = {
      severity = "critical";
    };
  };

  # Days until the earliest certificate expiry (seconds / 86400, rounded to
  # 0.1) fall in [3, 20). The lower bound keeps this rule from overlapping
  # with BlackboxSslCertificateWillExpireShortly.
  BlackboxSslCertificateWillExpireSoon = {
    condition = ''3 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 20'';
    description = "SSL certificate expires in less than 20 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "0m";
    labels = {
      severity = "warning";
    };
  };

  # Same computation as above, for the [0, 3) day window.
  BlackboxSslCertificateWillExpireShortly = {
    condition = ''0 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 3'';
    description = "SSL certificate expires in less than 3 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "0m";
    labels = {
      severity = "critical";
    };
  };

  # HTTP duration metric averaged over the last minute is above one second.
  BlackboxProbeSlowHttp = {
    condition = ''avg_over_time(probe_http_duration_seconds[1m]) > 1'';
    description = "HTTP request took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "1m";
    labels = {
      severity = "warning";
    };
  };

  # ICMP duration metric averaged over the last minute is above one second.
  BlackboxProbeSlowPing = {
    condition = ''avg_over_time(probe_icmp_duration_seconds[1m]) > 1'';
    description = "Blackbox ping took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "1m";
    labels = {
      severity = "warning";
    };
  };
};
services.grafana.provision.dashboards.settings.providers = [
{
name = "Blackbox";