mirror of
https://github.com/Stunkymonkey/nixos.git
synced 2025-05-24 18:04:41 +02:00
service/blackbox: add alerts
This commit is contained in:
parent
e6639e6cfb
commit
2315f51b93
1 changed files with 63 additions and 0 deletions
|
@ -81,6 +81,69 @@ in
|
|||
];
|
||||
};
|
||||
|
||||
# Prometheus alert rules for the blackbox exporter, keyed by alert name.
# Every rule provides a PromQL `condition`, a templated `description`, a
# `time` duration string and a `labels.severity` value.
# NOTE(review): `time` presumably maps to the alert's `for:` duration —
# confirm against the my.services.prometheus module definition.
my.services.prometheus.rules = {
  # Fires for any series where the probe reported failure.
  BlackboxProbeFailed = {
    condition = ''probe_success == 0'';
    description = "Blackbox probe failed (instance {{ $labels.instance }}): {{$value}}";
    time = "0m";
    labels = {
      severity = "critical";
    };
  };

  # Fires when the exporter's last config reload flag is anything but 1.
  BlackboxConfigurationReloadFailure = {
    condition = ''blackbox_exporter_config_last_reload_successful != 1'';
    description = "Blackbox configuration reload failure\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "0m";
    labels = {
      severity = "warning";
    };
  };

  # Fires when the 1-minute average total probe duration exceeds 1 second.
  BlackboxSlowProbe = {
    condition = ''avg_over_time(probe_duration_seconds[1m]) > 1'';
    description = "Blackbox probe took more than 1s to complete\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "1m";
    labels = {
      severity = "warning";
    };
  };

  # Fires when the reported HTTP status code is outside the 200-399 range.
  BlackboxProbeHttpFailure = {
    condition = ''probe_http_status_code <= 199 OR probe_http_status_code >= 400'';
    description = "HTTP status code is not 200-399\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "0m";
    # This was the only rule without a severity label; added so it is
    # handled like its siblings.
    # NOTE(review): "critical" mirrors the upstream awesome-prometheus-alerts
    # rule of the same name — confirm this is the desired level.
    labels = {
      severity = "critical";
    };
  };

  # Days until the earliest certificate expiry (seconds / 86400, rounded to
  # 0.1); fires while that value is in [3, 20).
  BlackboxSslCertificateWillExpireSoon = {
    condition = ''3 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 20'';
    description = "SSL certificate expires in less than 20 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "0m";
    labels = {
      severity = "warning";
    };
  };

  # Same computation as above; fires while the value is in [0, 3).
  BlackboxSslCertificateWillExpireShortly = {
    condition = ''0 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 3'';
    description = "SSL certificate expires in less than 3 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "0m";
    labels = {
      severity = "critical";
    };
  };

  # Fires when the 1-minute average HTTP phase duration exceeds 1 second.
  BlackboxProbeSlowHttp = {
    condition = ''avg_over_time(probe_http_duration_seconds[1m]) > 1'';
    description = "HTTP request took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "1m";
    labels = {
      severity = "warning";
    };
  };

  # Fires when the 1-minute average ICMP duration exceeds 1 second.
  BlackboxProbeSlowPing = {
    condition = ''avg_over_time(probe_icmp_duration_seconds[1m]) > 1'';
    description = "Blackbox ping took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}";
    time = "1m";
    labels = {
      severity = "warning";
    };
  };
};
|
||||
services.grafana.provision.dashboards.settings.providers = [
|
||||
{
|
||||
name = "Blackbox";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue