# Mirror of https://github.com/Stunkymonkey/nixos.git
# Synced 2025-05-24 18:04:41 +02:00 · 198 lines · 5.5 KiB · Nix
# log monitoring
#
# NixOS module that enables `services.loki` with a single-node, filesystem
# backed configuration, registers Loki as a Grafana datasource/dashboard and
# as a Prometheus scrape target, and renders the alerting rules declared in
# `my.services.loki.rules` into a rule file for Loki's ruler.
{
  config,
  lib,
  pkgs,
  ...
}:
let
  cfg = config.my.services.loki;

  # Single source of truth for Loki's state directory, so that the Loki
  # configuration and the tmpfiles rules below cannot drift apart.
  dataDir = config.services.loki.dataDir;
in
{
  options.my.services.loki = with lib; {
    enable = mkEnableOption "loki log monitoring";

    port = mkOption {
      type = types.port;
      default = 3100;
      example = 3002;
      description = "Internal port";
    };

    # Attribute set of alerting rules; the attribute name becomes the alert name.
    rules = mkOption {
      type = types.attrsOf (
        types.submodule {
          options = {
            # Required: no default, because `null` is not a valid `types.str`
            # value and a rule without an expression is meaningless.
            condition = mkOption {
              type = types.str;
              description = ''
                Loki alert expression.
              '';
              example = ''count_over_time({job=~"secure"} |="sshd[" |~": Failed|: Invalid|: Connection closed by authenticating user" | __error__="" [15m]) > 15'';
            };
            # Required for the same reason as `condition`.
            description = mkOption {
              type = types.str;
              description = ''
                Loki alert message.
              '';
              example = "Prometheus encountered value {{ $value }} with {{ $labels }}";
            };
            labels = mkOption {
              type = types.nullOr (types.attrsOf types.str);
              description = ''
                Additional alert labels.
              '';
              example = literalExpression ''
                { severity = "page"; }
              '';
              default = { };
            };
            time = mkOption {
              type = types.str;
              description = ''
                Time until the alert is fired.
              '';
              example = "5m";
              default = "2m";
            };
          };
        }
      );
      description = ''
        Defines the loki rules.
      '';
      default = { };
    };
  };

  config =
    let
      # Rule file content: one group holding every rule from `cfg.rules`.
      rulerConfig = {
        groups = [
          {
            name = "alerting-rules";
            rules = lib.mapAttrsToList (name: opts: {
              alert = name;
              # The rule file format names the query field `expr`; the module
              # option is called `condition`, so it is mapped explicitly.
              expr = opts.condition;
              for = opts.time;
              inherit (opts) labels;
              annotations.description = opts.description;
            }) cfg.rules;
          }
        ];
      };

      # Serialised as JSON, which is also valid YAML.
      rulerFile = pkgs.writeText "ruler.yml" (builtins.toJSON rulerConfig);
    in
    lib.mkIf cfg.enable {
      services = {
        loki = {
          enable = true;
          configuration = {
            server = {
              http_listen_address = "localhost";
              http_listen_port = cfg.port;
            };
            auth_enabled = false;

            common = {
              instance_addr = "localhost";
              ring.kvstore.store = "inmemory";
              replication_factor = 1;

              path_prefix = dataDir;
              storage.filesystem = {
                chunks_directory = "${dataDir}/chunks";
                rules_directory = "${dataDir}/rules";
              };
            };

            # The ruler is only configured when the alertmanager module is
            # enabled, since it needs an Alertmanager to send alerts to.
            ruler = lib.mkIf config.my.services.alertmanager.enable {
              storage = {
                type = "local";
                # NOTE(review): Loki's local ruler storage is documented to
                # read `<directory>/<tenant>/<file>`; with auth_enabled = false
                # the tenant is "fake". Confirm that a rule file placed
                # directly in this directory is actually loaded.
                local.directory = "${dataDir}/ruler";
              };
              rule_path = "${dataDir}/rules";
              alertmanager_url = "http://localhost:${toString config.my.services.alertmanager.port}";
              enable_alertmanager_v2 = true;
            };

            schema_config = {
              configs = [
                {
                  from = "2020-11-08";
                  store = "tsdb";
                  object_store = "filesystem";
                  schema = "v13";
                  index = {
                    prefix = "index_";
                    period = "24h";
                  };
                }
              ];
            };

            limits_config = {
              max_query_lookback = "672h"; # 28 days
              retention_period = "672h"; # 28 days
            };

            compactor = {
              working_directory = "${dataDir}/compactor";
              retention_enabled = true;
              delete_request_store = "filesystem";
            };
          };
        };

        grafana.provision = {
          datasources.settings.datasources = [
            {
              name = "Loki";
              type = "loki";
              access = "proxy";
              url = "http://localhost:${toString cfg.port}";
            }
          ];
          dashboards.settings.providers = [
            {
              name = "Loki";
              options.path = pkgs.grafana-dashboards.loki;
              disableDeletion = true;
            }
          ];
        };

        prometheus = {
          scrapeConfigs = [
            {
              job_name = "loki";
              static_configs = [
                {
                  targets = [ "localhost:${toString cfg.port}" ];
                  labels = {
                    instance = config.networking.hostName;
                  };
                }
              ];
            }
          ];
        };
      };

      systemd.tmpfiles.rules = [
        "d ${dataDir} 0700 loki loki - -"
        "d ${dataDir}/ruler 0700 loki loki - -"
        "d ${dataDir}/rules 0700 loki loki - -"
        # `L+` (not `L`): replace an existing symlink, so the link follows the
        # new store path whenever the generated rule file changes.
        "L+ ${dataDir}/ruler/ruler.yml - - - - ${rulerFile}"
      ];
      systemd.services.loki.reloadTriggers = [ rulerFile ];

      # Built-in rule shipped with this module.
      my.services.loki.rules = {
        loki_highLogRate = {
          condition = ''sum by (host) (rate({unit="loki.service"}[1m])) > 60'';
          description = "Loki has a high logging rate";
        };
      };
    };
}