nixos/modules/services/loki/default.nix
2024-07-28 21:08:02 +02:00

198 lines
5.5 KiB
Nix

# log monitoring
{
config,
lib,
pkgs,
...
}:
let
cfg = config.my.services.loki;
in
{
# Option declarations for the Loki wrapper module.
options.my.services.loki = with lib; {
  enable = mkEnableOption "loki log monitoring";

  # HTTP port Loki listens on. The config section of this module reuses it
  # for the Grafana datasource URL and the Prometheus scrape target.
  port = mkOption {
    type = types.port;
    default = 3100;
    example = 3002;
    description = "Internal port";
  };

  # Alerting rules, keyed by alert name. Each entry becomes one rule in the
  # generated ruler file.
  rules = mkOption {
    type = types.attrsOf (
      types.submodule {
        options = {
          # Required: no default is declared. A `null` default is rejected
          # by `types.str` anyway and only produces a confusing type error
          # that points at this declaration instead of the incomplete rule.
          condition = mkOption {
            type = types.str;
            description = ''
              Loki alert expression.
            '';
            example = ''count_over_time({job=~"secure"} |="sshd[" |~": Failed|: Invalid|: Connection closed by authenticating user" | __error__="" [15m]) > 15'';
          };
          # Required for the same reason as `condition`.
          description = mkOption {
            type = types.str;
            description = ''
              Loki alert message.
            '';
            example = "Prometheus encountered value {{ $value }} with {{ $labels }}";
          };
          labels = mkOption {
            type = types.nullOr (types.attrsOf types.str);
            description = ''
              Additional alert labels.
            '';
            example = literalExpression ''
              { severity = "page"; }
            '';
            default = { };
          };
          time = mkOption {
            type = types.str;
            description = ''
              Time until the alert is fired.
            '';
            example = "5m";
            default = "2m";
          };
        };
      }
    );
    description = ''
      Defines the loki rules.
    '';
    default = { };
  };
};
# Implementation: configures Loki itself, generates the ruler rule file from
# `my.services.loki.rules`, and registers Loki with Grafana and Prometheus.
config =
  let
    dataDir = config.services.loki.dataDir;

    # Root of the ruler's "local" rule storage.
    rulerDir = "${dataDir}/ruler";
    # Local rule storage is laid out as <directory>/<tenant>/<file>. With
    # `auth_enabled = false` Loki runs single-tenant under the tenant ID
    # "fake", so the rule file has to live in that sub-directory.
    tenantRulerDir = "${rulerDir}/fake";

    # Rule group in the Prometheus-compatible format read by the Loki ruler.
    rulerConfig = {
      groups = [
        {
          name = "alerting-rules";
          rules = lib.mapAttrsToList (name: opts: {
            alert = name;
            # The rule format names the query field `expr`; a `condition`
            # key is not part of it.
            expr = opts.condition;
            for = opts.time;
            inherit (opts) labels;
            annotations.description = opts.description;
          }) cfg.rules;
        }
      ];
    };
    # JSON is a subset of YAML, so the JSON dump is written as ruler.yml.
    rulerFile = pkgs.writeText "ruler.yml" (builtins.toJSON rulerConfig);
  in
  lib.mkIf cfg.enable {
    services = {
      loki = {
        enable = true;
        configuration = {
          # Only listen on the loopback interface.
          server = {
            http_listen_address = "127.0.0.1";
            http_listen_port = cfg.port;
          };
          auth_enabled = false;
          # Single-instance setup: in-memory ring, filesystem storage.
          common = {
            instance_addr = "127.0.0.1";
            ring.kvstore.store = "inmemory";
            replication_factor = 1;
            path_prefix = dataDir;
            storage.filesystem = {
              chunks_directory = "${dataDir}/chunks";
              rules_directory = "${dataDir}/rules";
            };
          };
          # The ruler is only configured when alertmanager is enabled, since
          # it needs somewhere to send alerts.
          ruler = lib.mkIf config.my.services.alertmanager.enable {
            storage = {
              type = "local";
              local.directory = rulerDir;
            };
            rule_path = "${dataDir}/rules";
            alertmanager_url = "http://127.0.0.1:${toString config.my.services.alertmanager.port}";
            enable_alertmanager_v2 = true;
          };
          schema_config = {
            configs = [
              {
                from = "2020-11-08";
                store = "tsdb";
                object_store = "filesystem";
                schema = "v13";
                index = {
                  prefix = "index_";
                  period = "24h";
                };
              }
            ];
          };
          limits_config = {
            max_query_lookback = "672h"; # 28 days
            retention_period = "672h"; # 28 days
          };
          compactor = {
            working_directory = "${dataDir}/compactor";
            retention_enabled = true;
            delete_request_store = "filesystem";
          };
        };
      };

      # Expose Loki as a Grafana datasource and ship the bundled dashboards.
      grafana.provision = {
        datasources.settings.datasources = [
          {
            name = "Loki";
            type = "loki";
            access = "proxy";
            url = "http://127.0.0.1:${toString cfg.port}";
          }
        ];
        dashboards.settings.providers = [
          {
            name = "Loki";
            options.path = pkgs.grafana-dashboards.loki;
            disableDeletion = true;
          }
        ];
      };

      # Let Prometheus scrape Loki's own HTTP endpoint.
      prometheus = {
        scrapeConfigs = [
          {
            job_name = "loki";
            static_configs = [
              {
                targets = [ "127.0.0.1:${toString cfg.port}" ];
                labels = {
                  instance = config.networking.hostName;
                };
              }
            ];
          }
        ];
      };
    };

    systemd.tmpfiles.rules = [
      "d ${dataDir} 0700 loki loki - -"
      "d ${rulerDir} 0700 loki loki - -"
      "d ${tenantRulerDir} 0700 loki loki - -"
      "d ${dataDir}/rules 0700 loki loki - -"
      # Earlier revisions linked the rule file directly into the storage
      # root; remove that entry so no stale or dangling symlink stays there.
      "r ${rulerDir}/ruler.yml"
      # `L+` replaces an existing symlink. Plain `L` keeps whatever is
      # already there, so a changed rule file would never be picked up.
      "L+ ${tenantRulerDir}/ruler.yml - - - - ${rulerFile}"
    ];

    # NOTE(review): reloadTriggers only has an effect if the loki unit
    # defines a reload action -- confirm against the upstream unit, or switch
    # to restartTriggers if it does not.
    systemd.services.loki.reloadTriggers = [ rulerFile ];

    # Default rule shipped with this module: alert on Loki's own log volume.
    my.services.loki.rules = {
      loki_highLogRate = {
        condition = ''sum by (host) (rate({unit="loki.service"}[1m])) > 60'';
        description = "Loki has a high logging rate";
      };
    };
  };
}