mirror of
				https://github.com/Stunkymonkey/nixos.git
				synced 2025-10-31 09:42:11 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			198 lines
		
	
	
	
		
			5.5 KiB
		
	
	
	
		
			Nix
		
	
	
	
	
	
			
		
		
	
	
			198 lines
		
	
	
	
		
			5.5 KiB
		
	
	
	
		
			Nix
		
	
	
	
	
	
| # log monitoring
 | |
| {
 | |
|   config,
 | |
|   lib,
 | |
|   pkgs,
 | |
|   ...
 | |
| }:
 | |
| let
 | |
|   cfg = config.my.services.loki;
 | |
| in
 | |
| {
 | |
|   # Options of the Loki log-monitoring module: an enable switch, the local
 | |
|   # HTTP port and a set of alerting rules keyed by alert name.
 | |
|   options.my.services.loki = with lib; {
 | |
|     enable = mkEnableOption "loki log monitoring";
 | |
| 
 | |
|     port = mkOption {
 | |
|       type = types.port;
 | |
|       default = 3100;
 | |
|       example = 3002;
 | |
|       description = "Internal port";
 | |
|     };
 | |
| 
 | |
|     rules = mkOption {
 | |
|       type = types.attrsOf (
 | |
|         types.submodule {
 | |
|           options = {
 | |
|             # No default: `null` is not a valid `types.str` value, so every
 | |
|             # rule has to provide its own expression.
 | |
|             condition = mkOption {
 | |
|               type = types.str;
 | |
|               description = ''
 | |
|                 Loki alert expression.
 | |
|               '';
 | |
|               example = ''count_over_time({job=~"secure"} |="sshd[" |~": Failed|: Invalid|: Connection closed by authenticating user" | __error__="" [15m]) > 15'';
 | |
|             };
 | |
|             # No default for the same reason as `condition`.
 | |
|             description = mkOption {
 | |
|               type = types.str;
 | |
|               description = ''
 | |
|                 Loki alert message.
 | |
|               '';
 | |
|               example = "Prometheus encountered value {{ $value }} with {{ $labels }}";
 | |
|             };
 | |
|             labels = mkOption {
 | |
|               type = types.nullOr (types.attrsOf types.str);
 | |
|               description = ''
 | |
|                 Additional alert labels.
 | |
|               '';
 | |
|               example = literalExpression ''
 | |
|                 { severity = "page"; }
 | |
|               '';
 | |
|               default = { };
 | |
|             };
 | |
|             time = mkOption {
 | |
|               type = types.str;
 | |
|               description = ''
 | |
|                 Time until the alert is fired.
 | |
|               '';
 | |
|               example = "5m";
 | |
|               default = "2m";
 | |
|             };
 | |
|           };
 | |
|         }
 | |
|       );
 | |
|       description = ''
 | |
|         Defines the loki rules.
 | |
|       '';
 | |
|       default = { };
 | |
|     };
 | |
|   };
 | |
| 
 | |
|   config =
 | |
|     let
 | |
|       # Single rule group built from `cfg.rules`; every attribute becomes one
 | |
|       # alerting rule named after its attribute name.
 | |
|       rulerConfig = {
 | |
|         groups = [
 | |
|           {
 | |
|             name = "alerting-rules";
 | |
|             rules = lib.mapAttrsToList (name: opts: {
 | |
|               alert = name;
 | |
|               # The rule file format calls the query field `expr`; the
 | |
|               # module option is named `condition`, so map it explicitly.
 | |
|               expr = opts.condition;
 | |
|               inherit (opts) labels;
 | |
|               for = opts.time;
 | |
|               annotations.description = opts.description;
 | |
|             }) cfg.rules;
 | |
|           }
 | |
|         ];
 | |
|       };
 | |
|       # Serialised with toJSON but stored under a .yml name.
 | |
|       rulerFile = pkgs.writeText "ruler.yml" (builtins.toJSON rulerConfig);
 | |
|     in
 | |
|     lib.mkIf cfg.enable {
 | |
|       services = {
 | |
|         # Loki instance listening on the loopback interface only; every
 | |
|         # storage path is derived from the service's own dataDir.
 | |
|         loki =
 | |
|           let
 | |
|             inherit (config.services.loki) dataDir;
 | |
|             alertmanager = config.my.services.alertmanager;
 | |
|           in
 | |
|           {
 | |
|             enable = true;
 | |
|             configuration = {
 | |
|               auth_enabled = false;
 | |
| 
 | |
|               server.http_listen_address = "127.0.0.1";
 | |
|               server.http_listen_port = cfg.port;
 | |
| 
 | |
|               common = {
 | |
|                 instance_addr = "127.0.0.1";
 | |
|                 replication_factor = 1;
 | |
|                 ring.kvstore.store = "inmemory";
 | |
| 
 | |
|                 path_prefix = dataDir;
 | |
|                 storage.filesystem.chunks_directory = "${dataDir}/chunks";
 | |
|                 storage.filesystem.rules_directory = "${dataDir}/rules";
 | |
|               };
 | |
| 
 | |
|               # Only emitted when the alertmanager module is enabled; alerts
 | |
|               # are sent to its local port.
 | |
|               ruler = lib.mkIf alertmanager.enable {
 | |
|                 storage.type = "local";
 | |
|                 storage.local.directory = "${dataDir}/ruler";
 | |
|                 rule_path = "${dataDir}/rules";
 | |
|                 alertmanager_url = "http://127.0.0.1:${toString alertmanager.port}";
 | |
|                 enable_alertmanager_v2 = true;
 | |
|               };
 | |
| 
 | |
|               schema_config.configs = [
 | |
|                 {
 | |
|                   from = "2020-11-08";
 | |
|                   store = "tsdb";
 | |
|                   object_store = "filesystem";
 | |
|                   schema = "v13";
 | |
|                   index.prefix = "index_";
 | |
|                   index.period = "24h";
 | |
|                 }
 | |
|               ];
 | |
| 
 | |
|               # 672h = 28 days, used for both query lookback and retention.
 | |
|               limits_config.max_query_lookback = "672h";
 | |
|               limits_config.retention_period = "672h";
 | |
| 
 | |
|               compactor = {
 | |
|                 working_directory = "${dataDir}/compactor";
 | |
|                 retention_enabled = true;
 | |
|                 delete_request_store = "filesystem";
 | |
|               };
 | |
|             };
 | |
|           };
 | |
| 
 | |
|         # Register this Loki instance as a Grafana datasource.
 | |
|         grafana.provision.datasources.settings.datasources = [
 | |
|           {
 | |
|             name = "Loki";
 | |
|             type = "loki";
 | |
|             access = "proxy";
 | |
|             url = "http://127.0.0.1:${toString cfg.port}";
 | |
|           }
 | |
|         ];
 | |
|         # Provision the dashboards from pkgs.grafana-dashboards.loki.
 | |
|         grafana.provision.dashboards.settings.providers = [
 | |
|           {
 | |
|             name = "Loki";
 | |
|             disableDeletion = true;
 | |
|             options.path = pkgs.grafana-dashboards.loki;
 | |
|           }
 | |
|         ];
 | |
| 
 | |
|         # Scrape the local Loki HTTP port; the instance label is set to the
 | |
|         # machine's host name.
 | |
|         prometheus.scrapeConfigs = [
 | |
|           {
 | |
|             job_name = "loki";
 | |
|             static_configs = [
 | |
|               {
 | |
|                 targets = [ "127.0.0.1:${toString cfg.port}" ];
 | |
|                 labels.instance = config.networking.hostName;
 | |
|               }
 | |
|             ];
 | |
|           }
 | |
|         ];
 | |
|       };
 | |
| 
 | |
|       # State directories for Loki plus the link that exposes the generated
 | |
|       # rule file to the ruler's local storage.
 | |
|       systemd.tmpfiles.rules =
 | |
|         let
 | |
|           # Same base directory the Loki configuration itself uses.
 | |
|           inherit (config.services.loki) dataDir;
 | |
|         in
 | |
|         [
 | |
|           "d ${dataDir} 0700 loki loki - -"
 | |
|           "d ${dataDir}/ruler 0700 loki loki - -"
 | |
|           # The local ruler storage reads rules from one sub-directory per
 | |
|           # tenant; with auth_enabled = false the tenant is "fake".
 | |
|           "d ${dataDir}/ruler/fake 0700 loki loki - -"
 | |
|           "d ${dataDir}/rules 0700 loki loki - -"
 | |
|           # Drop the link that used to be created directly in ruler/.
 | |
|           "r ${dataDir}/ruler/ruler.yml"
 | |
|           # "L+" replaces an existing link, so a changed rule file (new
 | |
|           # store path) is picked up; plain "L" keeps the stale link.
 | |
|           "L+ ${dataDir}/ruler/fake/ruler.yml - - - - ${rulerFile}"
 | |
|         ];
 | |
|       systemd.services.loki.reloadTriggers = [ rulerFile ];
 | |
| 
 | |
|       # Built-in rule: alert when loki.service logs at a rate above 60.
 | |
|       my.services.loki.rules.loki_highLogRate = {
 | |
|         description = "Loki has a high logging rate";
 | |
|         condition = ''sum by (host) (rate({unit="loki.service"}[1m])) > 60'';
 | |
|       };
 | |
|     };
 | |
| }
 | 
