From db8924bcc286c2038312af4098b6ef590a070dce Mon Sep 17 00:00:00 2001 From: Felix Buehler Date: Sun, 16 Apr 2023 17:27:48 +0200 Subject: [PATCH] service/grafana: add monitoring --- modules/services/grafana/default.nix | 27 +- pkgs/grafana-dashboards/default.nix | 7 + pkgs/grafana-dashboards/grafana.json | 1285 ++++++++++++++++++++++++++ 3 files changed, 1318 insertions(+), 1 deletion(-) create mode 100644 pkgs/grafana-dashboards/grafana.json diff --git a/modules/services/grafana/default.nix b/modules/services/grafana/default.nix index d6ae70c..afb1144 100644 --- a/modules/services/grafana/default.nix +++ b/modules/services/grafana/default.nix @@ -47,7 +47,32 @@ in }; }; - provision.enable = true; + provision = { + enable = true; + dashboards.settings.providers = [ + { + name = "Grafana"; + options.path = pkgs.grafana-dashboards.grafana; + disableDeletion = true; + } + ]; + }; + }; + + services.prometheus = { + scrapeConfigs = [ + { + job_name = "grafana"; + static_configs = [ + { + targets = [ "127.0.0.1:${toString cfg.port}" ]; + labels = { + instance = config.networking.hostName; + }; + } + ]; + } + ]; }; my.services.nginx.virtualHosts = [ diff --git a/pkgs/grafana-dashboards/default.nix b/pkgs/grafana-dashboards/default.nix index c247302..009c33b 100644 --- a/pkgs/grafana-dashboards/default.nix +++ b/pkgs/grafana-dashboards/default.nix @@ -63,4 +63,11 @@ in }).overrideAttrs (self: super: { src = ./gitea.json; # sadly not yet updated to latest grafana }); + grafana = (buildGrafanaDashboard { + id = 3590; + pname = "grafana"; + version = "3"; + }).overrideAttrs (self: super: { + src = ./grafana.json; # sadly only imported dashboards work + }); }) diff --git a/pkgs/grafana-dashboards/grafana.json b/pkgs/grafana-dashboards/grafana.json new file mode 100644 index 0000000..800d4a8 --- /dev/null +++ b/pkgs/grafana-dashboards/grafana.json @@ -0,0 +1,1285 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": 
"grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Metrics about Grafana", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [ + { + "icon": "external link", + "tags": [], + "targetBlank": true, + "title": "Available metrics", + "type": "link", + "url": "/metrics" + }, + { + "icon": "external link", + "tags": [], + "targetBlank": true, + "title": "Grafana docs", + "type": "link", + "url": "https://grafana.com/docs/grafana/latest/" + }, + { + "icon": "external link", + "tags": [], + "targetBlank": true, + "title": "Prometheus docs", + "type": "link", + "url": "https://prometheus.io/docs/introduction/overview/" + } + ], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "0": { + "text": ":(" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(222, 3, 3, 0.9)", + "value": null + }, + { + "color": "rgb(234, 245, 234)", + "value": 1 + }, + { + "color": "rgb(235, 244, 235)", + "value": 10000 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 5, + "x": 0, + "y": 0 + }, + "id": 4, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "up{job=\"grafana\"}", + "format": "time_series", + "instant": 
true, + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "title": "Active instances", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 5, + "x": 5, + "y": 0 + }, + "id": 8, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "grafana_stat_totals_dashboard", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "title": "Dashboard count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 5, + "x": 10, + "y": 0 + }, + "id": 9, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + 
"fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "grafana_stat_total_users", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "title": "User count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 5, + "x": 15, + "y": 0 + }, + "id": 10, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "grafana_stat_total_playlists", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "title": "Playlist count", + "type": "stat" + }, + { + "columns": [], + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "fontSize": "100%", + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 17, + "links": [], + "scroll": false, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "link": false, + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "", + "align": "auto", + 
"colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "topk(1, grafana_info or grafana_build_info)", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "legendFormat": "{{version}}", + "refId": "A", + "step": 20 + } + ], + "title": "Grafana version", + "transform": "timeseries_to_rows", + "type": "table-old" + }, + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "400" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#447EBC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "500" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 10, + "x": 0, + "y": 5 + }, + "id": 15, + "links": [], + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.0-pre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "sum by (statuscode) (irate(http_request_total{job='grafana'}[5m]))", + "format": "time_series", + "intervalFactor": 3, + "legendFormat": "{{statuscode}}", + "refId": "B", + "step": 15, + "target": "dev.grafana.cb-office.alerting.active_alerts" + } + ], + "title": "http status codes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "400" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#447EBC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "500" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 10, + "x": 10, + "y": 5 + }, + "id": 11, + "links": [], + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.0-pre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "sum(irate(grafana_api_response_status_total[5m]))", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "api", + "refId": "A", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "sum(irate(grafana_proxy_response_status_total[5m]))", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "proxy", + "refId": "B", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "sum(irate(grafana_page_response_status_total[5m]))", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "web", + "refId": "C", + "step": 20 + } + ], + "title": "Requests by routing group", + "type": "timeseries" + }, + { + "columns": [], + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "fontSize": "100%", + "gridPos": { + "h": 10, + "w": 4, + "x": 20, + "y": 5 + }, + "height": "", + "id": 12, + "links": [], + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "link": false, + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "", + "align": "auto", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "sort(topk(8, sum by (handler) (http_request_total{job=\"grafana\"})))", + "format": "time_series", + "instant": true, + "intervalFactor": 10, + "legendFormat": "{{handler}}", + "refId": "A", + "step": 100 + 
} + ], + "title": "Most used handlers", + "transform": "timeseries_to_rows", + "type": "table-old" + }, + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "alerting" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "ok" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 6, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.0-pre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "increase(grafana_alerting_active_alerts[1m])", + "format": "time_series", + "intervalFactor": 3, + "legendFormat": "{{state}}", + "refId": "A", + "step": 15 + } + ], + "title": "Grafana active alerts", + "type": 
"timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "alerting" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "alertname" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "firing alerts" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "ok" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 18, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.0-pre", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": " sum (ALERTS)", + "format": "time_series", + "intervalFactor": 3, + "legendFormat": "firing alerts", + "refId": "A", + "step": 15 + } + ], + "title": "Prometheus alerts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "description": "Aggregated over all Grafana nodes.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "avg gc duration" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "allocated memory" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "used memory" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "memory usage" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 7, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + 
"placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.0-pre", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "sum(go_goroutines{job=\"grafana\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 4, + "legendFormat": "go routines", + "refId": "A", + "step": 8, + "target": "select metric", + "type": "timeserie" + }, + { + "datasource": { + "type": "prometheus", + "uid": "KNkR71YVk" + }, + "expr": "sum(process_resident_memory_bytes{job=\"grafana\"})", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "memory usage", + "refId": "B", + "step": 8 + } + ], + "title": "Grafana performance", + "type": "timeseries" + } + ], + "refresh": "", + "revision": "1.0", + "schemaVersion": 38, + "style": "dark", + "tags": [ + "grafana", + "prometheus" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Grafana metrics", + "uid": "isFoa0z7k", + "version": 1, + "weekStart": "" +}