# dotfiles/nixos/systems/blowhole/monitoring.nix

# SPDX-FileCopyrightText: 2023 Richard Brežák <richard@brezak.sk>
#
# SPDX-License-Identifier: LGPL-3.0-or-later
{ pkgs, roots, lib, inputs, config, secret, ... }:
let
inherit (lib)
singleton
nixosTests
concatStringsSep
;
in
{
# Terraform (uterranix) fragment for the monitoring stack.
#
# Publishes two outputs, `envoy_grafana` and `envoy_blowhole`, each built by
# applying `tflib.tf` to the address of a `vault_consul_secret_backend_role`
# resource (presumably rendering a Terraform reference to it — the helper is
# defined outside this file).
uterranix.config = { tflib, ... }: {
  output = {
    envoy_grafana.value = tflib.tf "vault_consul_secret_backend_role.envoy-grafana";
    envoy_blowhole.value = tflib.tf "vault_consul_secret_backend_role.envoy-blowhole";
  };

  # Currently disabled: InfluxDB v2 buckets, the telegraf write token and the
  # Vault KV v2 secret that would carry that token.
  #
  # data."influxdb-v2_organization"."redalder" = {
  # name = "redalder";
  # };
  # resource."influxdb-v2_bucket"."metrics_bucket" = {
  # name = "metrics";
  # description = "Metrics bucket";
  # org_id = "\${data.influxdb-v2_organization.redalder.id}";
  # retention_rules = {
  # every_seconds = 30 * 24 * 60 * 60; # days * h/d * m/h * s/m
  # };
  # };
  # resource."influxdb-v2_bucket"."logs_bucket" = {
  # org_id = "\${data.influxdb-v2_organization.redalder.id}";
  # name = "logs";
  # description = "Logs bucket";
  # retention_rules = {
  # every_seconds = 30 * 24 * 60 * 60; # days * h/d * m/h * s/m
  # };
  # };
  # resource."influxdb-v2_authorization"."telegraf_authorization" = {
  # org_id = "\${data.influxdb-v2_organization.redalder.id}";
  # description = "Token for telegraf ingestion";
  # status = "active";
  # permissions = [
  # {
  # action = "write";
  # resource = {
  # id = "\${influxdb-v2_bucket.logs_bucket.id}";
  # org_id = "\${data.influxdb-v2_organization.redalder.id}";
  # type = "buckets";
  # };
  # }
  # {
  # action = "write";
  # resource = {
  # id = "\${influxdb-v2_bucket.metrics_bucket.id}";
  # org_id = "\${data.influxdb-v2_organization.redalder.id}";
  # type = "buckets";
  # };
  # }
  # ];
  # };
  # resource."vault_mount"."kvv2" = {
  # path = "kvv2";
  # type = "kv";
  # options = { version = 2; };
  # description = "KV Version 2 secret engine mount";
  # };
  # resource."vault_kv_secret_v2"."telegraf_secret" = {
  # mount = "\${vault_mount.kvv2.path}";
  # name = "homelab-1/blowhole/monitor/telegraf";
  # options = { version = 2; };
  # data_json = builtins.toJSON {
  # influxdb_token = "\${influxdb-v2_authorization.telegraf_authorization.token}";
  # };
  # };
};
# Package overrides for this host. The monitor container re-uses the same list
# through `config.nixpkgs.overlays`.
#
# The overlay takes everything from `final` (the package set it is being
# applied to) instead of closing over the module-level `pkgs`, so it stays
# self-contained wherever the overlay list is re-used.
nixpkgs.overlays = singleton (final: _prev:
  {
    # Telegraf 1.25.3 built from the upstream GitHub tag.
    telegraf =
      final.buildGoModule rec {
        pname = "telegraf";
        version = "1.25.3";

        excludedPackages = "test";
        doCheck = false;
        subPackages = [ "cmd/telegraf" ];

        src = final.fetchFromGitHub {
          owner = "influxdata";
          repo = "telegraf";
          rev = "v${version}";
          sha256 = "sha256-FUZDS4As9qP2Dn0NSBM/e8udDLMk5OZol4CQSI39T4s=";
        };

        vendorHash = "sha256-uWoWvS9ZZzhpE+PiJv0fqblMLOAGIrhCdi0ugvF/lQI=";
        proxyVendor = true;

        ldflags = [
          "-w" "-s" "-X main.version=${version}"
        ];

        # `nixosTests` is an attribute of the package set, not of `lib`;
        # taking it from `lib` only failed once `passthru.tests` was forced.
        passthru.tests = { inherit (final.nixosTests) telegraf; };

        meta = with lib; {
          description = "The plugin-driven server agent for collecting & reporting metrics";
          license = licenses.mit;
          homepage = "https://www.influxdata.com/time-series-platform/telegraf/";
          maintainers = with maintainers; [ mic92 roblabla timstott ];
        };
      };

    # Envoy is taken from the `nixpkgs-stable` flake input for the same system.
    envoy = inputs.nixpkgs-stable.legacyPackages.${final.stdenv.hostPlatform.system}.envoy;
  });
# Vault Agent templates that render the secrets used by the monitoring stack.
# Files written below /run/secrets/monitor are bind-mounted into the monitor
# container as /run/secrets.
services.hashicorp.vault-agent =
  {
    settings.template = [
      # Consul token for the Envoy sidecars running inside the monitor container.
      {
        source = pkgs.writeText "envoy-grafana.token.vtmpl" ''
          {{ with secret "consul/creds/envoy-grafana" }}{{ .Data.token }}{{ end }}
        '';
        destination = "/run/secrets/monitor/envoy-grafana.token";
        command =
          let
            # Every sidecar in the container that sets CONSUL_HTTP_TOKEN_FILE to
            # this token. The telegraf sidecar reads it as well, so it has to be
            # restarted on rotation together with grafana and influx.
            serviceList =
              [
                "hashicorp-envoy-grafana"
                "hashicorp-envoy-influx"
                "hashicorp-envoy-telegraf"
              ];
          in
          # Runs systemctl inside the `monitor` machine after each re-render.
          pkgs.writeShellScript "envoy-grafana-reload.sh"
            ''
              sudo systemd-run -P --machine monitor /run/current-system/sw/bin/bash -l -c \
              'systemctl try-reload-or-restart ${concatStringsSep " " serviceList}'
            '';
      }
      # Consul token for the host-side Envoy ingress gateway.
      {
        source = pkgs.writeText "envoy-blowhole.token.vtmpl"
          ''
            {{ with secret "consul/creds/envoy-blowhole" }}{{ .Data.token }}{{ end }}
          '';
        destination = "/run/secrets/envoy-blowhole.token";
        command = pkgs.writeShellScript "envoy-blowhole-reload.sh"
          ''
            sudo systemctl try-reload-or-restart hashicorp-envoy-telegraf
          '';
      }
      # Environment file holding the InfluxDB token for the container's telegraf.
      # No reload command is attached to this template.
      {
        source = pkgs.writeText "telegraf.env.vtmpl" ''
          INFLUXDB_TOKEN={{ with secret "kv/data/homelab-1/blowhole/monitor/telegraf" }}{{ .Data.data.influxdb_token }}{{ end }}
        '';
        destination = "/run/secrets/monitor/telegraf.env";
      }
    ];
  };
## Host-side Envoy registered as the Consul ingress gateway `telegraf-blowhole`,
## with one TCP listener on port 8086 for the `telegraf` service.
##
## The gateway cannot be told to listen on localhost only: the `listeners`
## option has neither an `address` nor a `name` field, so that is impossible
## to configure.
services.hashicorp-envoy.telegraf =
  let
    blowholeIp = secret.network.ips.blowhole.ip;
    # Consul endpoint on this host for the given port.
    consulUrl = port: "http://${blowholeIp}:${toString port}";
  in
  {
    type = "ingress";
    address = "${blowholeIp}:19000";
    adminBind = "127.0.0.1:19100";
    hotRestart = true;

    service = {
      kind = "ingress-gateway";
      name = "telegraf-blowhole";
      listeners = singleton {
        port = 8086;
        protocol = "tcp";
        services = [ { name = "telegraf"; } ];
      };
    };

    environment = {
      CONSUL_HTTP_ADDR = consulUrl 8500;
      CONSUL_GRPC_ADDR = consulUrl 8502;
      CONSUL_HTTP_TOKEN_FILE = "/run/secrets/envoy-blowhole.token";
    };
  };
# Telegraf on the host: collects CPU, memory, Nomad and ZFS metrics and writes
# them to the `telegraf` bucket via port 8086 on the host address.
services.telegraf-magic =
  let
    # Tags attached to every input defined here.
    commonTags = {
      host = "blowhole";
      bucket = "telegraf";
    };
  in
  {
    enable = true;
    settings = {
      inputs = {
        cpu = {
          percpu = true;
          totalcpu = true;
          tags = commonTags;
        };
        mem.tags = commonTags;
        nomad = {
          url = "http://${secret.network.ips.blowhole.ip}:4646";
          tags = commonTags;
        };
        zfs.tags = commonTags;
        # Disabled: tailing of Nomad allocation logs into a `logs` bucket.
        # tail = [
        # {
        # files = ["/var/lib/nomad/alloc/*/alloc/logs/*.stdout.*"];
        # data_format = "value";
        # data_type = "string";
        # name_override = "nomad_alloc_log";
        # tags.bucket = "logs";
        # }
        # {
        # files = ["/var/lib/nomad/alloc/*/alloc/logs/*.stderr.*"];
        # data_format = "value";
        # data_type = "string";
        # name_override = "nomad_alloc_log";
        # tags.bucket = "logs";
        # }
        # ];
      };

      outputs.influxdb_v2 = [
        {
          urls = [ "http://${secret.network.ips.blowhole.ip}:8086" ];
          bucket = "telegraf";
          # tagdrop = [ "bucket" ];
          # tagpass = {
          # bucket = "telegraf";
          # };
        }
        # {
        # urls = [ "http://${secret.network.ips.blowhole.ip}:8086" ];
        # bucket = "logs";
        # tagdrop = [ "bucket" ];
        # tagpass = {
        # bucket = "logs";
        # };
        # }
      ];
    };
  };
# ZFS datasets holding the persistent state of the monitoring stack; each one
# is bind-mounted into the monitor container.
fileSystems = {
  "/var/lib/grafana" = {
    fsType = "zfs";
    device = "blowhole-zpool/persist/grafana";
  };
  "/var/lib/grafana-postgres" = {
    fsType = "zfs";
    device = "blowhole-zpool/persist/grafana-postgres";
  };
  "/var/lib/grafana-influxdb2" = {
    fsType = "zfs";
    device = "blowhole-zpool/persist/grafana-influxdb2";
  };
};
# Remove the open-file limit for the systemd unit that runs the container.
systemd.services."container@monitor".serviceConfig.LimitNOFILE = "infinity";

# The `monitor` container: Grafana (backed by PostgreSQL), InfluxDB 2 and a
# telegraf relay, each fronted by an Envoy sidecar registered in Consul.
#
# TODO: split interface name and container name, i.e. rewrite the container module....... again
containers.monitor =
  let
    # Consul connection settings shared by all three sidecars. The token file is
    # the host's /run/secrets/monitor/envoy-grafana.token seen through the
    # /run/secrets bind mount.
    consulEnvironment = {
      "CONSUL_HTTP_ADDR" = "http://${secret.network.ips.blowhole.ip}:8500";
      "CONSUL_GRPC_ADDR" = "http://${secret.network.ips.blowhole.ip}:8502";
      "CONSUL_HTTP_TOKEN_FILE" = "/run/secrets/envoy-grafana.token";
    };
  in
  {
    ephemeral = true;
    autoStart = true;
    privateNetwork = true;
    localAddress = "10.64.99.2";
    hostAddress = "10.64.99.1";
    # 0 1 2 3
    # 4 5 6 7
    # 8 9 10 11
    extraFlags = [
      "--capability=CAP_IPC_LOCK"
    ];

    bindMounts = {
      # Secrets rendered on the host, exposed read-only.
      "/run/secrets" = {
        hostPath = "/run/secrets/monitor";
        isReadOnly = true;
      };
      "/var/lib/grafana" = {
        hostPath = "/var/lib/grafana";
        isReadOnly = false;
      };
      "/var/lib/postgresql" = {
        hostPath = "/var/lib/grafana-postgres";
        isReadOnly = false;
      };
      "/var/lib/influxdb2" = {
        hostPath = "/var/lib/grafana-influxdb2";
        isReadOnly = false;
      };
    };

    config = {
      # Re-use the host's overlays (custom telegraf, stable envoy).
      nixpkgs.overlays = config.nixpkgs.overlays;

      imports = [
        ../../modules/public/grafana.nix
        ../../modules/public/telegraf.nix
        ../../modules/public/hashicorp-envoy.nix
        ../../modules/hashicorp.nix
      ];

      # Sidecar for Grafana (port 3000).
      services.hashicorp-envoy.grafana = {
        service = {
          name = "grafana";
          id = "grafana";
          address = "10.64.99.2";
          port = 3000;
          connect.sidecar_service = {};
        };
        environment = consulEnvironment;
        address = "10.64.99.2:19000";
        adminBind = "127.0.0.1:19100";
        hotRestart = true;
      };

      services.postgresql = {
        enable = true;
        ensureDatabases = [
          "grafana"
        ];
        ensureUsers = [
          {
            name = "grafana";
            # NOTE(review): newer NixOS releases replace `ensurePermissions`
            # with `ensureDBOwnership` — confirm against the pinned nixpkgs.
            ensurePermissions = {
              "DATABASE grafana" = "ALL PRIVILEGES";
            };
          }
        ];
      };

      systemd.services.grafana = {
        serviceConfig = {
          Restart = "always";
          RestartSec = "10s";
        };
      };

      services.grafana-magic = {
        enable = true;
        settings = {
          security = {
            content_security_policy = true;
            disable_gravatar = true;
            data_source_proxy_whitelist = concatStringsSep " " [
              "127.0.0.1:8086"
            ];
          };
          server = {
            domain = "grafana.in.redalder.org";
            # `http_addr` is a key of Grafana's `[server]` section. It used to
            # sit under a `system` section, which Grafana does not define, so
            # the loopback-only bind presumably never took effect.
            http_addr = "127.0.0.1";
          };
          database = {
            type = "postgres";
            host = "/var/run/postgresql";
            name = "grafana";
            user = "grafana";
          };
        };
      };

      # Sidecar for InfluxDB (port 8086).
      services.hashicorp-envoy.influx = {
        service = {
          name = "influx";
          id = "influx";
          address = "10.64.99.2";
          port = 8086;
          connect.sidecar_service = {};
        };
        environment = consulEnvironment;
        address = "10.64.99.2:19001";
        adminBind = "127.0.0.1:19101";
        hotRestart = true;
      };

      services.influxdb2 = {
        enable = true;
        settings = {
          http-bind-address = "127.0.0.1:8086";
          hardening-enabled = true;
          reporting-disabled = true;
        };
      };

      # Sidecar for the telegraf listener (port 8087).
      services.hashicorp-envoy.telegraf = {
        service = {
          name = "telegraf";
          id = "telegraf";
          address = "10.64.99.2";
          port = 8087;
          connect.sidecar_service = {};
        };
        environment = consulEnvironment;
        address = "10.64.99.2:19002";
        adminBind = "127.0.0.1:19102";
        hotRestart = true;
      };

      # Telegraf relay: accepts InfluxDB v2 writes on 127.0.0.1:8087, adds the
      # container's systemd unit state, and forwards to the local InfluxDB.
      services.telegraf-magic = {
        enable = true;
        settings = {
          inputs.influxdb_v2_listener = {
            service_address = "127.0.0.1:8087";
            bucket_tag = "bucket";
            parser_type = "upstream";
          };
          inputs.systemd_units = {
            unittype = "service";
            tags = {
              host = "blowhole#monitoring";
            };
          };
          outputs.influxdb_v2 = [
            # {
            # urls = [ "http://127.0.0.1:8086" ];
            # token = "\${INFLUXDB_TOKEN}";
            # organization = "redalder";
            # bucket = "logs";
            # tagdrop = [ "bucket" ];
            # tagpass = {
            # bucket = "logs";
            # };
            # }
            {
              urls = [ "http://127.0.0.1:8086" ];
              # Left as a literal `${INFLUXDB_TOKEN}` in the generated settings;
              # the variable is supplied by the EnvironmentFile below.
              token = "\${INFLUXDB_TOKEN}";
              organization = "redalder";
              bucket = "telegraf";
              tagdrop = [ "bucket" ];
              # tagpass = {
              # bucket = "telegraf";
              # };
            }
          ];
        };
        systemd.serviceConfig = {
          EnvironmentFile = "/run/secrets/telegraf.env";
        };
      };
    };
  };
}