From 869c92d710b0b21e4f5d313aeedaeedecfd9a4a3 Mon Sep 17 00:00:00 2001 From: Finn Date: Mon, 12 Aug 2024 18:00:24 -0700 Subject: [PATCH] add alertmanager/prometheus external URLs + patch some alerts that fail on rclonefs --- .../cert-manager/controller-patches.yaml | 2 +- .../kube-prometheus/alertmanager-patches.yaml | 3 ++ .../grafana-deployment-patch.yaml | 14 ------- .../kube-prometheus/kustomization.yaml | 8 ++++ .../node-exporter-rules-patches.yaml | 37 +++++++++++++++++++ .../kube-prometheus/prometheus-patch.yaml | 3 ++ 6 files changed, 52 insertions(+), 15 deletions(-) create mode 100644 k8s/operators/kube-prometheus/alertmanager-patches.yaml create mode 100644 k8s/operators/kube-prometheus/node-exporter-rules-patches.yaml diff --git a/k8s/operators/cert-manager/controller-patches.yaml b/k8s/operators/cert-manager/controller-patches.yaml index 33aaa33..c13ad31 100644 --- a/k8s/operators/cert-manager/controller-patches.yaml +++ b/k8s/operators/cert-manager/controller-patches.yaml @@ -3,4 +3,4 @@ value: --dns01-recursive-nameservers-only # adding this arg makes DNS-01 validation work, unclear why it doesnt work otherwise. - op: add path: /spec/template/spec/containers/0/args/- - value: --dns01-recursive-nameservers=1.1.1.1,8.8.8.8,8.8.4.4 + value: --dns01-recursive-nameservers=1.1.1.1:53,8.8.8.8:53,8.8.4.4:53 diff --git a/k8s/operators/kube-prometheus/alertmanager-patches.yaml b/k8s/operators/kube-prometheus/alertmanager-patches.yaml new file mode 100644 index 0000000..ef12b50 --- /dev/null +++ b/k8s/operators/kube-prometheus/alertmanager-patches.yaml @@ -0,0 +1,3 @@ +- op: add + path: /spec/externalUrl + value: https://alertmanager.k8s.home.finn.io diff --git a/k8s/operators/kube-prometheus/grafana-deployment-patch.yaml b/k8s/operators/kube-prometheus/grafana-deployment-patch.yaml index c76a302..270ae5a 100644 --- a/k8s/operators/kube-prometheus/grafana-deployment-patch.yaml +++ b/k8s/operators/kube-prometheus/grafana-deployment-patch.yaml @@ -16,17 +16,3 @@ secretKeyRef: name: grafana.grafana-database.credentials.postgresql.acid.zalan.do key: password -# - op: add -# path: /spec/template/spec/containers/0/volumeMounts/- -# value: -# mountPath: /grafana-dashboard-definitions/1 -# name: grafana-dashboards-custom -# - op: add -# path: /spec/template/spec/volumes/- -# value: -# name: grafana-dashboards-custom -# configMap: -# name: grafana-dashboards-custom -# - op: replace -# path: /spec/template/spec/automountServiceAccountToken -# value: true diff --git a/k8s/operators/kube-prometheus/kustomization.yaml b/k8s/operators/kube-prometheus/kustomization.yaml index 566b87f..5d965c3 100644 --- a/k8s/operators/kube-prometheus/kustomization.yaml +++ b/k8s/operators/kube-prometheus/kustomization.yaml @@ -23,6 +23,10 @@ patches: target: kind: Prometheus name: k8s + - path: alertmanager-patches.yaml + target: + kind: Alertmanager + name: main - path: prometheus-cluster-role-patch.yaml target: kind: ClusterRole @@ -35,6 +39,10 @@ patches: target: kind: PrometheusRule name: kubernetes-monitoring-rules + - path: node-exporter-rules-patches.yaml + target: + kind: PrometheusRule + name: node-exporter-rules secretGenerator: - name: grafana-config namespace: monitoring diff --git a/k8s/operators/kube-prometheus/node-exporter-rules-patches.yaml b/k8s/operators/kube-prometheus/node-exporter-rules-patches.yaml new file mode 100644 index 0000000..fd4f1be --- /dev/null +++ b/k8s/operators/kube-prometheus/node-exporter-rules-patches.yaml @@ -0,0 +1,37 @@ +# NodeFilesystemFilesFillingUp/NodeFilesystemAlmostOutOfFiles: ignore rclone filesystems +- op: replace + path: /spec/groups/0/rules/4/expr # NodeFilesystemFilesFillingUp warning + value: | + ( + node_filesystem_files_free{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""} * 100 < 40 + and + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""} == 0 + ) +- op: replace + path: /spec/groups/0/rules/5/expr # NodeFilesystemFilesFillingUp critical + value: | + ( + node_filesystem_files_free{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""} * 100 < 20 + and + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""} == 0 + ) +- op: replace + path: /spec/groups/0/rules/6/expr # NodeFilesystemAlmostOutOfFiles warning + value: | + ( + node_filesystem_files_free{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""} == 0 + ) +- op: replace + path: /spec/groups/0/rules/7/expr # NodeFilesystemAlmostOutOfFiles critical + value: | + ( + node_filesystem_files_free{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",fstype!="",fstype!="fuse.rclone",mountpoint!=""} == 0 + ) diff --git a/k8s/operators/kube-prometheus/prometheus-patch.yaml b/k8s/operators/kube-prometheus/prometheus-patch.yaml index 79f1bc3..6308b18 100644 --- a/k8s/operators/kube-prometheus/prometheus-patch.yaml +++ b/k8s/operators/kube-prometheus/prometheus-patch.yaml @@ -7,3 +7,6 @@ - op: add path: /spec/secrets value: [scrape-secrets] +- op: add + path: /spec/externalUrl + value: https://prometheus.k8s.home.finn.io