alertmanager matrix receiver
This commit is contained in:
parent
ba35dc9b32
commit
8d7ab62a3a
8 changed files with 310 additions and 0 deletions
43
k8s/monitoring/alertmanager-config/alertmanager.yaml
Normal file
43
k8s/monitoring/alertmanager-config/alertmanager.yaml
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
# Alertmanager configuration.
# Alerts go to the `default` webhook receiver unless a child route under
# `route.routes` matches first.
global:
  resolve_timeout: 5m

# Inhibition rules: while a "source" alert is firing, matching "target" alerts
# that share the labels listed under `equal` are muted.
inhibit_rules:
  # critical mutes warning/info for the same namespace + alertname
  - equal:
      - namespace
      - alertname
    source_matchers:
      - severity = critical
    target_matchers:
      - severity =~ warning|info
  # warning mutes info for the same namespace + alertname
  - equal:
      - namespace
      - alertname
    source_matchers:
      - severity = warning
    target_matchers:
      - severity = info
  # InfoInhibitor mutes info-level alerts in the same namespace
  - equal:
      - namespace
    source_matchers:
      - alertname = InfoInhibitor
    target_matchers:
      - severity = info

receivers:
  - name: default
    webhook_configs:
      # NOTE(review): presumably <alerts-path-prefix>/<room short name> as
      # defined in matrix-alertmanager-receiver/config.yaml — confirm.
      - url: http://matrix-alertmanager-receiver:8080/alerts/alerts
  # The next two receivers define no integrations, so nothing is sent for
  # alerts routed to them.
  - name: watchdog
  - name: "null"

route:
  group_by:
    - namespace
  group_interval: 5m
  group_wait: 30s
  receiver: default
  repeat_interval: 12h
  routes:
    - matchers:
        - alertname = Watchdog
      receiver: watchdog
    - matchers:
        - alertname = InfoInhibitor
      receiver: "null"
|
|
@ -8,6 +8,7 @@ resources:
|
||||||
- grafana-database.yaml
|
- grafana-database.yaml
|
||||||
- thanos.yaml
|
- thanos.yaml
|
||||||
- alerts-longhorn.yaml
|
- alerts-longhorn.yaml
|
||||||
|
- matrix-alertmanager-receiver.yaml
|
||||||
images:
|
images:
|
||||||
- name: quay.io/thanos/thanos
|
- name: quay.io/thanos/thanos
|
||||||
newTag: v0.36.0
|
newTag: v0.36.0
|
||||||
|
@ -17,3 +18,12 @@ secretGenerator:
|
||||||
disableNameSuffixHash: true
|
disableNameSuffixHash: true
|
||||||
files:
|
files:
|
||||||
- prom-scrape-configs/scrape-configs.yaml
|
- prom-scrape-configs/scrape-configs.yaml
|
||||||
|
- name: alertmanager-main
|
||||||
|
options:
|
||||||
|
disableNameSuffixHash: true
|
||||||
|
files:
|
||||||
|
- alertmanager-config/alertmanager.yaml
|
||||||
|
configMapGenerator:
|
||||||
|
- name: matrix-alertmanager-receiver
|
||||||
|
files:
|
||||||
|
- matrix-alertmanager-receiver/config.yaml
|
||||||
|
|
61
k8s/monitoring/matrix-alertmanager-receiver.yaml
Normal file
61
k8s/monitoring/matrix-alertmanager-receiver.yaml
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
# Deployment for matrix-alertmanager-receiver.
# An init container renders the ConfigMap's config.yaml into an emptyDir with
# the access token substituted in; the main container reads that rendered
# copy, mounted at /config.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: matrix-alertmanager-receiver
spec:
  selector:
    matchLabels:
      app: matrix-alertmanager-receiver
  template:
    metadata:
      labels:
        app: matrix-alertmanager-receiver
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
    spec:
      initContainers:
        - name: secret-replacer
          image: debian:stable
          command:
            - bash
            - -c
            # Fail the init container (instead of silently writing an empty
            # token) when the secret does not provide ACCESS_TOKEN.
            # The token is used as a sed replacement with '#' as delimiter, so
            # it must not contain '#', '&' or '\'.
            - |
              set -eu
              : "${ACCESS_TOKEN:?ACCESS_TOKEN must be provided by the secret}"
              sed "s#ACCESS_TOKEN#${ACCESS_TOKEN}#" /config/config.yaml > /tempconfig/config.yaml
          volumeMounts:
            - name: tempconfig
              mountPath: /tempconfig
            - name: config
              mountPath: /config
          # Exposes every key of the secret as an environment variable.
          envFrom:
            - secretRef:
                name: matrix-alertmanager-receiver
      containers:
        - name: matrix-alertmanager-receiver
          # NOTE(review): `latest` is a mutable tag; pin a release tag or
          # digest once a known-good version is chosen.
          image: docker.io/metio/matrix-alertmanager-receiver:latest
          args: ["--config-path", "/config/config.yaml"]
          resources:
            limits:
              memory: "128Mi"
              cpu: "500m"
          ports:
            - name: http
              containerPort: 8080
          volumeMounts:
            # The rendered config (with the real token), not the ConfigMap.
            - name: tempconfig
              mountPath: /config
      volumes:
        - name: config
          configMap:
            name: matrix-alertmanager-receiver
        - name: tempconfig
          emptyDir: {}
---
# In-cluster Service; this is the host name used by the Alertmanager webhook
# URL. No targetPort is set, so it defaults to the same value as `port`.
apiVersion: v1
kind: Service
metadata:
  name: matrix-alertmanager-receiver
spec:
  selector:
    app: matrix-alertmanager-receiver
  ports:
    - port: 8080
|
100
k8s/monitoring/matrix-alertmanager-receiver/config.yaml
Normal file
100
k8s/monitoring/matrix-alertmanager-receiver/config.yaml
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
# configuration of the HTTP server
http:
  address: 0.0.0.0  # bind address for this service. Can be left unspecified to bind on all interfaces
  port: 8080  # port used by this service
  alerts-path-prefix: /alerts  # URL path for the webhook receiver called by an Alertmanager. Defaults to /alerts
  metrics-path: /metrics  # URL path to collect metrics. Defaults to /metrics
  metrics-enabled: true  # Whether to enable metrics or not. Defaults to false

# configuration for the Matrix connection
matrix:
  homeserver-url: https://janky.bot  # base URL of the homeserver
  user-id: "@alerts:janky.bot"  # ID of the user used by this service
  access-token: ACCESS_TOKEN  # Access token for the user ID - will get replaced in an init container
  # define short names for Matrix room ID
  room-mapping:
    alerts: "!nBmQwxtIfjWqYGDqLb:janky.solutions"

# configuration of the templating features
templating:
  # mapping of ExternalURL values
  external-url-mapping:
    # key is the original value taken from the Alertmanager payload
    # value is the mapped value which will be available as '.ExternalURL' in templates
    # "http://alertmanager:9093": https://alertmanager.example.com
  # mapping of GeneratorURL values
  generator-url-mapping:
    # key is the original value taken from the Alertmanager payload
    # value is the mapped value which will be available as '.GeneratorURL' in templates
    # "http://prometheus:8080": https://prometheus.example.com

  # computation of arbitrary values based on matching alert annotations, labels, or status
  # values will be evaluated top to bottom, last entry wins
  computed-values:
    - values:  # always set 'color' to 'yellow'
        color: yellow
    - values:  # set 'color' to 'orange' when alert label 'severity' is 'warning'
        color: orange
      when-matching-labels:
        severity: warning
    - values:  # set 'color' to 'red' when alert label 'severity' is 'critical'
        color: red
      when-matching-labels:
        severity: critical
    - values:  # set 'color' to 'green' when alert status is 'resolved'
        color: green
      when-matching-status: resolved

  # template for alerts in status 'firing'
  # The Runbook link uses the 'runbook' annotation and falls back to
  # 'runbook_url', the annotation name used by kube-prometheus rules.
  # (Single-quoted scalar: line breaks below are folded into spaces.)
  firing-template: '
    <p>
      <strong><font color="{{ .ComputedValues.color }}">{{ .Alert.Status | ToUpper }}</font></strong>
      {{ if .Alert.Labels.name }}
        {{ .Alert.Labels.name }}
      {{ else if .Alert.Labels.alertname }}
        {{ .Alert.Labels.alertname }}
      {{ end }}
      >>
      {{ if .Alert.Labels.severity }}
        {{ .Alert.Labels.severity | ToUpper }}:
      {{ end }}
      {{ if .Alert.Annotations.description }}
        {{ .Alert.Annotations.description }}
      {{ else if .Alert.Annotations.summary }}
        {{ .Alert.Annotations.summary }}
      {{ end }}
      >>
      {{ if .Alert.Annotations.runbook }}
        <a href="{{ .Alert.Annotations.runbook }}">Runbook</a> |
      {{ else if .Alert.Annotations.runbook_url }}
        <a href="{{ .Alert.Annotations.runbook_url }}">Runbook</a> |
      {{ end }}
      {{ if .Alert.Annotations.dashboard }}
        <a href="{{ .Alert.Annotations.dashboard }}">Dashboard</a> |
      {{ end }}
      <a href="{{ .SilenceURL }}">Silence</a>
    </p>'

  # template for alerts in status 'resolved', if not specified will use the firing-template
  # Same Runbook fallback as the firing template; no Silence link here.
  resolved-template: '
    <p>
      <strong><font color="{{ .ComputedValues.color }}">{{ .Alert.Status | ToUpper }}</font></strong>
      {{ if .Alert.Labels.name }}
        {{ .Alert.Labels.name }}
      {{ else if .Alert.Labels.alertname }}
        {{ .Alert.Labels.alertname }}
      {{ end }}
      >>
      {{ if .Alert.Labels.severity }}
        {{ .Alert.Labels.severity | ToUpper }}:
      {{ end }}
      {{ if .Alert.Annotations.description }}
        {{ .Alert.Annotations.description }}
      {{ else if .Alert.Annotations.summary }}
        {{ .Alert.Annotations.summary }}
      {{ end }}
      {{ if .Alert.Annotations.runbook }}
        <a href="{{ .Alert.Annotations.runbook }}">Runbook</a> |
      {{ else if .Alert.Annotations.runbook_url }}
        <a href="{{ .Alert.Annotations.runbook_url }}">Runbook</a> |
      {{ end }}
      {{ if .Alert.Annotations.dashboard }}
        <a href="{{ .Alert.Annotations.dashboard }}">Dashboard</a> |
      {{ end }}
    </p>'
|
|
@ -0,0 +1,5 @@
|
||||||
|
# Kustomize delete patch: drops the Secret named `alertmanager-main` from the
# rendered base.
# NOTE(review): presumably so the `alertmanager-main` secretGenerator entry
# added in k8s/monitoring can supply the Alertmanager config instead — confirm.
$patch: delete
apiVersion: v1
kind: Secret
metadata:
  name: alertmanager-main
|
5
k8s/operators/kube-prometheus/apiregistration-patch.yaml
Normal file
5
k8s/operators/kube-prometheus/apiregistration-patch.yaml
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
# Kustomize delete patch: drops the `v1beta1.metrics.k8s.io` APIService from
# the rendered base.
# NOTE(review): presumably because another component already registers the
# metrics API in this cluster — confirm.
$patch: delete
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  name: v1beta1.metrics.k8s.io
|
|
@ -59,6 +59,15 @@ patches:
|
||||||
target:
|
target:
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
|
- path: alertmanager-main-secret-patch.yaml
|
||||||
|
target:
|
||||||
|
kind: Secret
|
||||||
|
name: alertmanager-main
|
||||||
|
- path: node-exporter-prometheus-rule-patches.yaml
|
||||||
|
target:
|
||||||
|
kind: PrometheusRule
|
||||||
|
name: node-exporter-rules
|
||||||
|
- path: apiregistration-patch.yaml
|
||||||
secretGenerator:
|
secretGenerator:
|
||||||
- name: grafana-config
|
- name: grafana-config
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
# Set the dashboard for all node-exporter alerts to the Node Exporter Full dashboard
#
# JSON patch operations; each one adds a `dashboard` annotation to the rule at
# a fixed index in /spec/groups/0/rules. The comment above each operation
# names the alert expected at that index. Values are quoted so the
# `{{ ... }}` template expression is always read as a plain string.

# NodeFilesystemSpaceFillingUp
- op: add
  path: /spec/groups/0/rules/0/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemSpaceFillingUp
- op: add
  path: /spec/groups/0/rules/1/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemAlmostOutOfSpace
- op: add
  path: /spec/groups/0/rules/2/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemAlmostOutOfSpace
- op: add
  path: /spec/groups/0/rules/3/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemFilesFillingUp
- op: add
  path: /spec/groups/0/rules/4/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemFilesFillingUp
- op: add
  path: /spec/groups/0/rules/5/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemAlmostOutOfFiles
- op: add
  path: /spec/groups/0/rules/6/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemAlmostOutOfFiles
- op: add
  path: /spec/groups/0/rules/7/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeNetworkReceiveErrs
- op: add
  path: /spec/groups/0/rules/8/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeNetworkTransmitErrs
- op: add
  path: /spec/groups/0/rules/9/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeHighNumberConntrackEntriesUsed
- op: add
  path: /spec/groups/0/rules/10/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeTextFileCollectorScrapeError
- op: add
  path: /spec/groups/0/rules/11/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeClockSkewDetected
- op: add
  path: /spec/groups/0/rules/12/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeClockNotSynchronising
- op: add
  path: /spec/groups/0/rules/13/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeRAIDDegraded
- op: add
  path: /spec/groups/0/rules/14/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeRAIDDiskFailure
- op: add
  path: /spec/groups/0/rules/15/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFileDescriptorLimit
- op: add
  path: /spec/groups/0/rules/16/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFileDescriptorLimit
- op: add
  path: /spec/groups/0/rules/17/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeCPUHighUsage
- op: add
  path: /spec/groups/0/rules/18/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeSystemSaturation
- op: add
  path: /spec/groups/0/rules/19/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeMemoryMajorPagesFaults
- op: add
  path: /spec/groups/0/rules/20/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeMemoryHighUtilization
- op: add
  path: /spec/groups/0/rules/21/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeDiskIOSaturation
- op: add
  path: /spec/groups/0/rules/22/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeSystemdServiceFailed
- op: add
  path: /spec/groups/0/rules/23/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# unclear why this one doesn't want to patch, leaving it out for now
# NOTE(review): possibly rule index 24 or its `annotations` map does not exist
# in the deployed rule group — TODO confirm.
# - op: add
#   path: /spec/groups/0/rules/24/annotations/dashboard # NodeBondingDegraded
#   value: https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}
|
Loading…
Reference in a new issue