alertmanager matrix receiver
This commit is contained in:
parent
ba35dc9b32
commit
8d7ab62a3a
8 changed files with 310 additions and 0 deletions
43
k8s/monitoring/alertmanager-config/alertmanager.yaml
Normal file
43
k8s/monitoring/alertmanager-config/alertmanager.yaml
Normal file
|
@ -0,0 +1,43 @@
|
|||
# Alertmanager configuration.
# Every alert goes to the `default` receiver (the matrix-alertmanager-receiver
# webhook) unless one of the child routes at the bottom matches it first.
global:
  resolve_timeout: 5m

# Suppress lower-severity alerts while a higher-severity alert with the same
# values for the `equal` labels is firing.
inhibit_rules:
  # critical silences warning and info
  - source_matchers:
      - 'severity = critical'
    target_matchers:
      - 'severity =~ warning|info'
    equal:
      - namespace
      - alertname
  # warning silences info
  - source_matchers:
      - 'severity = warning'
    target_matchers:
      - 'severity = info'
    equal:
      - namespace
      - alertname
  # InfoInhibitor silences info alerts in the same namespace
  - source_matchers:
      - 'alertname = InfoInhibitor'
    target_matchers:
      - 'severity = info'
    equal:
      - namespace

receivers:
  # NOTE(review): the path is presumably <alerts-path-prefix>/<room short name>
  # as defined in the receiver's own config.yaml - confirm if either changes.
  - name: default
    webhook_configs:
      - url: http://matrix-alertmanager-receiver:8080/alerts/alerts
  # The two receivers below have no integrations configured.
  - name: watchdog
  # Quoted so the name is the string "null" rather than a YAML null.
  - name: "null"

route:
  receiver: default
  group_by:
    - namespace
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 12h
  routes:
    - receiver: watchdog
      matchers:
        - 'alertname = Watchdog'
    - receiver: "null"
      matchers:
        - 'alertname = InfoInhibitor'
|
|
@ -8,6 +8,7 @@ resources:
|
|||
- grafana-database.yaml
|
||||
- thanos.yaml
|
||||
- alerts-longhorn.yaml
|
||||
- matrix-alertmanager-receiver.yaml
|
||||
images:
|
||||
- name: quay.io/thanos/thanos
|
||||
newTag: v0.36.0
|
||||
|
@ -17,3 +18,12 @@ secretGenerator:
|
|||
disableNameSuffixHash: true
|
||||
files:
|
||||
- prom-scrape-configs/scrape-configs.yaml
|
||||
- name: alertmanager-main
|
||||
options:
|
||||
disableNameSuffixHash: true
|
||||
files:
|
||||
- alertmanager-config/alertmanager.yaml
|
||||
configMapGenerator:
|
||||
- name: matrix-alertmanager-receiver
|
||||
files:
|
||||
- matrix-alertmanager-receiver/config.yaml
|
||||
|
|
61
k8s/monitoring/matrix-alertmanager-receiver.yaml
Normal file
61
k8s/monitoring/matrix-alertmanager-receiver.yaml
Normal file
|
@ -0,0 +1,61 @@
|
|||
# Runs matrix-alertmanager-receiver.
# The ConfigMap-provided config.yaml contains the placeholder ACCESS_TOKEN; an
# init container renders the real token (from the Secret) into a scratch volume
# that the main container then mounts as /config.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: matrix-alertmanager-receiver
spec:
  selector:
    matchLabels:
      app: matrix-alertmanager-receiver
  template:
    metadata:
      labels:
        app: matrix-alertmanager-receiver
      annotations:
        # NOTE(review): only effective if the Prometheus scrape config honours
        # these annotations - confirm against the scrape configs.
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
    spec:
      initContainers:
        - name: secret-replacer
          image: debian:stable
          command:
            - bash
            - -c
            # Abort instead of rendering a config with an empty token when the
            # Secret does not provide a non-empty ACCESS_TOKEN.
            - |
              set -euo pipefail
              : "${ACCESS_TOKEN:?ACCESS_TOKEN is missing from the matrix-alertmanager-receiver secret}"
              sed "s#ACCESS_TOKEN#${ACCESS_TOKEN}#" /config/config.yaml > /tempconfig/config.yaml
          volumeMounts:
            - name: tempconfig
              mountPath: /tempconfig
            - name: config
              mountPath: /config
          envFrom:
            - secretRef:
                name: matrix-alertmanager-receiver
      containers:
        - name: matrix-alertmanager-receiver
          # NOTE(review): `latest` is a floating tag; consider pinning a
          # released version or digest for reproducible rollouts.
          image: docker.io/metio/matrix-alertmanager-receiver:latest
          args: ["--config-path", "/config/config.yaml"]
          resources:
            limits:
              memory: "128Mi"
              cpu: "500m"
          ports:
            - name: http
              containerPort: 8080
          volumeMounts:
            # The rendered config (with the real token), not the ConfigMap.
            - name: tempconfig
              mountPath: /config
      volumes:
        - name: config
          configMap:
            name: matrix-alertmanager-receiver
        # Holds the rendered config including the access token; kept in memory
        # so the secret is not written to the node's disk.
        - name: tempconfig
          emptyDir:
            medium: Memory
|
||||
---
# Exposes the receiver pods (selected by the `app` label) on port 8080.
# No `type` or `targetPort` is set, so the Kubernetes defaults apply.
apiVersion: v1
kind: Service
metadata:
  name: matrix-alertmanager-receiver
spec:
  selector:
    app: matrix-alertmanager-receiver
  ports:
    - port: 8080
|
100
k8s/monitoring/matrix-alertmanager-receiver/config.yaml
Normal file
100
k8s/monitoring/matrix-alertmanager-receiver/config.yaml
Normal file
|
@ -0,0 +1,100 @@
|
|||
# HTTP server settings of the receiver
http:
  # bind address for this service; can be left unspecified to bind on all interfaces
  address: '0.0.0.0'
  # port used by this service
  port: 8080
  # URL path for the webhook receiver called by an Alertmanager (defaults to /alerts)
  alerts-path-prefix: /alerts
  # URL path to collect metrics (defaults to /metrics)
  metrics-path: /metrics
  # whether to enable metrics or not (defaults to false)
  metrics-enabled: true
|
||||
|
||||
# Matrix connection settings
matrix:
  # base URL of the homeserver
  homeserver-url: https://janky.bot
  # ID of the user used by this service
  user-id: "@alerts:janky.bot"
  # access token for the user ID; the literal ACCESS_TOKEN is a placeholder
  # that gets replaced in an init container
  access-token: ACCESS_TOKEN
  # short names for Matrix room IDs
  room-mapping:
    alerts: "!nBmQwxtIfjWqYGDqLb:janky.solutions"
|
||||
|
||||
# Templating settings: URL rewrites, computed values and message templates
templating:
  # Mapping of ExternalURL values (no entries configured; example kept below).
  #   key:   original value taken from the Alertmanager payload
  #   value: mapped value, available as '.ExternalURL' in templates
  external-url-mapping:
    # "http://alertmanager:9093": https://alertmanager.example.com

  # Mapping of GeneratorURL values (no entries configured; example kept below).
  #   key:   original value taken from the Alertmanager payload
  #   value: mapped value, available as '.GeneratorURL' in templates
  generator-url-mapping:
    # "http://prometheus:8080": https://prometheus.example.com

  # Arbitrary values computed from matching alert annotations, labels, or
  # status. Entries are evaluated top to bottom; the last matching entry wins.
  computed-values:
    # always set 'color' to 'yellow'
    - values:
        color: yellow
    # 'orange' when alert label 'severity' is 'warning'
    - values:
        color: orange
      when-matching-labels:
        severity: warning
    # 'red' when alert label 'severity' is 'critical'
    - values:
        color: red
      when-matching-labels:
        severity: critical
    # 'green' when alert status is 'resolved'
    - values:
        color: green
      when-matching-status: resolved

  # Template for alerts in status 'firing'.
  # Single-quoted scalar: line breaks below are folded into single spaces.
  firing-template: '
    <p>
      <strong><font color="{{ .ComputedValues.color }}">{{ .Alert.Status | ToUpper }}</font></strong>
      {{ if .Alert.Labels.name }}
        {{ .Alert.Labels.name }}
      {{ else if .Alert.Labels.alertname }}
        {{ .Alert.Labels.alertname }}
      {{ end }}
      >>
      {{ if .Alert.Labels.severity }}
        {{ .Alert.Labels.severity | ToUpper }}:
      {{ end }}
      {{ if .Alert.Annotations.description }}
        {{ .Alert.Annotations.description }}
      {{ else if .Alert.Annotations.summary }}
        {{ .Alert.Annotations.summary }}
      {{ end }}
      >>
      {{ if .Alert.Annotations.runbook }}
        <a href="{{ .Alert.Annotations.runbook }}">Runbook</a> |
      {{ end }}
      {{ if .Alert.Annotations.dashboard }}
        <a href="{{ .Alert.Annotations.dashboard }}">Dashboard</a> |
      {{ end }}
      <a href="{{ .SilenceURL }}">Silence</a>
    </p>'

  # Template for alerts in status 'resolved'; if not specified, the
  # firing-template is used instead.
  resolved-template: '
    <p>
      <strong><font color="{{ .ComputedValues.color }}">{{ .Alert.Status | ToUpper }}</font></strong>
      {{ if .Alert.Labels.name }}
        {{ .Alert.Labels.name }}
      {{ else if .Alert.Labels.alertname }}
        {{ .Alert.Labels.alertname }}
      {{ end }}
      >>
      {{ if .Alert.Labels.severity }}
        {{ .Alert.Labels.severity | ToUpper }}:
      {{ end }}
      {{ if .Alert.Annotations.description }}
        {{ .Alert.Annotations.description }}
      {{ else if .Alert.Annotations.summary }}
        {{ .Alert.Annotations.summary }}
      {{ end }}
      {{ if .Alert.Annotations.runbook }}
        <a href="{{ .Alert.Annotations.runbook }}">Runbook</a> |
      {{ end }}
      {{ if .Alert.Annotations.dashboard }}
        <a href="{{ .Alert.Annotations.dashboard }}">Dashboard</a> |
      {{ end }}
    </p>'
|
|
@ -0,0 +1,5 @@
|
|||
# Kustomize patch: `$patch: delete` removes the matched resource (the
# `alertmanager-main` Secret) from the rendered output.
# NOTE(review): presumably dropped so that the Secret generated from
# alertmanager-config/alertmanager.yaml takes its place - confirm.
$patch: delete
apiVersion: v1
kind: Secret
metadata:
  name: alertmanager-main
|
5
k8s/operators/kube-prometheus/apiregistration-patch.yaml
Normal file
5
k8s/operators/kube-prometheus/apiregistration-patch.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Kustomize patch: `$patch: delete` removes the matched resource (the
# `v1beta1.metrics.k8s.io` APIService) from the rendered output.
# NOTE(review): the reason for dropping it is not visible here; presumably the
# metrics API is served by another component - confirm.
$patch: delete
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  name: v1beta1.metrics.k8s.io
|
|
@ -59,6 +59,15 @@ patches:
|
|||
target:
|
||||
kind: Deployment
|
||||
name: kube-state-metrics
|
||||
- path: alertmanager-main-secret-patch.yaml
|
||||
target:
|
||||
kind: Secret
|
||||
name: alertmanager-main
|
||||
- path: node-exporter-prometheus-rule-patches.yaml
|
||||
target:
|
||||
kind: PrometheusRule
|
||||
name: node-exporter-rules
|
||||
- path: apiregistration-patch.yaml
|
||||
secretGenerator:
|
||||
- name: grafana-config
|
||||
namespace: monitoring
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
# JSON patch operations that add a `dashboard` annotation, pointing at the
# Node Exporter Full Grafana dashboard, to every node-exporter alert rule.
# Rules are addressed by index within group 0; the comment above each
# operation names the alert expected at that index.

# NodeFilesystemSpaceFillingUp
- op: add
  path: /spec/groups/0/rules/0/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemSpaceFillingUp
- op: add
  path: /spec/groups/0/rules/1/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemAlmostOutOfSpace
- op: add
  path: /spec/groups/0/rules/2/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemAlmostOutOfSpace
- op: add
  path: /spec/groups/0/rules/3/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemFilesFillingUp
- op: add
  path: /spec/groups/0/rules/4/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemFilesFillingUp
- op: add
  path: /spec/groups/0/rules/5/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemAlmostOutOfFiles
- op: add
  path: /spec/groups/0/rules/6/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFilesystemAlmostOutOfFiles
- op: add
  path: /spec/groups/0/rules/7/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeNetworkReceiveErrs
- op: add
  path: /spec/groups/0/rules/8/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeNetworkTransmitErrs
- op: add
  path: /spec/groups/0/rules/9/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeHighNumberConntrackEntriesUsed
- op: add
  path: /spec/groups/0/rules/10/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeTextFileCollectorScrapeError
- op: add
  path: /spec/groups/0/rules/11/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeClockSkewDetected
- op: add
  path: /spec/groups/0/rules/12/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeClockNotSynchronising
- op: add
  path: /spec/groups/0/rules/13/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeRAIDDegraded
- op: add
  path: /spec/groups/0/rules/14/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeRAIDDiskFailure
- op: add
  path: /spec/groups/0/rules/15/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFileDescriptorLimit
- op: add
  path: /spec/groups/0/rules/16/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeFileDescriptorLimit
- op: add
  path: /spec/groups/0/rules/17/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeCPUHighUsage
- op: add
  path: /spec/groups/0/rules/18/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeSystemSaturation
- op: add
  path: /spec/groups/0/rules/19/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeMemoryMajorPagesFaults
- op: add
  path: /spec/groups/0/rules/20/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeMemoryHighUtilization
- op: add
  path: /spec/groups/0/rules/21/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeDiskIOSaturation
- op: add
  path: /spec/groups/0/rules/22/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
# NodeSystemdServiceFailed
- op: add
  path: /spec/groups/0/rules/23/annotations/dashboard
  value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'

# Disabled: it is unclear why this operation fails to apply, so it is left out
# for now.
# NOTE(review): a JSON patch `add` at this path needs rule 24 and its
# `annotations` map to already exist - verify against the rendered rule.
# NodeBondingDegraded
# - op: add
#   path: /spec/groups/0/rules/24/annotations/dashboard
#   value: 'https://grafana.home.finn.io/d/rYdddlPWk/node-exporter-full?var-node={{ $labels.instance }}'
|
Loading…
Reference in a new issue