Add Thanos to monitoring. For now it is used only for historical data storage; alerting still happens the old-fashioned way.

This commit is contained in:
Finn 2024-08-14 17:00:56 -07:00
parent 91169b00d9
commit 5e34870065
9 changed files with 372 additions and 8 deletions

View file

@ -6,10 +6,13 @@ resources:
- ingresses.yaml - ingresses.yaml
- secrets.yaml - secrets.yaml
- grafana-database.yaml - grafana-database.yaml
- thanos.yaml
images:
- name: quay.io/thanos/thanos
newTag: v0.36.0
secretGenerator: secretGenerator:
- name: additional-scrape-configs - name: additional-scrape-configs
options: options:
disableNameSuffixHash: true disableNameSuffixHash: true
files: files:
- prom-scrape-configs/scrape-configs.yaml - prom-scrape-configs/scrape-configs.yaml

306
k8s/monitoring/thanos.yaml Normal file
View file

@ -0,0 +1,306 @@
# This file contains all the components for Thanos that aren't configured by kube-prometheus
---
# Thanos Querier: serves the query HTTP API on port 9090 and fans queries out
# to the gRPC endpoints it discovers through DNS SRV lookups (`dnssrv+`) on the
# prometheus-k8s-headless and thanos-store Services.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: thanos-querier
spec:
  selector:
    matchLabels:
      app: thanos-querier
  template:
    metadata:
      labels:
        app: thanos-querier
      annotations:
        # Scrape the querier's own metrics from its HTTP port.
        prometheus.io/scrape: "true"
        prometheus.io/port: "9090"
    spec:
      containers:
        - name: thanos-querier
          # Pinned to the same release as the kustomization image override and
          # the Prometheus Thanos sidecar instead of the floating `latest` tag,
          # so the manifest is reproducible even when applied without kustomize.
          image: quay.io/thanos/thanos:v0.36.0
          args:
            - query
            - --http-address
            - 0.0.0.0:9090
            # SRV record of the `grpc` port on the headless Prometheus Service.
            - --endpoint
            - dnssrv+_grpc._tcp.prometheus-k8s-headless.monitoring.svc
            # SRV record of the `grpc` port on the store gateway Service.
            - --endpoint
            - dnssrv+_grpc._tcp.thanos-store.monitoring.svc
          ports:
            - containerPort: 9090
          # Health endpoints live on the HTTP port; settings mirror the probes
          # used by the thanos-store workload in this file.
          livenessProbe:
            failureThreshold: 8
            httpGet:
              path: /-/healthy
              port: 9090
              scheme: HTTP
            periodSeconds: 30
          readinessProbe:
            failureThreshold: 20
            httpGet:
              path: /-/ready
              port: 9090
              scheme: HTTP
            periodSeconds: 5
          resources:
            limits:
              memory: "128Mi"
              cpu: "500m"
---
# Service exposing the thanos-querier pods on their HTTP port (9090).
kind: Service
apiVersion: v1
metadata:
  name: thanos-querier
spec:
  ports:
    - targetPort: 9090
      port: 9090
  selector:
    app: thanos-querier
---
# Headless Service (clusterIP: None) over the Prometheus pods. Its named `grpc`
# port on 10901 is what the querier's
# `dnssrv+_grpc._tcp.prometheus-k8s-headless.monitoring.svc` endpoint looks up.
kind: Service
apiVersion: v1
metadata:
  name: prometheus-k8s-headless
spec:
  clusterIP: None
  ports:
    - port: 10901
      name: grpc
  # Pods are selected by these four app.kubernetes.io/* labels.
  selector:
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/instance: k8s
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/part-of: kube-prometheus
---
# Thanos Store Gateway: runs `thanos store` against the bucket described by the
# `thanos-objstore` secret, serving gRPC on 10901 and HTTP (metrics and health)
# on 10902. Local state is kept on a 10Gi PVC mounted at /var/thanos/store.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  labels:
    app: thanos-store
  name: thanos-store
spec:
  replicas: 1
  selector:
    matchLabels:
      app: thanos-store
  serviceName: thanos-store
  template:
    metadata:
      labels:
        app: thanos-store
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "10902"
    spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - podAffinityTerm:
                labelSelector:
                  # Select on the `app` label that the pod template actually
                  # sets. The previous app.kubernetes.io/name and /instance
                  # expressions matched no pods, so the rule never took effect.
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - thanos-store
                namespaces:
                  - monitoring
                topologyKey: kubernetes.io/hostname
              weight: 100
      containers:
        - args:
            - store
            - --log.level=info
            - --log.format=logfmt
            - --data-dir=/var/thanos/store
            - --grpc-address=0.0.0.0:10901
            - --http-address=0.0.0.0:10902
            # $(OBJSTORE_CONFIG) is expanded by Kubernetes from the env var
            # below, i.e. the bucket config is passed inline from the secret.
            - --objstore.config=$(OBJSTORE_CONFIG)
            - --ignore-deletion-marks-delay=24h
          env:
            - name: OBJSTORE_CONFIG
              valueFrom:
                secretKeyRef:
                  key: thanos.yaml
                  name: thanos-objstore
            # NOTE(review): not referenced by any arg in this manifest;
            # confirm whether it is still needed.
            - name: HOST_IP_ADDRESS
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
          # Pinned to the same release as the kustomization image override and
          # the Prometheus Thanos sidecar instead of the floating `latest` tag.
          image: quay.io/thanos/thanos:v0.36.0
          livenessProbe:
            failureThreshold: 8
            httpGet:
              path: /-/healthy
              port: 10902
              scheme: HTTP
            periodSeconds: 30
            timeoutSeconds: 1
          name: thanos-store
          ports:
            - containerPort: 10901
              name: grpc
            - containerPort: 10902
              name: http
          readinessProbe:
            failureThreshold: 20
            httpGet:
              path: /-/ready
              port: 10902
              scheme: HTTP
            periodSeconds: 5
          resources:
            limits:
              memory: "128Mi"
              cpu: "500m"
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            # Root filesystem is read-only; the data volume below is the only
            # writable path declared in this manifest.
            readOnlyRootFilesystem: true
            runAsGroup: 65532
            runAsNonRoot: true
            runAsUser: 65534
            seccompProfile:
              type: RuntimeDefault
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
            - mountPath: /var/thanos/store
              name: data
              readOnly: false
      securityContext:
        fsGroup: 65534
        runAsGroup: 65532
        runAsNonRoot: true
        runAsUser: 65534
        seccompProfile:
          type: RuntimeDefault
      terminationGracePeriodSeconds: 120
      volumes: []
  volumeClaimTemplates:
    - metadata:
        labels:
          app: thanos-store
        name: data
      spec:
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 10Gi
---
# Service in front of the store gateway pods. The named `grpc` port is the one
# the querier's `dnssrv+_grpc._tcp.thanos-store.monitoring.svc` endpoint looks
# up; `http` exposes the metrics and health port.
kind: Service
apiVersion: v1
metadata:
  name: thanos-store
spec:
  ports:
    - port: 10901
      name: grpc
    - port: 10902
      name: http
  selector:
    app: thanos-store
---
# Thanos Compactor: runs `thanos compact --wait` as a long-lived process against
# the bucket described by the `thanos-objstore` secret, keeping 1h-resolution
# data for 365d. HTTP (metrics and health) is served on 10902; scratch data is
# kept on a 10Gi PVC mounted at /var/thanos/compact.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  labels:
    app: thanos-compact
  name: thanos-compact
  # Same namespace as the rest of the Thanos stack. The `thanos-objstore`
  # secret referenced below is also used by the Prometheus sidecar, and a
  # secretKeyRef only resolves within the pod's own namespace.
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: thanos-compact
  serviceName: thanos-compact
  template:
    metadata:
      labels:
        app: thanos-compact
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "10902"
    spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - podAffinityTerm:
                labelSelector:
                  # Select on the `app` label that the pod template actually
                  # sets. The previous app.kubernetes.io/name and /instance
                  # expressions matched no pods, so the rule never took effect.
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - thanos-compact
                namespaces:
                  - monitoring
                topologyKey: kubernetes.io/hostname
              weight: 100
      containers:
        - args:
            - compact
            # Keep running and compact continuously instead of exiting after
            # one pass.
            - --wait
            - --log.level=info
            - --log.format=logfmt
            # $(OBJSTORE_CONFIG) is expanded by Kubernetes from the env var
            # below, i.e. the bucket config is passed inline from the secret.
            - --objstore.config=$(OBJSTORE_CONFIG)
            - --data-dir=/var/thanos/compact
            - --retention.resolution-1h=365d
          env:
            - name: OBJSTORE_CONFIG
              valueFrom:
                secretKeyRef:
                  key: thanos.yaml
                  name: thanos-objstore
            # NOTE(review): not referenced by any arg in this manifest;
            # confirm whether it is still needed.
            - name: HOST_IP_ADDRESS
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
          # Pinned to the same release as the kustomization image override and
          # the Prometheus Thanos sidecar (was the older v0.30.2).
          image: quay.io/thanos/thanos:v0.36.0
          imagePullPolicy: IfNotPresent
          livenessProbe:
            failureThreshold: 4
            httpGet:
              path: /-/healthy
              port: 10902
              scheme: HTTP
            periodSeconds: 30
          name: thanos-compact
          ports:
            - containerPort: 10902
              name: http
          readinessProbe:
            failureThreshold: 20
            httpGet:
              path: /-/ready
              port: 10902
              scheme: HTTP
            periodSeconds: 5
          resources:
            limits:
              cpu: 500m
              memory: 500Mi
            requests:
              cpu: 200m
              memory: 100Mi
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
            - mountPath: /var/thanos/compact
              name: data
              readOnly: false
      nodeSelector:
        kubernetes.io/os: linux
      securityContext:
        fsGroup: 65534
        runAsGroup: 65532
        runAsNonRoot: true
        runAsUser: 65534
        seccompProfile:
          type: RuntimeDefault
      terminationGracePeriodSeconds: 120
      volumes: []
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 10Gi

View file

@ -1,3 +1,14 @@
- op: add - op: add
path: /spec/externalUrl path: /spec/externalUrl
value: https://alertmanager.k8s.home.finn.io value: https://alertmanager.k8s.home.finn.io
- op: replace
path: /spec/resources/requests/memory
value: 40Mi
- op: add
path: /spec/storage
value:
volumeClaimTemplate:
spec:
resources:
requests:
storage: 1Gi

View file

@ -7,8 +7,8 @@
"name": "prometheus", "name": "prometheus",
"orgId": 1, "orgId": 1,
"type": "prometheus", "type": "prometheus",
"url": "http://prometheus-k8s.monitoring.svc:9090", "url": "http://thanos-querier.monitoring.svc:9090",
"version": 1 "version": 2
}, },
{ {
"access": "proxy", "access": "proxy",

View file

@ -14,11 +14,11 @@ patches:
- path: network-policies-patch.yaml - path: network-policies-patch.yaml
target: target:
kind: NetworkPolicy kind: NetworkPolicy
name: prometheus-k8s name: alertmanager-main
- path: network-policies-patch.yaml - path: prometheus-network-policy-patch.yaml
target: target:
kind: NetworkPolicy kind: NetworkPolicy
name: alertmanager-main name: prometheus-k8s
- path: prometheus-patch.yaml - path: prometheus-patch.yaml
target: target:
kind: Prometheus kind: Prometheus
@ -43,6 +43,10 @@ patches:
target: target:
kind: PrometheusRule kind: PrometheusRule
name: node-exporter-rules name: node-exporter-rules
- path: prometheus-operator-deployment-patches.yaml
target:
kind: Deployment
name: prometheus-operator
secretGenerator: secretGenerator:
- name: grafana-config - name: grafana-config
namespace: monitoring namespace: monitoring

View file

@ -0,0 +1,22 @@
# JSON 6902 patch for the prometheus-k8s NetworkPolicy.

# 1) Append a peer to the first ingress rule: pods labelled
#    app.kubernetes.io/name=traefik in the kube-system namespace.
- path: /spec/ingress/0/from/-
  op: add
  value:
    namespaceSelector:
      matchLabels:
        kubernetes.io/metadata.name: kube-system
    podSelector:
      matchLabels:
        app.kubernetes.io/name: traefik

# 2) Append a new ingress rule: thanos-querier pods in the monitoring
#    namespace may reach TCP port 10901.
- path: /spec/ingress/-
  op: add
  value:
    ports:
      - protocol: TCP
        port: 10901
    from:
      - namespaceSelector:
          matchLabels:
            kubernetes.io/metadata.name: monitoring
        podSelector:
          matchLabels:
            app: thanos-querier

View file

@ -0,0 +1,3 @@
# JSON 6902 patch: append a CPU-limit flag for the config-reloader to the args
# of the first container in the prometheus-operator Deployment.
- path: /spec/template/spec/containers/0/args/-
  op: add
  value: '--config-reloader-cpu-limit=500m'

View file

@ -10,3 +10,18 @@
- op: add - op: add
path: /spec/externalUrl path: /spec/externalUrl
value: https://prometheus.k8s.home.finn.io value: https://prometheus.k8s.home.finn.io
- op: add
path: /spec/thanos
value:
image: quay.io/thanos/thanos:v0.36.0
objectStorageConfig:
key: thanos.yaml
name: thanos-objstore
- op: add
path: /spec/storage
value:
volumeClaimTemplate:
spec:
resources:
requests:
storage: 20Gi

View file

@ -52,9 +52,9 @@ spec:
- key: node-role.kubernetes.io/control-plane - key: node-role.kubernetes.io/control-plane
operator: DoesNotExist operator: DoesNotExist
tolerations: tolerations:
- key: "rtlsdr" - key: rtlsdr
value: "true" value: "true"
effect: "NoSchedule" effect: NoSchedule
prepare: prepare:
args: args:
- prepare - prepare