Add Thanos to monitoring. It is currently used only for historical data storage; alerting still happens the old-fashioned way.
This commit is contained in:
parent
91169b00d9
commit
5e34870065
9 changed files with 372 additions and 8 deletions
|
@ -6,10 +6,13 @@ resources:
|
|||
- ingresses.yaml
|
||||
- secrets.yaml
|
||||
- grafana-database.yaml
|
||||
- thanos.yaml
|
||||
images:
|
||||
- name: quay.io/thanos/thanos
|
||||
newTag: v0.36.0
|
||||
secretGenerator:
|
||||
- name: additional-scrape-configs
|
||||
options:
|
||||
disableNameSuffixHash: true
|
||||
files:
|
||||
- prom-scrape-configs/scrape-configs.yaml
|
||||
|
||||
|
|
306
k8s/monitoring/thanos.yaml
Normal file
306
k8s/monitoring/thanos.yaml
Normal file
|
@ -0,0 +1,306 @@
|
|||
# This file contains all the components for Thanos that aren't configured by kube-prometheus
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: thanos-querier
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: thanos-querier
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: thanos-querier
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "9090"
|
||||
spec:
|
||||
containers:
|
||||
- name: thanos-querier
|
||||
image: quay.io/thanos/thanos:latest
|
||||
args:
|
||||
- query
|
||||
- --http-address
|
||||
- 0.0.0.0:9090
|
||||
- --endpoint
|
||||
- dnssrv+_grpc._tcp.prometheus-k8s-headless.monitoring.svc
|
||||
- --endpoint
|
||||
- dnssrv+_grpc._tcp.thanos-store.monitoring.svc
|
||||
resources:
|
||||
limits:
|
||||
memory: "128Mi"
|
||||
cpu: "500m"
|
||||
ports:
|
||||
- containerPort: 9090
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: thanos-querier
|
||||
spec:
|
||||
selector:
|
||||
app: thanos-querier
|
||||
ports:
|
||||
- port: 9090
|
||||
targetPort: 9090
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: prometheus-k8s-headless
|
||||
spec:
|
||||
selector:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
clusterIP: None
|
||||
ports:
|
||||
- name: grpc
|
||||
port: 10901
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
labels:
|
||||
app: thanos-store
|
||||
name: thanos-store
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: thanos-store
|
||||
serviceName: thanos-store
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: thanos-store
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "10902"
|
||||
spec:
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/name
|
||||
operator: In
|
||||
values:
|
||||
- thanos-store
|
||||
- key: app.kubernetes.io/instance
|
||||
operator: In
|
||||
values:
|
||||
- thanos-store
|
||||
namespaces:
|
||||
- monitoring
|
||||
topologyKey: kubernetes.io/hostname
|
||||
weight: 100
|
||||
containers:
|
||||
- args:
|
||||
- store
|
||||
- --log.level=info
|
||||
- --log.format=logfmt
|
||||
- --data-dir=/var/thanos/store
|
||||
- --grpc-address=0.0.0.0:10901
|
||||
- --http-address=0.0.0.0:10902
|
||||
- --objstore.config=$(OBJSTORE_CONFIG)
|
||||
- --ignore-deletion-marks-delay=24h
|
||||
env:
|
||||
- name: OBJSTORE_CONFIG
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: thanos.yaml
|
||||
name: thanos-objstore
|
||||
- name: HOST_IP_ADDRESS
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.hostIP
|
||||
image: quay.io/thanos/thanos:latest
|
||||
livenessProbe:
|
||||
failureThreshold: 8
|
||||
httpGet:
|
||||
path: /-/healthy
|
||||
port: 10902
|
||||
scheme: HTTP
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 1
|
||||
name: thanos-store
|
||||
ports:
|
||||
- containerPort: 10901
|
||||
name: grpc
|
||||
- containerPort: 10902
|
||||
name: http
|
||||
readinessProbe:
|
||||
failureThreshold: 20
|
||||
httpGet:
|
||||
path: /-/ready
|
||||
port: 10902
|
||||
scheme: HTTP
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
limits:
|
||||
memory: "128Mi"
|
||||
cpu: "500m"
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePolicy: FallbackToLogsOnError
|
||||
volumeMounts:
|
||||
- mountPath: /var/thanos/store
|
||||
name: data
|
||||
readOnly: false
|
||||
securityContext:
|
||||
fsGroup: 65534
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationGracePeriodSeconds: 120
|
||||
volumes: []
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
labels:
|
||||
app: thanos-store
|
||||
name: data
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: thanos-store
|
||||
spec:
|
||||
selector:
|
||||
app: thanos-store
|
||||
ports:
|
||||
- name: grpc
|
||||
port: 10901
|
||||
- name: http
|
||||
port: 10902
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
labels:
|
||||
app: thanos-compact
|
||||
name: thanos-compact
|
||||
namespace: thanos
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: thanos-compact
|
||||
serviceName: thanos-compact
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: thanos-compact
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "10902"
|
||||
spec:
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/name
|
||||
operator: In
|
||||
values:
|
||||
- thanos-compact
|
||||
- key: app.kubernetes.io/instance
|
||||
operator: In
|
||||
values:
|
||||
- thanos-compact
|
||||
namespaces:
|
||||
- thanos
|
||||
topologyKey: kubernetes.io/hostname
|
||||
weight: 100
|
||||
containers:
|
||||
- args:
|
||||
- compact
|
||||
- --wait
|
||||
- --log.level=info
|
||||
- --log.format=logfmt
|
||||
- --objstore.config=$(OBJSTORE_CONFIG)
|
||||
- --data-dir=/var/thanos/compact
|
||||
- --retention.resolution-1h=365d
|
||||
env:
|
||||
- name: OBJSTORE_CONFIG
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: thanos.yaml
|
||||
name: thanos-objstore
|
||||
- name: HOST_IP_ADDRESS
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.hostIP
|
||||
image: quay.io/thanos/thanos:v0.30.2
|
||||
imagePullPolicy: IfNotPresent
|
||||
livenessProbe:
|
||||
failureThreshold: 4
|
||||
httpGet:
|
||||
path: /-/healthy
|
||||
port: 10902
|
||||
scheme: HTTP
|
||||
periodSeconds: 30
|
||||
name: thanos-compact
|
||||
ports:
|
||||
- containerPort: 10902
|
||||
name: http
|
||||
readinessProbe:
|
||||
failureThreshold: 20
|
||||
httpGet:
|
||||
path: /-/ready
|
||||
port: 10902
|
||||
scheme: HTTP
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 500Mi
|
||||
requests:
|
||||
cpu: 200m
|
||||
memory: 100Mi
|
||||
terminationMessagePolicy: FallbackToLogsOnError
|
||||
volumeMounts:
|
||||
- mountPath: /var/thanos/compact
|
||||
name: data
|
||||
readOnly: false
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
securityContext:
|
||||
fsGroup: 65534
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationGracePeriodSeconds: 120
|
||||
volumes: []
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: data
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
|
@ -1,3 +1,14 @@
|
|||
- op: add
|
||||
path: /spec/externalUrl
|
||||
value: https://alertmanager.k8s.home.finn.io
|
||||
- op: replace
|
||||
path: /spec/resources/requests/memory
|
||||
value: 40Mi
|
||||
- op: add
|
||||
path: /spec/storage
|
||||
value:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
"name": "prometheus",
|
||||
"orgId": 1,
|
||||
"type": "prometheus",
|
||||
"url": "http://prometheus-k8s.monitoring.svc:9090",
|
||||
"version": 1
|
||||
"url": "http://thanos-querier.monitoring.svc:9090",
|
||||
"version": 2
|
||||
},
|
||||
{
|
||||
"access": "proxy",
|
||||
|
|
|
@ -14,11 +14,11 @@ patches:
|
|||
- path: network-policies-patch.yaml
|
||||
target:
|
||||
kind: NetworkPolicy
|
||||
name: prometheus-k8s
|
||||
- path: network-policies-patch.yaml
|
||||
name: alertmanager-main
|
||||
- path: prometheus-network-policy-patch.yaml
|
||||
target:
|
||||
kind: NetworkPolicy
|
||||
name: alertmanager-main
|
||||
name: prometheus-k8s
|
||||
- path: prometheus-patch.yaml
|
||||
target:
|
||||
kind: Prometheus
|
||||
|
@ -43,6 +43,10 @@ patches:
|
|||
target:
|
||||
kind: PrometheusRule
|
||||
name: node-exporter-rules
|
||||
- path: prometheus-operator-deployment-patches.yaml
|
||||
target:
|
||||
kind: Deployment
|
||||
name: prometheus-operator
|
||||
secretGenerator:
|
||||
- name: grafana-config
|
||||
namespace: monitoring
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
- op: add
|
||||
path: /spec/ingress/0/from/-
|
||||
value:
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: traefik
|
||||
namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: kube-system
|
||||
- op: add
|
||||
path: /spec/ingress/-
|
||||
value:
|
||||
from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app: thanos-querier
|
||||
namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: monitoring
|
||||
ports:
|
||||
- port: 10901
|
||||
protocol: TCP
|
|
@ -0,0 +1,3 @@
|
|||
- op: add
|
||||
path: /spec/template/spec/containers/0/args/-
|
||||
value: --config-reloader-cpu-limit=500m
|
|
@ -10,3 +10,18 @@
|
|||
- op: add
|
||||
path: /spec/externalUrl
|
||||
value: https://prometheus.k8s.home.finn.io
|
||||
- op: add
|
||||
path: /spec/thanos
|
||||
value:
|
||||
image: quay.io/thanos/thanos:v0.36.0
|
||||
objectStorageConfig:
|
||||
key: thanos.yaml
|
||||
name: thanos-objstore
|
||||
- op: add
|
||||
path: /spec/storage
|
||||
value:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 20Gi
|
||||
|
|
|
@ -52,9 +52,9 @@ spec:
|
|||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: DoesNotExist
|
||||
tolerations:
|
||||
- key: "rtlsdr"
|
||||
- key: rtlsdr
|
||||
value: "true"
|
||||
effect: "NoSchedule"
|
||||
effect: NoSchedule
|
||||
prepare:
|
||||
args:
|
||||
- prepare
|
||||
|
|
Loading…
Reference in a new issue