add monitoring to talos cluster
This commit is contained in:
parent
eabe62e578
commit
d4ea7e8ed5
14 changed files with 1168 additions and 0 deletions
|
@ -3,3 +3,12 @@ cluster:
|
|||
machine:
|
||||
install:
|
||||
disk: /dev/sde
|
||||
kubelet:
|
||||
extraMounts:
|
||||
- destination: /var/local-path-provisioner
|
||||
type: bind
|
||||
source: /var/local-path-provisioner
|
||||
options:
|
||||
- bind
|
||||
- rshared
|
||||
- rw
|
||||
|
|
|
@ -2,4 +2,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
|
|||
kind: Kustomization
|
||||
resources:
|
||||
- operators
|
||||
- monitoring
|
||||
- rook
|
||||
|
|
356
talos/k8s/monitoring/kube-state-metrics.yaml
Normal file
356
talos/k8s/monitoring/kube-state-metrics.yaml
Normal file
|
@ -0,0 +1,356 @@
|
|||
# inspired by the various kubeStateMetrics-* files in https://github.com/prometheus-operator/kube-prometheus/tree/main/manifests
|
||||
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
name: kube-state-metrics
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
- secrets
|
||||
- nodes
|
||||
- pods
|
||||
- services
|
||||
- serviceaccounts
|
||||
- resourcequotas
|
||||
- replicationcontrollers
|
||||
- limitranges
|
||||
- persistentvolumeclaims
|
||||
- persistentvolumes
|
||||
- namespaces
|
||||
- endpoints
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- apps
|
||||
resources:
|
||||
- statefulsets
|
||||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- batch
|
||||
resources:
|
||||
- cronjobs
|
||||
- jobs
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- autoscaling
|
||||
resources:
|
||||
- horizontalpodautoscalers
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- authentication.k8s.io
|
||||
resources:
|
||||
- tokenreviews
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- authorization.k8s.io
|
||||
resources:
|
||||
- subjectaccessreviews
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- policy
|
||||
resources:
|
||||
- poddisruptionbudgets
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- certificates.k8s.io
|
||||
resources:
|
||||
- certificatesigningrequests
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- discovery.k8s.io
|
||||
resources:
|
||||
- endpointslices
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- storage.k8s.io
|
||||
resources:
|
||||
- storageclasses
|
||||
- volumeattachments
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- admissionregistration.k8s.io
|
||||
resources:
|
||||
- mutatingwebhookconfigurations
|
||||
- validatingwebhookconfigurations
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- networkpolicies
|
||||
- ingressclasses
|
||||
- ingresses
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- coordination.k8s.io
|
||||
resources:
|
||||
- leases
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- rbac.authorization.k8s.io
|
||||
resources:
|
||||
- clusterrolebindings
|
||||
- clusterroles
|
||||
- rolebindings
|
||||
- roles
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
name: kube-state-metrics
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: kube-state-metrics
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
kubectl.kubernetes.io/default-container: kube-state-metrics
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
spec:
|
||||
automountServiceAccountToken: true
|
||||
containers:
|
||||
- args:
|
||||
- --host=127.0.0.1
|
||||
- --port=8081
|
||||
- --telemetry-host=127.0.0.1
|
||||
- --telemetry-port=8082
|
||||
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.14.0
|
||||
name: kube-state-metrics
|
||||
resources:
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 250Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 190Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsGroup: 65534
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
- args:
|
||||
- --secure-listen-address=:8443
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
||||
- --upstream=http://127.0.0.1:8081/
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.18.2
|
||||
name: kube-rbac-proxy-main
|
||||
ports:
|
||||
- containerPort: 8443
|
||||
name: https-main
|
||||
resources:
|
||||
limits:
|
||||
cpu: 40m
|
||||
memory: 40Mi
|
||||
requests:
|
||||
cpu: 20m
|
||||
memory: 20Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65532
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
- args:
|
||||
- --secure-listen-address=:9443
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
||||
- --upstream=http://127.0.0.1:8082/
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.18.2
|
||||
name: kube-rbac-proxy-self
|
||||
ports:
|
||||
- containerPort: 9443
|
||||
name: https-self
|
||||
resources:
|
||||
limits:
|
||||
cpu: 20m
|
||||
memory: 40Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 20Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65532
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
serviceAccountName: kube-state-metrics
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
egress:
|
||||
- {}
|
||||
ingress:
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus
|
||||
ports:
|
||||
- port: 8443
|
||||
protocol: TCP
|
||||
- port: 9443
|
||||
protocol: TCP
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
clusterIP: None
|
||||
ports:
|
||||
- name: https-main
|
||||
port: 8443
|
||||
targetPort: https-main
|
||||
- name: https-self
|
||||
port: 9443
|
||||
targetPort: https-self
|
||||
selector:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
---
|
||||
apiVersion: v1
|
||||
automountServiceAccountToken: false
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: kube-state-metrics
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
honorLabels: true
|
||||
interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: kube_endpoint_address_not_ready|kube_endpoint_address_available
|
||||
sourceLabels:
|
||||
- __name__
|
||||
port: https-main
|
||||
relabelings:
|
||||
- action: labeldrop
|
||||
regex: (pod|service|endpoint|namespace)
|
||||
scheme: https
|
||||
scrapeTimeout: 30s
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 30s
|
||||
port: https-self
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
jobLabel: app.kubernetes.io/name
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
8
talos/k8s/monitoring/kustomization.yaml
Normal file
8
talos/k8s/monitoring/kustomization.yaml
Normal file
|
@ -0,0 +1,8 @@
|
|||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
namespace: monitoring
|
||||
resources:
|
||||
- kube-state-metrics.yaml
|
||||
- node-exporter.yaml
|
||||
- prom.yaml
|
||||
- servicemonitors.yaml
|
199
talos/k8s/monitoring/node-exporter.yaml
Normal file
199
talos/k8s/monitoring/node-exporter.yaml
Normal file
|
@ -0,0 +1,199 @@
|
|||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: node-exporter
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
kubectl.kubernetes.io/default-container: node-exporter
|
||||
creationTimestamp: null
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 1.8.2
|
||||
spec:
|
||||
automountServiceAccountToken: true
|
||||
containers:
|
||||
- args:
|
||||
- --web.listen-address=0.0.0.0:9100
|
||||
- --path.sysfs=/host/sys
|
||||
- --path.rootfs=/host/root
|
||||
- --path.udev.data=/host/root/run/udev/data
|
||||
- --no-collector.wifi
|
||||
- --no-collector.hwmon
|
||||
- --no-collector.btrfs
|
||||
- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/k3s/containerd/.+|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
|
||||
- --collector.netclass.ignored-devices=^(veth.*|[a-f0-9]{15})$
|
||||
- --collector.netdev.device-exclude=^(veth.*|[a-f0-9]{15})$
|
||||
image: quay.io/prometheus/node-exporter:v1.8.2
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: node-exporter
|
||||
ports:
|
||||
- containerPort: 9100
|
||||
hostPort: 9100
|
||||
name: http
|
||||
protocol: TCP
|
||||
resources:
|
||||
limits:
|
||||
cpu: 250m
|
||||
memory: 180Mi
|
||||
requests:
|
||||
cpu: 102m
|
||||
memory: 180Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_TIME
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /host/sys
|
||||
mountPropagation: HostToContainer
|
||||
name: sys
|
||||
readOnly: true
|
||||
- mountPath: /host/root
|
||||
mountPropagation: HostToContainer
|
||||
name: root
|
||||
readOnly: true
|
||||
- args:
|
||||
- --secure-listen-address=[$(IP)]:9101
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
||||
- --upstream=http://127.0.0.1:9100/
|
||||
env:
|
||||
- name: IP
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
apiVersion: v1
|
||||
fieldPath: status.podIP
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.18.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: kube-rbac-proxy
|
||||
ports:
|
||||
- containerPort: 9101
|
||||
hostPort: 9101
|
||||
name: https
|
||||
protocol: TCP
|
||||
resources:
|
||||
limits:
|
||||
cpu: 250m
|
||||
memory: 40Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 20Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65532
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
dnsPolicy: ClusterFirst
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
priorityClassName: system-cluster-critical
|
||||
restartPolicy: Always
|
||||
schedulerName: default-scheduler
|
||||
securityContext:
|
||||
runAsGroup: 65534
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
serviceAccount: node-exporter
|
||||
serviceAccountName: node-exporter
|
||||
terminationGracePeriodSeconds: 30
|
||||
tolerations:
|
||||
- operator: Exists
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /sys
|
||||
type: ""
|
||||
name: sys
|
||||
- hostPath:
|
||||
path: /
|
||||
type: ""
|
||||
name: root
|
||||
updateStrategy:
|
||||
rollingUpdate:
|
||||
maxSurge: 0
|
||||
maxUnavailable: 10%
|
||||
type: RollingUpdate
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
name: node-exporter
|
||||
spec:
|
||||
clusterIP: None
|
||||
clusterIPs:
|
||||
- None
|
||||
internalTrafficPolicy: Cluster
|
||||
ipFamilies:
|
||||
- IPv4
|
||||
ipFamilyPolicy: SingleStack
|
||||
ports:
|
||||
- name: https
|
||||
port: 9100
|
||||
protocol: TCP
|
||||
targetPort: https
|
||||
selector:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
sessionAffinity: None
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: node-exporter
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: node-exporter
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: node-exporter
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: node-exporter
|
||||
rules:
|
||||
- apiGroups:
|
||||
- authentication.k8s.io
|
||||
resources:
|
||||
- tokenreviews
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- authorization.k8s.io
|
||||
resources:
|
||||
- subjectaccessreviews
|
||||
verbs:
|
||||
- create
|
111
talos/k8s/monitoring/prom.yaml
Normal file
111
talos/k8s/monitoring/prom.yaml
Normal file
|
@ -0,0 +1,111 @@
|
|||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: Prometheus
|
||||
metadata:
|
||||
name: talos-prom
|
||||
namespace: monitoring
|
||||
spec:
|
||||
enableFeatures: []
|
||||
evaluationInterval: 30s
|
||||
externalLabels:
|
||||
cluster: talos
|
||||
externalUrl: https://prometheus.k8s.home.finn.io
|
||||
image: quay.io/prometheus/prometheus:v2.54.0
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
podMetadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: talos-prom
|
||||
podMonitorNamespaceSelector: {}
|
||||
podMonitorSelector: {}
|
||||
portName: web
|
||||
probeNamespaceSelector: {}
|
||||
probeSelector: {}
|
||||
replicas: 2
|
||||
resources:
|
||||
requests:
|
||||
memory: 400Mi
|
||||
retention: 24h
|
||||
ruleNamespaceSelector: {}
|
||||
ruleSelector: {}
|
||||
scrapeConfigNamespaceSelector: {}
|
||||
scrapeConfigSelector: {}
|
||||
scrapeInterval: 30s
|
||||
securityContext:
|
||||
fsGroup: 2000
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
serviceAccountName: prometheus-thanos
|
||||
serviceMonitorNamespaceSelector: {}
|
||||
serviceMonitorSelector: {}
|
||||
storage:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 50Gi
|
||||
thanos:
|
||||
blockSize: 2h
|
||||
image: quay.io/thanos/thanos:v0.37.2
|
||||
objectStorageConfig:
|
||||
key: thanos.yaml
|
||||
name: thanos-objstore
|
||||
version: 2.54.0
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: prometheus-thanos
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: prometheus-self
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- nodes
|
||||
- nodes/metrics
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
verbs:
|
||||
- get
|
||||
- nonResourceURLs:
|
||||
- /metrics
|
||||
verbs:
|
||||
- get
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: prometheus-self
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: prometheus-self
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-thanos
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: prometheus-thanos-sidecar
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app.kubernetes.io/name: prometheus
|
||||
ports:
|
||||
- port: 10901
|
||||
targetPort: 10901
|
352
talos/k8s/monitoring/servicemonitors.yaml
Normal file
352
talos/k8s/monitoring/servicemonitors.yaml
Normal file
|
@ -0,0 +1,352 @@
|
|||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: coredns
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 15s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: coredns_cache_misses_total
|
||||
sourceLabels:
|
||||
- __name__
|
||||
port: metrics
|
||||
jobLabel: app.kubernetes.io/name
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- kube-system
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: kube-apiserver
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs|longrunning_gauge|registered_watchers|storage_db_total_size_in_bytes)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: transformation_(transformation_latencies_microseconds|failures_total)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: etcd_(debugging|disk|server).*
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: apiserver_admission_controller_admission_latencies_seconds_.*
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: apiserver_admission_step_admission_latencies_seconds_.*
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- le
|
||||
port: https
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
serverName: kubernetes
|
||||
jobLabel: component
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- default
|
||||
selector:
|
||||
matchLabels:
|
||||
component: apiserver
|
||||
provider: kubernetes
|
||||
---
|
||||
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: kube-controller-manager
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs|longrunning_gauge|registered_watchers|storage_db_total_size_in_bytes)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: transformation_(transformation_latencies_microseconds|failures_total)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: etcd_(debugging|disk|request|server).*
|
||||
sourceLabels:
|
||||
- __name__
|
||||
port: https-metrics
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
jobLabel: app.kubernetes.io/name
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- kube-system
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: kube-controller-manager
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: kube-scheduler
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 30s
|
||||
port: https-metrics
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
jobLabel: app.kubernetes.io/name
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- kube-system
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: kube-scheduler
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: kubelet
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
honorLabels: true
|
||||
interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs|longrunning_gauge|registered_watchers|storage_db_total_size_in_bytes)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: transformation_(transformation_latencies_microseconds|failures_total)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
port: https-metrics
|
||||
relabelings:
|
||||
- action: replace
|
||||
sourceLabels:
|
||||
- __metrics_path__
|
||||
targetLabel: metrics_path
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
honorLabels: true
|
||||
honorTimestamps: false
|
||||
interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: (container_spec_.*|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);;
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- pod
|
||||
- namespace
|
||||
- action: drop
|
||||
regex: (container_blkio_device_usage_total);.+
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- container
|
||||
path: /metrics/cadvisor
|
||||
port: https-metrics
|
||||
relabelings:
|
||||
- action: replace
|
||||
sourceLabels:
|
||||
- __metrics_path__
|
||||
targetLabel: metrics_path
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
honorLabels: true
|
||||
interval: 30s
|
||||
path: /metrics/probes
|
||||
port: https-metrics
|
||||
relabelings:
|
||||
- action: replace
|
||||
sourceLabels:
|
||||
- __metrics_path__
|
||||
targetLabel: metrics_path
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
jobLabel: app.kubernetes.io/name
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- kube-system
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: kubelet
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: node-exporter
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 15s
|
||||
port: https
|
||||
relabelings:
|
||||
- action: replace
|
||||
regex: (.*)
|
||||
replacement: $1
|
||||
sourceLabels:
|
||||
- __meta_kubernetes_pod_node_name
|
||||
targetLabel: instance
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
jobLabel: app.kubernetes.io/name
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: prometheus-adapter
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: (apiserver_client_certificate_.*|apiserver_envelope_.*|apiserver_flowcontrol_.*|apiserver_storage_.*|apiserver_webhooks_.*|workqueue_.*)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
port: https
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: prometheus-thanos
|
||||
spec:
|
||||
endpoints:
|
||||
- interval: 30s
|
||||
port: web
|
||||
- interval: 30s
|
||||
port: reloader-web
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/name: prometheus
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: prometheus-operator
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
honorLabels: true
|
||||
port: https
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
|
@ -3,5 +3,7 @@ kind: Kustomization
|
|||
resources:
|
||||
- cert-manager
|
||||
- cert-manager-webhook-pdns
|
||||
- local-path-provisioner
|
||||
- prometheus-operator
|
||||
- rook
|
||||
- traefik
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
# based on https://www.talos.dev/v1.9/kubernetes-guides/configuration/local-storage/#local-path-provisioner
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- github.com/rancher/local-path-provisioner/deploy?ref=v0.0.31
|
||||
patches:
|
||||
- patch: |-
|
||||
kind: ConfigMap
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: local-path-config
|
||||
namespace: local-path-storage
|
||||
data:
|
||||
config.json: |-
|
||||
{
|
||||
"nodePathMap":[
|
||||
{
|
||||
"node":"DEFAULT_PATH_FOR_NON_LISTED_NODES",
|
||||
"paths":["/var/local-path-provisioner"]
|
||||
}
|
||||
]
|
||||
}
|
||||
- patch: |-
|
||||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
metadata:
|
||||
name: local-path
|
||||
annotations:
|
||||
storageclass.kubernetes.io/is-default-class: "true"
|
||||
defaultVolumeType: local
|
||||
- patch: |-
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: local-path-storage
|
||||
labels:
|
||||
pod-security.kubernetes.io/enforce: privileged
|
|
@ -0,0 +1,6 @@
|
|||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
namespace: monitoring
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- https://github.com/prometheus-operator/prometheus-operator?ref=v0.79.1
|
6
talos/k8s/operators/prometheus-operator/namespace.yaml
Normal file
6
talos/k8s/operators/prometheus-operator/namespace.yaml
Normal file
|
@ -0,0 +1,6 @@
|
|||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: rook-ceph
|
||||
labels:
|
||||
pod-security.kubernetes.io/enforce: privileged
|
|
@ -13,3 +13,27 @@ metadata:
|
|||
spec:
|
||||
bucketName: dvn-backup
|
||||
storageClassName: rook-ceph-bucket
|
||||
---
|
||||
apiVersion: objectbucket.io/v1alpha1
|
||||
kind: ObjectBucketClaim
|
||||
metadata:
|
||||
name: media
|
||||
spec:
|
||||
bucketName: media
|
||||
storageClassName: rook-ceph-bucket
|
||||
---
|
||||
apiVersion: objectbucket.io/v1alpha1
|
||||
kind: ObjectBucketClaim
|
||||
metadata:
|
||||
name: janky.email-backups
|
||||
spec:
|
||||
bucketName: janky.email-backups
|
||||
storageClassName: rook-ceph-bucket
|
||||
---
|
||||
apiVersion: objectbucket.io/v1alpha1
|
||||
kind: ObjectBucketClaim
|
||||
metadata:
|
||||
name: loki
|
||||
spec:
|
||||
bucketName: loki
|
||||
storageClassName: rook-ceph-bucket
|
||||
|
|
|
@ -4,3 +4,4 @@ namespace: rook-ceph
|
|||
resources:
|
||||
- buckets.yaml
|
||||
- s3-pool.yaml
|
||||
- servicemonitor.yaml
|
||||
|
|
56
talos/k8s/rook/servicemonitor.yaml
Normal file
56
talos/k8s/rook/servicemonitor.yaml
Normal file
|
@ -0,0 +1,56 @@
|
|||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: rook-ceph-exporter
|
||||
labels:
|
||||
team: rook
|
||||
spec:
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- rook-ceph
|
||||
selector:
|
||||
matchLabels:
|
||||
app: rook-ceph-exporter
|
||||
rook_cluster: rook-ceph
|
||||
endpoints:
|
||||
- port: ceph-exporter-http-metrics
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: rook-ceph-mgr
|
||||
labels:
|
||||
team: rook
|
||||
spec:
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- rook-ceph
|
||||
selector:
|
||||
matchLabels:
|
||||
app: rook-ceph-mgr
|
||||
rook_cluster: rook-ceph
|
||||
endpoints:
|
||||
- port: http-metrics
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
honorLabels: true
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: csi-metrics
|
||||
labels:
|
||||
team: rook
|
||||
spec:
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- rook-ceph
|
||||
selector:
|
||||
matchLabels:
|
||||
app: csi-metrics
|
||||
endpoints:
|
||||
- port: csi-http-metrics
|
||||
path: /metrics
|
||||
interval: 5s
|
Loading…
Add table
Reference in a new issue