add storage server

2025-01-09 21:39:20 -08:00 · 2025-01-09 21:39:20 -08:00 · eeba61f7e1
commit eeba61f7e1
parent aa00c87a5d
13 changed files with 17551 additions and 6 deletions
--- a/helm/render-all.sh
+++ b/helm/render-all.sh
@ -4,11 +4,21 @@ set -exuo pipefail
 cd -- "$( dirname -- "${BASH_SOURCE[0]}" )"

 header="# DO NOT EDIT: This file has been automatically generated by the script in helm/render-all.sh, edits may get overwritten"
-
-# operators
-for component in openbao external-secrets secrets-store-csi-driver; do
-    mkdir -p ../k8s/operators/${component}
-    echo "${header}" > ../k8s/operators/${component}/bundle.yaml
+render_helm() {
+    target="${1}"
+    component="${2}"
+    mkdir -p "${target}/${component}"
+    echo "${header}" > "${target}/${component}/bundle.yaml"
    rm -rf "${component}/charts" # it doesn't seem to update them otherwise
-    kubectl kustomize --enable-helm ${component}/ >> ../k8s/operators/${component}/bundle.yaml
+    kubectl kustomize --enable-helm "${component}/" >> "${target}/${component}/bundle.yaml"
+}
+
+# main k8s cluster operators
+for component in openbao external-secrets secrets-store-csi-driver; do
+    render_helm ../k8s/operators "${component}"
+done
+
+# cisco k8s cluster operators
+for component in rook; do
+    render_helm ../talos/k8s/operators "${component}"
 done
--- a/helm/rook/kustomization.yaml
+++ b/helm/rook/kustomization.yaml
@ -0,0 +1,8 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+helmCharts:
+- name: rook-ceph
+  namespace: rook-ceph
+  releaseName: rook-ceph
+  version: v1.16.1
+  repo: https://charts.rook.io/release
--- a/talos/.envrc
+++ b/talos/.envrc
@ -0,0 +1,6 @@
+root=$(git rev-parse --show-toplevel)/talos
+export TALOSCONFIG=${root}/talosconfig
+kubeconfig=${root}/.kubeconfig
+if [ -f "${kubeconfig}" ]; then
+    export KUBECONFIG="${kubeconfig}"
+fi
--- a/talos/.gitignore
+++ b/talos/.gitignore
@ -0,0 +1 @@
+/talosconfig
--- a/talos/config_patch.yaml
+++ b/talos/config_patch.yaml
@ -0,0 +1,2 @@
+cluster:
+    allowSchedulingOnControlPlanes: true
--- a/talos/k8s/kustomization.yaml
+++ b/talos/k8s/kustomization.yaml
@ -0,0 +1,5 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - operators
+  - external-account-rbac
--- a/talos/k8s/operators/kustomization.yaml
+++ b/talos/k8s/operators/kustomization.yaml
@ -0,0 +1,4 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - rook
--- a/talos/k8s/operators/rook/bundle.yaml
+++ b/talos/k8s/operators/rook/bundle.yaml
--- a/talos/k8s/operators/rook/cluster.yaml
+++ b/talos/k8s/operators/rook/cluster.yaml
@ -0,0 +1,356 @@
+apiVersion: ceph.rook.io/v1
+kind: CephCluster
+metadata:
+  name: rook-ceph
+  namespace: rook-ceph
+spec:
+  cephVersion:
+    # The container image used to launch the Ceph daemon pods (mon, mgr, osd, mds, rgw).
+    # v18 is Reef, v19 is Squid
+    # RECOMMENDATION: In production, use a specific version tag instead of the general v19 flag, which pulls the latest release and could result in different
+    # versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
+    # If you want to be more precise, you can always use a timestamp tag such as quay.io/ceph/ceph:v19.2.0-20240927
+    # This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities
+    image: quay.io/ceph/ceph:v19.2.0
+    # Whether to allow unsupported versions of Ceph. Currently Reef and Squid are supported.
+    # Future versions such as Tentacle (v20) would require this to be set to `true`.
+    # Do not set to true in production.
+    allowUnsupported: false
+  # The path on the host where configuration files will be persisted. Must be specified. If there are multiple clusters, the directory must be unique for each cluster.
+  # Important: if you reinstall the cluster, make sure you delete this directory from each host or else the mons will fail to start on the new cluster.
+  # In Minikube, the '/data' directory is configured to persist across reboots. Use "/data/rook" in Minikube environment.
+  dataDirHostPath: /var/lib/rook
+  # Whether or not upgrade should continue even if a check fails
+  # This means Ceph's status could be degraded and we don't recommend upgrading but you might decide otherwise
+  # Use at your OWN risk
+  # To understand Rook's upgrade process of Ceph, read https://rook.io/docs/rook/latest/ceph-upgrade.html#ceph-version-upgrades
+  skipUpgradeChecks: false
+  # Whether or not continue if PGs are not clean during an upgrade
+  continueUpgradeAfterChecksEvenIfNotHealthy: false
+  # WaitTimeoutForHealthyOSDInMinutes defines the time (in minutes) the operator would wait before an OSD can be stopped for upgrade or restart.
+  # If the timeout exceeds and OSD is not ok to stop, then the operator would skip upgrade for the current OSD and proceed with the next one
+  # if `continueUpgradeAfterChecksEvenIfNotHealthy` is `false`. If `continueUpgradeAfterChecksEvenIfNotHealthy` is `true`, then operator would
+  # continue with the upgrade of an OSD even if its not ok to stop after the timeout. This timeout won't be applied if `skipUpgradeChecks` is `true`.
+  # The default wait timeout is 10 minutes.
+  waitTimeoutForHealthyOSDInMinutes: 10
+  # Whether or not requires PGs are clean before an OSD upgrade. If set to `true` OSD upgrade process won't start until PGs are healthy.
+  # This configuration will be ignored if `skipUpgradeChecks` is `true`.
+  # Default is false.
+  upgradeOSDRequiresHealthyPGs: false
+  mon:
+    # Set the number of mons to be started. Generally recommended to be 3.
+    # For highest availability, an odd number of mons should be specified.
+    count: 3
+    # The mons should be on unique nodes. For production, at least 3 nodes are recommended for this reason.
+    # Mons should only be allowed on the same node for test environments where data loss is acceptable.
+    allowMultiplePerNode: true
+  mgr:
+    # When higher availability of the mgr is needed, increase the count to 2.
+    # In that case, one mgr will be active and one in standby. When Ceph updates which
+    # mgr is active, Rook will update the mgr services to match the active mgr.
+    count: 2
+    allowMultiplePerNode: true
+    modules:
+      # List of modules to optionally enable or disable.
+      # Note the "dashboard" and "monitoring" modules are already configured by other settings in the cluster CR.
+      - name: rook
+        enabled: true
+  # enable the ceph dashboard for viewing cluster status
+  dashboard:
+    enabled: true
+    # serve the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy)
+    # urlPrefix: /ceph-dashboard
+    # serve the dashboard at the given port.
+    # port: 8443
+    # serve the dashboard using SSL
+    ssl: false
+    # The url of the Prometheus instance
+    # prometheusEndpoint: <protocol>://<prometheus-host>:<port>
+    # Whether SSL should be verified if the Prometheus server is using https
+    # prometheusEndpointSSLVerify: false
+  # enable prometheus alerting for cluster
+  monitoring:
+    # requires Prometheus to be pre-installed
+    enabled: false
+    # Whether to disable the metrics reported by Ceph. If false, the prometheus mgr module and Ceph exporter are enabled.
+    # If true, the prometheus mgr module and Ceph exporter are both disabled. Default is false.
+    metricsDisabled: false
+    # Ceph exporter metrics config.
+    exporter:
+      # Specifies which performance counters are exported.
+      # Corresponds to --prio-limit Ceph exporter flag
+      # 0 - all counters are exported
+      perfCountersPrioLimit: 5
+      # Time to wait before sending requests again to exporter server (seconds)
+      # Corresponds to --stats-period Ceph exporter flag
+      statsPeriodSeconds: 5
+  network:
+    connections:
+      # Whether to encrypt the data in transit across the wire to prevent eavesdropping the data on the network.
+      # The default is false. When encryption is enabled, all communication between clients and Ceph daemons, or between Ceph daemons will be encrypted.
+      # When encryption is not enabled, clients still establish a strong initial authentication and data integrity is still validated with a crc check.
+      # IMPORTANT: Encryption requires the 5.11 kernel for the latest nbd and cephfs drivers. Alternatively for testing only,
+      # you can set the "mounter: rbd-nbd" in the rbd storage class, or "mounter: fuse" in the cephfs storage class.
+      # The nbd and fuse drivers are *not* recommended in production since restarting the csi driver pod will disconnect the volumes.
+      encryption:
+        enabled: false
+      # Whether to compress the data in transit across the wire. The default is false.
+      # See the kernel requirements above for encryption.
+      compression:
+        enabled: false
+      # Whether to require communication over msgr2. If true, the msgr v1 port (6789) will be disabled
+      # and clients will be required to connect to the Ceph cluster with the v2 port (3300).
+      # Requires a kernel that supports msgr v2 (kernel 5.11 or CentOS 8.4 or newer).
+      requireMsgr2: false
+    # enable host networking
+    #provider: host
+    # enable the Multus network provider
+    #provider: multus
+    #selectors:
+    #  The selector keys are required to be `public` and `cluster`.
+    #  Based on the configuration, the operator will do the following:
+    #    1. if only the `public` selector key is specified both public_network and cluster_network Ceph settings will listen on that interface
+    #    2. if both `public` and `cluster` selector keys are specified the first one will point to 'public_network' flag and the second one to 'cluster_network'
+    #
+    #  In order to work, each selector value must match a NetworkAttachmentDefinition object in Multus
+    #
+    #  public: public-conf --> NetworkAttachmentDefinition object name in Multus
+    #  cluster: cluster-conf --> NetworkAttachmentDefinition object name in Multus
+    # Provide internet protocol version. IPv6, IPv4 or empty string are valid options. Empty string would mean IPv4
+    #ipFamily: "IPv6"
+    # Ceph daemons to listen on both IPv4 and Ipv6 networks
+    #dualStack: false
+    # Enable multiClusterService to export the mon and OSD services to peer cluster.
+    # This is useful to support RBD mirroring between two clusters having overlapping CIDRs.
+    # Ensure that peer clusters are connected using an MCS API compatible application, like Globalnet Submariner.
+    #multiClusterService:
+    #  enabled: false
+
+  # enable the crash collector for ceph daemon crash collection
+  crashCollector:
+    disable: false
+    # Uncomment daysToRetain to prune ceph crash entries older than the
+    # specified number of days.
+    #daysToRetain: 30
+  # enable log collector, daemons will log on files and rotate
+  logCollector:
+    enabled: true
+    periodicity: daily # one of: hourly, daily, weekly, monthly
+    maxLogSize: 500M # SUFFIX may be 'M' or 'G'. Must be at least 1M.
+  # automate [data cleanup process](https://github.com/rook/rook/blob/master/Documentation/Storage-Configuration/ceph-teardown.md#delete-the-data-on-hosts) in cluster destruction.
+  cleanupPolicy:
+    # Since cluster cleanup is destructive to data, confirmation is required.
+    # To destroy all Rook data on hosts during uninstall, confirmation must be set to "yes-really-destroy-data".
+    # This value should only be set when the cluster is about to be deleted. After the confirmation is set,
+    # Rook will immediately stop configuring the cluster and only wait for the delete command.
+    # If the empty string is set, Rook will not destroy any data on hosts during uninstall.
+    confirmation: ""
+    # sanitizeDisks represents settings for sanitizing OSD disks on cluster deletion
+    sanitizeDisks:
+      # method indicates if the entire disk should be sanitized or simply ceph's metadata
+      # in both case, re-install is possible
+      # possible choices are 'complete' or 'quick' (default)
+      method: quick
+      # dataSource indicate where to get random bytes from to write on the disk
+      # possible choices are 'zero' (default) or 'random'
+      # using random sources will consume entropy from the system and will take much more time then the zero source
+      dataSource: zero
+      # iteration overwrite N times instead of the default (1)
+      # takes an integer value
+      iteration: 1
+    # allowUninstallWithVolumes defines how the uninstall should be performed
+    # If set to true, cephCluster deletion does not wait for the PVs to be deleted.
+    allowUninstallWithVolumes: false
+  # To control where various services will be scheduled by kubernetes, use the placement configuration sections below.
+  # The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node' and
+  # tolerate taints with a key of 'storage-node'.
+  # placement:
+  #   all:
+  #     nodeAffinity:
+  #       requiredDuringSchedulingIgnoredDuringExecution:
+  #         nodeSelectorTerms:
+  #         - matchExpressions:
+  #           - key: role
+  #             operator: In
+  #             values:
+  #             - storage-node
+  #     podAffinity:
+  #     podAntiAffinity:
+  #     topologySpreadConstraints:
+  #     tolerations:
+  #     - key: storage-node
+  #       operator: Exists
+  # The above placement information can also be specified for mon, osd, and mgr components
+  #   mon:
+  # Monitor deployments may contain an anti-affinity rule for avoiding monitor
+  # collocation on the same node. This is a required rule when host network is used
+  # or when AllowMultiplePerNode is false. Otherwise this anti-affinity rule is a
+  # preferred rule with weight: 50.
+  #   osd:
+  #    prepareosd:
+  #    mgr:
+  #    cleanup:
+  annotations:
+  #   all:
+  #   mon:
+  #   mgr:
+  #   osd:
+  #   exporter:
+  #   crashcollector:
+  #   cleanup:
+  #   prepareosd:
+  # cmdreporter is for jobs to detect ceph and csi versions, and check network status
+  #   cmdreporter:
+  # clusterMetadata annotations will be applied to only `rook-ceph-mon-endpoints` configmap and the `rook-ceph-mon` and `rook-ceph-admin-keyring` secrets.
+  # And clusterMetadata annotations will not be merged with `all` annotations.
+  #    clusterMetadata:
+  #       kubed.appscode.com/sync: "true"
+  # If no mgr annotations are set, prometheus scrape annotations will be set by default.
+  #   mgr:
+  labels:
+  #   all:
+  #   mon:
+  #   osd:
+  #   cleanup:
+  #   mgr:
+  #   prepareosd:
+  # These labels are applied to ceph-exporter servicemonitor only
+  #   exporter:
+  # monitoring is a list of key-value pairs. It is injected into all the monitoring resources created by operator.
+  # These labels can be passed as LabelSelector to Prometheus
+  #   monitoring:
+  #   crashcollector:
+  resources:
+  #The requests and limits set here, allow the mgr pod to use half of one CPU core and 1 gigabyte of memory
+  #   mgr:
+  #     limits:
+  #       memory: "1024Mi"
+  #     requests:
+  #       cpu: "500m"
+  #       memory: "1024Mi"
+  # The above example requests/limits can also be added to the other components
+  #   mon:
+  #   osd:
+  # For OSD it also is a possible to specify requests/limits based on device class
+  #   osd-hdd:
+  #   osd-ssd:
+  #   osd-nvme:
+  #   prepareosd:
+  #   mgr-sidecar:
+  #   crashcollector:
+  #   logcollector:
+  #   cleanup:
+  #   exporter:
+  #   cmd-reporter:
+  # The option to automatically remove OSDs that are out and are safe to destroy.
+  removeOSDsIfOutAndSafeToRemove: false
+  priorityClassNames:
+    #all: rook-ceph-default-priority-class
+    mon: system-node-critical
+    osd: system-node-critical
+    mgr: system-cluster-critical
+    #crashcollector: rook-ceph-crashcollector-priority-class
+  storage: # cluster level storage configuration and selection
+    useAllNodes: true
+    useAllDevices: true
+    #deviceFilter:
+    # devices:
+    config:
+      # crushRoot: "custom-root" # specify a non-default root label for the CRUSH map
+      # metadataDevice: "md0" # specify a non-rotational storage so ceph-volume will use it as block db device of bluestore.
+      # databaseSizeMB: "1024" # uncomment if the disks are smaller than 100 GB
+      # osdsPerDevice: "1" # this value can be overridden at the node or device level
+      # encryptedDevice: "true" # the default value for this option is "false"
+      # deviceClass: "myclass" # specify a device class for OSDs in the cluster
+    allowDeviceClassUpdate: false # whether to allow changing the device class of an OSD after it is created
+    allowOsdCrushWeightUpdate: false # whether to allow resizing the OSD crush weight after osd pvc is increased
+    # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named
+    # nodes below will be used as storage resources.  Each node's 'name' field should match their 'kubernetes.io/hostname' label.
+    # nodes:
+    #   - name: "172.17.4.201"
+    #     devices: # specific devices to use for storage can be specified for each node
+    #       - name: "sdb"
+    #       - name: "nvme01" # multiple osds can be created on high performance devices
+    #         config:
+    #           osdsPerDevice: "5"
+    #       - name: "/dev/disk/by-id/ata-ST4000DM004-XXXX" # devices can be specified using full udev paths
+    #     config: # configuration can be specified at the node level which overrides the cluster level config
+    #   - name: "172.17.4.301"
+    #     deviceFilter: "^sd."
+    # This looks right to me but it seems to ignore it and just use all devices anyway
+    # nodes:
+    #   - name: talos-b0x-u41
+    #     devices:
+    #       - name: sdb
+    # Whether to always schedule OSD pods on nodes declared explicitly in the "nodes" section, even if they are
+    # temporarily not schedulable. If set to true, consider adding placement tolerations for unschedulable nodes.
+    scheduleAlways: false
+    # when onlyApplyOSDPlacement is false, will merge both placement.All() and placement.osd
+    onlyApplyOSDPlacement: false
+    # Time for which an OSD pod will sleep before restarting, if it stopped due to flapping
+    # flappingRestartIntervalHours: 24
+    # The ratio at which Ceph should block IO if the OSDs are too full. The default is 0.95.
+    # fullRatio: 0.95
+    # The ratio at which Ceph should stop backfilling data if the OSDs are too full. The default is 0.90.
+    # backfillFullRatio: 0.90
+    # The ratio at which Ceph should raise a health warning if the OSDs are almost full. The default is 0.85.
+    # nearFullRatio: 0.85
+  # The section for configuring management of daemon disruptions during upgrade or fencing.
+  disruptionManagement:
+    # If true, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically
+    # via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph/ceph-managed-disruptionbudgets.md). The operator will
+    # block eviction of OSDs by default and unblock them safely when drains are detected.
+    managePodBudgets: true
+    # A duration in minutes that determines how long an entire failureDomain like `region/zone/host` will be held in `noout` (in addition to the
+    # default DOWN/OUT interval) when it is draining. This is only relevant when  `managePodBudgets` is `true`. The default value is `30` minutes.
+    osdMaintenanceTimeout: 30
+    # A duration in minutes that the operator will wait for the placement groups to become healthy (active+clean) after a drain was completed and OSDs came back up.
+    # Operator will continue with the next drain if the timeout exceeds. It only works if `managePodBudgets` is `true`.
+    # No values or 0 means that the operator will wait until the placement groups are healthy before unblocking the next drain.
+    pgHealthCheckTimeout: 0
+
+  # csi defines CSI Driver settings applied per cluster.
+  csi:
+    readAffinity:
+      # Enable read affinity to enable clients to optimize reads from an OSD in the same topology.
+      # Enabling the read affinity may cause the OSDs to consume some extra memory.
+      # For more details see this doc:
+      # https://rook.io/docs/rook/latest/Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#enable-read-affinity-for-rbd-volumes
+      enabled: false
+
+    # cephfs driver specific settings.
+    cephfs:
+      # Set CephFS Kernel mount options to use https://docs.ceph.com/en/latest/man/8/mount.ceph/#options.
+      # kernelMountOptions: ""
+      # Set CephFS Fuse mount options to use https://docs.ceph.com/en/latest/man/8/ceph-fuse/#options.
+      # fuseMountOptions: ""
+
+  # healthChecks
+  # Valid values for daemons are 'mon', 'osd', 'status'
+  healthCheck:
+    daemonHealth:
+      mon:
+        disabled: false
+        interval: 45s
+      osd:
+        disabled: false
+        interval: 60s
+      status:
+        disabled: false
+        interval: 60s
+    # Change pod liveness probe timing or threshold values. Works for all mon,mgr,osd daemons.
+    livenessProbe:
+      mon:
+        disabled: false
+      mgr:
+        disabled: false
+      osd:
+        disabled: false
+    # Change pod startup probe timing or threshold values. Works for all mon,mgr,osd daemons.
+    startupProbe:
+      mon:
+        disabled: false
+      mgr:
+        disabled: false
+      osd:
+        disabled: false
--- a/talos/k8s/operators/rook/kustomization.yaml
+++ b/talos/k8s/operators/rook/kustomization.yaml
@ -0,0 +1,9 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: rook-ceph
+resources:
+  - namespace.yaml
+  - bundle.yaml
+  - cluster.yaml
+  - toolbox.yaml
+  - pools.yaml
--- a/talos/k8s/operators/rook/namespace.yaml
+++ b/talos/k8s/operators/rook/namespace.yaml
@ -0,0 +1,7 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: rook-ceph
+  labels:
+    # https://github.com/rook/rook/issues/11755#issuecomment-1444957300
+    pod-security.kubernetes.io/enforce: privileged
--- a/talos/k8s/operators/rook/pools.yaml
+++ b/talos/k8s/operators/rook/pools.yaml
@ -0,0 +1,34 @@
+apiVersion: ceph.rook.io/v1
+kind: CephObjectStore
+metadata:
+  name: muh-buckets
+  namespace: rook-ceph
+spec:
+  metadataPool:
+    failureDomain: osd
+    replicated:
+      size: 3
+  dataPool:
+    failureDomain: osd
+    erasureCoded:
+      dataChunks: 2
+      codingChunks: 1
+  preservePoolsOnDelete: true
+  gateway:
+    # sslCertificateRef:
+    # caBundleRef:
+    port: 80
+    # securePort: 443
+    instances: 1
+    # A key/value list of annotations
+    annotations:
+    #  key: value
+    resources:
+    #  limits:
+    #    cpu: "500m"
+    #    memory: "1024Mi"
+    #  requests:
+    #    cpu: "500m"
+    #    memory: "1024Mi"
+  #zone:
+    #name: zone-a
--- a/talos/k8s/operators/rook/toolbox.yaml
+++ b/talos/k8s/operators/rook/toolbox.yaml
@ -0,0 +1,136 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: rook-ceph-tools
+  namespace: rook-ceph # namespace:cluster
+  labels:
+    app: rook-ceph-tools
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: rook-ceph-tools
+  template:
+    metadata:
+      labels:
+        app: rook-ceph-tools
+    spec:
+      dnsPolicy: ClusterFirstWithHostNet
+      serviceAccountName: rook-ceph-default
+      containers:
+        - name: rook-ceph-tools
+          image: quay.io/ceph/ceph:v19
+          command:
+            - /bin/bash
+            - -c
+            - |
+              # Replicate the script from toolbox.sh inline so the ceph image
+              # can be run directly, instead of requiring the rook toolbox
+              CEPH_CONFIG="/etc/ceph/ceph.conf"
+              MON_CONFIG="/etc/rook/mon-endpoints"
+              KEYRING_FILE="/etc/ceph/keyring"
+
+              # create a ceph config file in its default location so ceph/rados tools can be used
+              # without specifying any arguments
+              write_endpoints() {
+                endpoints=$(cat ${MON_CONFIG})
+
+                # filter out the mon names
+                # external cluster can have numbers or hyphens in mon names, handling them in regex
+                # shellcheck disable=SC2001
+                mon_endpoints=$(echo "${endpoints}"| sed 's/[a-z0-9_-]\+=//g')
+
+                DATE=$(date)
+                echo "$DATE writing mon endpoints to ${CEPH_CONFIG}: ${endpoints}"
+                  cat <<EOF > ${CEPH_CONFIG}
+              [global]
+              mon_host = ${mon_endpoints}
+
+              [client.admin]
+              keyring = ${KEYRING_FILE}
+              EOF
+              }
+
+              # watch the endpoints config file and update if the mon endpoints ever change
+              watch_endpoints() {
+                # get the timestamp for the target of the soft link
+                real_path=$(realpath ${MON_CONFIG})
+                initial_time=$(stat -c %Z "${real_path}")
+                while true; do
+                  real_path=$(realpath ${MON_CONFIG})
+                  latest_time=$(stat -c %Z "${real_path}")
+
+                  if [[ "${latest_time}" != "${initial_time}" ]]; then
+                    write_endpoints
+                    initial_time=${latest_time}
+                  fi
+
+                  sleep 10
+                done
+              }
+
+              # read the secret from an env var (for backward compatibility), or from the secret file
+              ceph_secret=${ROOK_CEPH_SECRET}
+              if [[ "$ceph_secret" == "" ]]; then
+                ceph_secret=$(cat /var/lib/rook-ceph-mon/secret.keyring)
+              fi
+
+              # create the keyring file
+              cat <<EOF > ${KEYRING_FILE}
+              [${ROOK_CEPH_USERNAME}]
+              key = ${ceph_secret}
+              EOF
+
+              # write the initial config file
+              write_endpoints
+
+              # continuously update the mon endpoints if they fail over
+              watch_endpoints
+          imagePullPolicy: IfNotPresent
+          tty: true
+          securityContext:
+            runAsNonRoot: true
+            runAsUser: 2016
+            runAsGroup: 2016
+            capabilities:
+              drop: ["ALL"]
+          env:
+            - name: ROOK_CEPH_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  name: rook-ceph-mon
+                  key: ceph-username
+          volumeMounts:
+            - mountPath: /etc/ceph
+              name: ceph-config
+            - name: mon-endpoint-volume
+              mountPath: /etc/rook
+            - name: ceph-admin-secret
+              mountPath: /var/lib/rook-ceph-mon
+              readOnly: true
+            - name: host
+              mountPath: /host
+      volumes:
+        - name: ceph-admin-secret
+          secret:
+            secretName: rook-ceph-mon
+            optional: false
+            items:
+              - key: ceph-secret
+                path: secret.keyring
+        - name: mon-endpoint-volume
+          configMap:
+            name: rook-ceph-mon-endpoints
+            items:
+              - key: data
+                path: mon-endpoints
+        - name: ceph-config
+          emptyDir: {}
+        - name: host
+          hostPath:
+            path: /
+      tolerations:
+        - key: "node.kubernetes.io/unreachable"
+          operator: "Exists"
+          effect: "NoExecute"
+          tolerationSeconds: 5