(dev-v2.6-archive) Merge pull request #674 from aiyengar2/fix_monitoring_chart_hardened

Support monitoring in hardened RKE clusters

(partially cherry picked from commit 34fdd14bca)
maggieliu 2020-09-22 10:04:58 -07:00 committed by Arvind Iyengar
parent 826d5dc2df
commit 2e262303da
2 changed files with 96 additions and 38 deletions


@@ -38,3 +38,4 @@ All notable changes from the upstream Prometheus Operator chart will be added to
- Modified the default `<serviceMonitor|podMonitor|rule>SelectorNilUsesHelmValues` to `false`. As a result, we look for all CRs in all namespaces by default, regardless of their labels, rather than just the ones tagged with the label `release: rancher-monitoring`.
- Modified the default images used by the `rancher-monitoring` chart to point to Rancher mirrors of the original images from upstream.
- Modified the behavior of the chart to create the Alertmanager Config Secret via a pre-install hook instead of using the normal Helm lifecycle to manage the secret. The benefit of this approach is that changes made to the Config Secret on a live cluster are never overridden on a `helm upgrade`, since the secret is only created on a `helm install`. If you would like the secret to be cleaned up on a `helm uninstall`, enable `alertmanager.secret.cleanupOnUninstall`; it is disabled by default to prevent the loss of alerting configuration on an uninstall.
- Modified the default `securityContext` for `Pod` templates across the chart to `{"runAsNonRoot": true, "runAsUser": 1000}` and set `grafana.rbac.pspUseAppArmor=false` in order to make it possible to deploy this chart on a hardened cluster without AppArmor installed. A consolidated sketch of these values follows this list.
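
The hardened defaults described in these entries map onto a handful of chart values. A minimal override sketch, assuming the standard Helm subchart-override layout (every key path below appears in the diff hunks further down; grouping them into one file is illustrative):

```yaml
# Consolidated view of the hardened-cluster defaults in this commit (illustrative).
grafana:
  rbac:
    pspUseAppArmor: false        # no AppArmor annotations on the grafana PSP
alertmanager:
  secret:
    cleanupOnUninstall: false    # default: keep the Config Secret on `helm uninstall`
    securityContext:             # applied to the pre-install/post-delete hook Jobs
      runAsNonRoot: true
      runAsUser: 1000
prometheus-adapter:
  securityContext:
    runAsNonRoot: true
    runAsUser: 1000
```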


@@ -196,6 +196,15 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/charts/grafana/values.yaml packages/rancher-monitoring/charts/charts/grafana/values.yaml
--- packages/rancher-monitoring/charts-original/charts/grafana/values.yaml
+++ packages/rancher-monitoring/charts/charts/grafana/values.yaml
@@ -1,7 +1,7 @@
rbac:
create: true
pspEnabled: true
- pspUseAppArmor: true
+ pspUseAppArmor: false
namespaced: false
extraRoleRules: []
# - apiGroups: []
@@ -49,7 +49,7 @@
# schedulerName: "default-scheduler"
@@ -205,7 +214,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
tag: 7.1.5
sha: ""
pullPolicy: IfNotPresent
@@ -63,7 +63,7 @@
@@ -63,12 +63,15 @@
testFramework:
enabled: true
@@ -213,8 +222,17 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
+ image: "rancher/bats-bats"
tag: "v1.1.0"
imagePullPolicy: IfNotPresent
securityContext: {}
@@ -91,7 +91,7 @@
- securityContext: {}
+ securityContext:
+ runAsNonRoot: true
+ runAsUser: 1000
securityContext:
+ runAsNonRoot: true
runAsUser: 472
runAsGroup: 472
fsGroup: 472
@@ -91,7 +94,7 @@
# priorityClassName:
downloadDashboardsImage:
@@ -223,7 +241,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
tag: 7.70.0
sha: ""
pullPolicy: IfNotPresent
@@ -244,7 +244,7 @@
@@ -244,7 +247,7 @@
## initChownData container image
##
image:
@@ -232,7 +250,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
tag: "1.31.1"
sha: ""
pullPolicy: IfNotPresent
@@ -486,7 +486,7 @@
@@ -486,7 +489,7 @@
## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards
sidecar:
image:
@@ -244,7 +262,15 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/charts/kube-state-metrics/templates/deployment.yaml packages/rancher-monitoring/charts/charts/kube-state-metrics/templates/deployment.yaml
--- packages/rancher-monitoring/charts-original/charts/kube-state-metrics/templates/deployment.yaml
+++ packages/rancher-monitoring/charts/charts/kube-state-metrics/templates/deployment.yaml
@@ -154,7 +154,7 @@
@@ -44,6 +44,7 @@
fsGroup: {{ .Values.securityContext.fsGroup }}
runAsGroup: {{ .Values.securityContext.runAsGroup }}
runAsUser: {{ .Values.securityContext.runAsUser }}
+ runAsNonRoot: {{ .Values.securityContext.runAsNonRoot }}
{{- end }}
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
@@ -154,7 +155,7 @@
- --pod-namespace=$(POD_NAMESPACE)
{{ end }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
@@ -265,6 +291,14 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
tag: v1.9.7
pullPolicy: IfNotPresent
@@ -73,6 +73,7 @@
securityContext:
enabled: true
+ runAsNonRoot: true
runAsGroup: 65534
runAsUser: 65534
fsGroup: 65534
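
Taken together, the two kube-state-metrics hunks above wire `runAsNonRoot` from the chart values into the Deployment's pod-level security context. With the default values shown (and `securityContext.enabled: true`), the rendered pod spec comes out roughly as below; this is a sketch of expected render output, not part of the diff:

```yaml
# Approximate rendered securityContext for the kube-state-metrics Deployment pod.
spec:
  securityContext:
    fsGroup: 65534
    runAsGroup: 65534
    runAsUser: 65534
    runAsNonRoot: true
```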
diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/charts/prometheus-adapter/README.md packages/rancher-monitoring/charts/charts/prometheus-adapter/README.md
--- packages/rancher-monitoring/charts-original/charts/prometheus-adapter/README.md
+++ packages/rancher-monitoring/charts/charts/prometheus-adapter/README.md
@@ -301,6 +335,15 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
tag: v0.6.0
pullPolicy: IfNotPresent
@@ -139,3 +139,7 @@
# API server unable to communicate with metrics-server. As an example, this is required
# if you use Weave network on EKS
enabled: false
+
+securityContext:
+ runAsNonRoot: true
+ runAsUser: 1000
\ No newline at end of file
diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/charts/prometheus-node-exporter/OWNERS packages/rancher-monitoring/charts/charts/prometheus-node-exporter/OWNERS
--- packages/rancher-monitoring/charts-original/charts/prometheus-node-exporter/OWNERS
+++ packages/rancher-monitoring/charts/charts/prometheus-node-exporter/OWNERS
@@ -577,7 +620,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/tem
diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/templates/alertmanager/cleanupSecret.yaml packages/rancher-monitoring/charts/templates/alertmanager/cleanupSecret.yaml
--- packages/rancher-monitoring/charts-original/templates/alertmanager/cleanupSecret.yaml
+++ packages/rancher-monitoring/charts/templates/alertmanager/cleanupSecret.yaml
@@ -0,0 +1,82 @@
@@ -0,0 +1,86 @@
+{{- if and (.Values.alertmanager.enabled) (not .Values.alertmanager.alertmanagerSpec.useExistingSecret) (.Values.alertmanager.secret.cleanupOnUninstall) }}
+apiVersion: batch/v1
+kind: Job
@@ -599,6 +642,10 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/tem
+ app: {{ template "kube-prometheus-stack.name" . }}-alertmanager
+ spec:
+ serviceAccountName: alertmanager-{{ template "kube-prometheus-stack.fullname" . }}-post-delete
+{{- if .Values.alertmanager.secret.securityContext }}
+ securityContext:
+{{ toYaml .Values.alertmanager.secret.securityContext | indent 8 }}
+{{- end }}
+ containers:
+ - name: delete-secret
+ image: {{ template "system_default_registry" . }}{{ .Values.alertmanager.secret.image.repository }}:{{ .Values.alertmanager.secret.image.tag }}
@@ -686,7 +733,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/tem
{{ toYaml .Values.alertmanager.secret.annotations | indent 4 }}
{{- end }}
labels:
@@ -20,4 +28,93 @@
@@ -20,4 +28,97 @@
{{- range $key, $val := .Values.alertmanager.templateFiles }}
{{ $key }}: {{ $val | b64enc | quote }}
{{- end }}
@@ -711,6 +758,10 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/tem
+ app: {{ template "kube-prometheus-stack.name" . }}-alertmanager
+ spec:
+ serviceAccountName: alertmanager-{{ template "kube-prometheus-stack.fullname" . }}-pre-install
+{{- if .Values.alertmanager.secret.securityContext }}
+ securityContext:
+{{ toYaml .Values.alertmanager.secret.securityContext | indent 8 }}
+{{- end }}
+ containers:
+ - name: copy-pre-install-secret
+ image: {{ template "system_default_registry" . }}{{ .Values.alertmanager.secret.image.repository }}:{{ .Values.alertmanager.secret.image.tag }}
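
Both Jobs above (the post-delete cleanup Job and the pre-install copy Job) run outside the normal release lifecycle via Helm hook annotations. The hook metadata falls outside the visible hunk lines, so the following is only a sketch of the standard pattern these templates imply; the Job name and delete policy are assumptions:

```yaml
# Sketch of the assumed hook wiring; the `helm.sh/hook` annotation keys are standard Helm.
apiVersion: batch/v1
kind: Job
metadata:
  name: example-pre-install-secret                # hypothetical name
  annotations:
    "helm.sh/hook": pre-install                   # run before release resources are created
    "helm.sh/hook-delete-policy": hook-succeeded  # clean up the Job object once it completes
spec:
  template:
    spec:
      restartPolicy: Never
      containers:
        - name: copy-pre-install-secret           # container name taken from the hunk above
          image: rancher/rancher-agent:v2.4.8     # default per the alertmanager.secret.image values below
```

Because the secret is created by a hook rather than tracked as a release resource, `helm upgrade` never touches it, which is exactly the behavior described in the CHANGELOG entry above.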
@@ -1728,7 +1779,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/tem
diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/values.yaml packages/rancher-monitoring/charts/values.yaml
--- packages/rancher-monitoring/charts-original/values.yaml
+++ packages/rancher-monitoring/charts/values.yaml
@@ -2,13 +2,271 @@
@@ -2,13 +2,273 @@
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
@@ -1758,6 +1809,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
+ component: kube-controller-manager
+ clients:
+ port: 10011
+ useLocalhost: true
+ nodeSelector:
+ node-role.kubernetes.io/controlplane: "true"
+ tolerations:
@@ -1772,6 +1824,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
+ component: kube-scheduler
+ clients:
+ port: 10012
+ useLocalhost: true
+ nodeSelector:
+ node-role.kubernetes.io/controlplane: "true"
+ tolerations:
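
The kube-controller-manager and kube-scheduler blocks added above share one shape: a scrape client pinned to controlplane nodes that reaches the component over `127.0.0.1`, which is where hardened RKE clusters bind these components. Reassembled with guessed indentation (the parent key, the nesting of `nodeSelector`/`tolerations` under `clients`, and the toleration entries are assumptions; the excerpt truncates after `tolerations:`):

```yaml
# Reconstruction of one client block from the flattened fragments above.
component: kube-scheduler
clients:
  port: 10012                # port the scrape client listens on
  useLocalhost: true         # scrape the component on 127.0.0.1 from the node itself
  nodeSelector:
    node-role.kubernetes.io/controlplane: "true"
  tolerations:               # assumed: tolerate controlplane taints so clients can schedule there
    - operator: Exists
      effect: NoExecute
```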
@@ -2002,7 +2055,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Provide a k8s version to auto dashboard import script example: kubeTargetVersionOverride: 1.16.6
##
@@ -76,8 +334,19 @@
@@ -76,8 +336,19 @@
##
global:
@@ -2022,7 +2075,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
pspEnabled: true
pspAnnotations: {}
## Specify pod annotations
@@ -130,6 +399,22 @@
@@ -130,6 +401,22 @@
## ref: https://prometheus.io/docs/alerting/configuration/#configuration-file
## https://prometheus.io/webtools/alerting/routing-tree-editor/
##
@@ -2045,7 +2098,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
config:
global:
resolve_timeout: 5m
@@ -145,6 +430,8 @@
@@ -145,6 +432,8 @@
receiver: 'null'
receivers:
- name: 'null'
@@ -2054,7 +2107,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Pass the Alertmanager configuration directives through Helm's templating
## engine. If the Alertmanager configuration contains Alertmanager templates,
@@ -160,25 +447,76 @@
@@ -160,25 +449,76 @@
## ref: https://prometheus.io/docs/alerting/notifications/
## https://prometheus.io/docs/alerting/notification_examples/
##
@@ -2150,7 +2203,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
ingress:
enabled: false
@@ -208,6 +546,21 @@
@@ -208,6 +548,25 @@
## Configuration for Alertmanager secret
##
secret:
@@ -2168,11 +2221,15 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
+ repository: rancher/rancher-agent
+ tag: v2.4.8
+ pullPolicy: IfNotPresent
+
+ securityContext:
+ runAsNonRoot: true
+ runAsUser: 1000
+
annotations: {}
## Configuration for creating an Ingress that will map to each Alertmanager replica service
@@ -334,7 +687,7 @@
@@ -334,7 +693,7 @@
## Image of Alertmanager
##
image:
@@ -2181,7 +2238,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
tag: v0.21.0
sha: ""
@@ -410,9 +763,13 @@
@@ -410,9 +769,13 @@
## Define resources requests and limits for single Pods.
## ref: https://kubernetes.io/docs/user-guide/compute-resources/
##
@@ -2198,7 +2255,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node.
## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided.
@@ -487,6 +844,9 @@
@@ -487,6 +850,9 @@
enabled: true
namespaceOverride: ""
@@ -2208,7 +2265,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Deploy default dashboards.
##
defaultDashboardsEnabled: true
@@ -530,6 +890,7 @@
@@ -530,6 +896,7 @@
dashboards:
enabled: true
label: grafana_dashboard
@@ -2216,7 +2273,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Annotations for Grafana dashboard configmaps
##
@@ -575,6 +936,19 @@
@@ -575,6 +942,19 @@
##
service:
portName: service
@@ -2236,7 +2293,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## If true, create a serviceMonitor for grafana
##
@@ -600,6 +974,14 @@
@@ -600,6 +980,14 @@
# targetLabel: nodename
# replacement: $1
# action: replace
@@ -2251,7 +2308,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Component scraping the kube api server
##
@@ -756,7 +1138,7 @@
@@ -756,7 +1144,7 @@
## Component scraping the kube controller manager
##
kubeControllerManager:
@@ -2260,7 +2317,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## If your kube controller manager is not deployed as a pod, specify IPs it can be found on
##
@@ -889,7 +1271,7 @@
@@ -889,7 +1277,7 @@
## Component scraping etcd
##
kubeEtcd:
@@ -2269,7 +2326,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## If your etcd is not deployed as a pod, specify IPs it can be found on
##
@@ -949,7 +1331,7 @@
@@ -949,7 +1337,7 @@
## Component scraping kube scheduler
##
kubeScheduler:
@@ -2278,7 +2335,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## If your kube scheduler is not deployed as a pod, specify IPs it can be found on
##
@@ -1002,7 +1384,7 @@
@@ -1002,7 +1390,7 @@
## Component scraping kube proxy
##
kubeProxy:
@@ -2287,7 +2344,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## If your kube proxy is not deployed as a pod, specify IPs it can be found on
##
@@ -1076,6 +1458,13 @@
@@ -1076,6 +1464,13 @@
create: true
podSecurityPolicy:
enabled: true
@@ -2301,7 +2358,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Deploy node exporter as a daemonset to all nodes
##
@@ -1125,6 +1514,16 @@
@@ -1125,6 +1520,16 @@
extraArgs:
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
@@ -2318,7 +2375,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Manages Prometheus and Alertmanager components
##
@@ -1138,7 +1537,7 @@
@@ -1138,7 +1543,7 @@
tlsProxy:
enabled: true
image:
@@ -2327,7 +2384,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
tag: v1.5.2
sha: ""
pullPolicy: IfNotPresent
@@ -1156,7 +1555,7 @@
@@ -1156,7 +1561,7 @@
patch:
enabled: true
image:
@@ -2336,7 +2393,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
tag: v1.2.1
sha: ""
pullPolicy: IfNotPresent
@@ -1285,13 +1684,13 @@
@@ -1285,13 +1690,13 @@
## Resource limits & requests
##
@@ -2357,7 +2414,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
# Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico),
# because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working
@@ -1335,7 +1734,7 @@
@@ -1335,7 +1740,7 @@
## Prometheus-operator image
##
image:
@@ -2366,7 +2423,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
tag: v0.38.1
sha: ""
pullPolicy: IfNotPresent
@@ -1343,14 +1742,14 @@
@@ -1343,14 +1748,14 @@
## Configmap-reload image to use for reloading configmaps
##
configmapReloadImage:
@@ -2383,7 +2440,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
tag: v0.38.1
sha: ""
@@ -1366,14 +1765,6 @@
@@ -1366,14 +1771,6 @@
##
secretFieldSelector: ""
@@ -2398,7 +2455,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Deploy a Prometheus instance
##
prometheus:
@@ -1614,7 +2005,7 @@
@@ -1614,7 +2011,7 @@
## Image of Prometheus.
##
image:
@@ -2407,7 +2464,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
tag: v2.18.2
sha: ""
@@ -1666,6 +2057,11 @@
@@ -1666,6 +2063,11 @@
##
externalUrl: ""
@@ -2419,7 +2476,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Define which Nodes the Pods are scheduled on.
## ref: https://kubernetes.io/docs/user-guide/node-selection/
##
@@ -1698,7 +2094,7 @@
@@ -1698,7 +2100,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the PrometheusRule resources created
##
@@ -2428,7 +2485,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## PrometheusRules to be selected for target discovery.
## If {}, select all ServiceMonitors
@@ -1723,7 +2119,7 @@
@@ -1723,7 +2125,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the servicemonitors created
##
@@ -2437,7 +2494,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## ServiceMonitors to be selected for target discovery.
## If {}, select all ServiceMonitors
@@ -1743,7 +2139,7 @@
@@ -1743,7 +2145,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the podmonitors created
##
@@ -2446,7 +2503,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## PodMonitors to be selected for target discovery.
## If {}, select all PodMonitors
@@ -1840,9 +2236,13 @@
@@ -1840,9 +2242,13 @@
## Resource limits & requests
##