(dev-v2.6-archive) Merge pull request #674 from aiyengar2/fix_monitoring_chart_hardened

Support monitoring in hardened RKE clusters

(partially cherry picked from commit 34fdd14bca)
maggieliu 2020-09-22 10:04:58 -07:00 committed by Arvind Iyengar
parent 826d5dc2df
commit 2e262303da
2 changed files with 96 additions and 38 deletions


@@ -38,3 +38,4 @@ All notable changes from the upstream Prometheus Operator chart will be added to
- Modified the default `<serviceMonitor|podMonitor|rule>SelectorNilUsesHelmValues` to `false`. As a result, we look for all CRs in all namespaces by default, regardless of their labels, rather than just the ones tagged with the label `release: rancher-monitoring`.
- Modified the default images used by the `rancher-monitoring` chart to point to Rancher mirrors of the original images from upstream.
- Modified the behavior of the chart to create the Alertmanager Config Secret via a pre-install hook instead of using the normal Helm lifecycle to manage the secret. The benefit of this approach is that changes made to the Config Secret on a live cluster are never overridden on a `helm upgrade`, since the secret is only created on a `helm install`. If you would like the secret to be cleaned up on a `helm uninstall`, enable `alertmanager.secret.cleanupOnUninstall`; it is disabled by default to prevent the loss of alerting configuration on an uninstall.
- Modified the default `securityContext` for `Pod` templates across the chart to `{"runAsNonRoot": true, "runAsUser": 1000}` and set `grafana.rbac.pspUseAppArmor=false` in order to make it possible to deploy this chart on a hardened cluster without AppArmor installed. A consolidated sketch of these values follows this list.
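
The hardened defaults described in these entries map onto a handful of chart values. A minimal override sketch, assuming the standard Helm subchart-override layout (every key path below appears in the diff hunks further down; grouping them into one file is illustrative):

```yaml
# Consolidated view of the hardened-cluster defaults in this commit (illustrative).
grafana:
  rbac:
    pspUseAppArmor: false        # no AppArmor annotations on the grafana PSP
alertmanager:
  secret:
    cleanupOnUninstall: false    # default: keep the Config Secret on `helm uninstall`
    securityContext:             # applied to the pre-install/post-delete hook Jobs
      runAsNonRoot: true
      runAsUser: 1000
prometheus-adapter:
  securityContext:
    runAsNonRoot: true
    runAsUser: 1000
```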


@@ -196,6 +196,15 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/charts/grafana/values.yaml packages/rancher-monitoring/charts/charts/grafana/values.yaml
--- packages/rancher-monitoring/charts-original/charts/grafana/values.yaml
+++ packages/rancher-monitoring/charts/charts/grafana/values.yaml
@@ -1,7 +1,7 @@
rbac:
create: true
pspEnabled: true
- pspUseAppArmor: true
+ pspUseAppArmor: false
namespaced: false
extraRoleRules: []
# - apiGroups: []
@@ -49,7 +49,7 @@
# schedulerName: "default-scheduler"
@@ -205,7 +214,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
tag: 7.1.5
sha: ""
pullPolicy: IfNotPresent
@@ -63,7 +63,7 @@
@@ -63,12 +63,15 @@
testFramework:
enabled: true
@@ -213,8 +222,17 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
+ image: "rancher/bats-bats"
tag: "v1.1.0"
imagePullPolicy: IfNotPresent
securityContext: {}
@@ -91,7 +91,7 @@
- securityContext: {}
+ securityContext:
+ runAsNonRoot: true
+ runAsUser: 1000
securityContext:
+ runAsNonRoot: true
runAsUser: 472
runAsGroup: 472
fsGroup: 472
@@ -91,7 +94,7 @@
# priorityClassName:
downloadDashboardsImage:
@@ -223,7 +241,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
tag: 7.70.0
sha: ""
pullPolicy: IfNotPresent
@@ -244,7 +244,7 @@
@@ -244,7 +247,7 @@
## initChownData container image
##
image:
@@ -232,7 +250,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
tag: "1.31.1"
sha: ""
pullPolicy: IfNotPresent
@@ -486,7 +486,7 @@
@@ -486,7 +489,7 @@
## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards
sidecar:
image:
@@ -244,7 +262,15 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/charts/kube-state-metrics/templates/deployment.yaml packages/rancher-monitoring/charts/charts/kube-state-metrics/templates/deployment.yaml
--- packages/rancher-monitoring/charts-original/charts/kube-state-metrics/templates/deployment.yaml
+++ packages/rancher-monitoring/charts/charts/kube-state-metrics/templates/deployment.yaml
@@ -154,7 +154,7 @@
@@ -44,6 +44,7 @@
fsGroup: {{ .Values.securityContext.fsGroup }}
runAsGroup: {{ .Values.securityContext.runAsGroup }}
runAsUser: {{ .Values.securityContext.runAsUser }}
+ runAsNonRoot: {{ .Values.securityContext.runAsNonRoot }}
{{- end }}
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
@@ -154,7 +155,7 @@
- --pod-namespace=$(POD_NAMESPACE)
{{ end }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
@@ -265,6 +291,14 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
tag: v1.9.7
pullPolicy: IfNotPresent
@@ -73,6 +73,7 @@
securityContext:
enabled: true
+ runAsNonRoot: true
runAsGroup: 65534
runAsUser: 65534
fsGroup: 65534
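
Taken together, the two kube-state-metrics hunks above wire `runAsNonRoot` from the chart values into the Deployment's pod-level security context. With the default values shown (and `securityContext.enabled: true`), the rendered pod spec comes out roughly as below; this is a sketch of expected render output, not part of the diff:

```yaml
# Approximate rendered securityContext for the kube-state-metrics Deployment pod.
spec:
  securityContext:
    fsGroup: 65534
    runAsGroup: 65534
    runAsUser: 65534
    runAsNonRoot: true
```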
diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/charts/prometheus-adapter/README.md packages/rancher-monitoring/charts/charts/prometheus-adapter/README.md
--- packages/rancher-monitoring/charts-original/charts/prometheus-adapter/README.md
+++ packages/rancher-monitoring/charts/charts/prometheus-adapter/README.md
@@ -301,6 +335,15 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/cha
tag: v0.6.0
pullPolicy: IfNotPresent
@@ -139,3 +139,7 @@
# API server unable to communicate with metrics-server. As an example, this is required
# if you use Weave network on EKS
enabled: false
+
+securityContext:
+ runAsNonRoot: true
+ runAsUser: 1000
\ No newline at end of file
diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/charts/prometheus-node-exporter/OWNERS packages/rancher-monitoring/charts/charts/prometheus-node-exporter/OWNERS
--- packages/rancher-monitoring/charts-original/charts/prometheus-node-exporter/OWNERS
+++ packages/rancher-monitoring/charts/charts/prometheus-node-exporter/OWNERS
@@ -577,7 +620,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/tem
diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/templates/alertmanager/cleanupSecret.yaml packages/rancher-monitoring/charts/templates/alertmanager/cleanupSecret.yaml
--- packages/rancher-monitoring/charts-original/templates/alertmanager/cleanupSecret.yaml
+++ packages/rancher-monitoring/charts/templates/alertmanager/cleanupSecret.yaml
@@ -0,0 +1,82 @@
@@ -0,0 +1,86 @@
+{{- if and (.Values.alertmanager.enabled) (not .Values.alertmanager.alertmanagerSpec.useExistingSecret) (.Values.alertmanager.secret.cleanupOnUninstall) }}
+apiVersion: batch/v1
+kind: Job
@@ -599,6 +642,10 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/tem
+ app: {{ template "kube-prometheus-stack.name" . }}-alertmanager
+ spec:
+ serviceAccountName: alertmanager-{{ template "kube-prometheus-stack.fullname" . }}-post-delete
+{{- if .Values.alertmanager.secret.securityContext }}
+ securityContext:
+{{ toYaml .Values.alertmanager.secret.securityContext | indent 8 }}
+{{- end }}
+ containers:
+ - name: delete-secret
+ image: {{ template "system_default_registry" . }}{{ .Values.alertmanager.secret.image.repository }}:{{ .Values.alertmanager.secret.image.tag }}
@@ -686,7 +733,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/tem
{{ toYaml .Values.alertmanager.secret.annotations | indent 4 }}
{{- end }}
labels:
@@ -20,4 +28,93 @@
@@ -20,4 +28,97 @@
{{- range $key, $val := .Values.alertmanager.templateFiles }}
{{ $key }}: {{ $val | b64enc | quote }}
{{- end }}
@@ -711,6 +758,10 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/tem
+ app: {{ template "kube-prometheus-stack.name" . }}-alertmanager
+ spec:
+ serviceAccountName: alertmanager-{{ template "kube-prometheus-stack.fullname" . }}-pre-install
+{{- if .Values.alertmanager.secret.securityContext }}
+ securityContext:
+{{ toYaml .Values.alertmanager.secret.securityContext | indent 8 }}
+{{- end }}
+ containers:
+ - name: copy-pre-install-secret
+ image: {{ template "system_default_registry" . }}{{ .Values.alertmanager.secret.image.repository }}:{{ .Values.alertmanager.secret.image.tag }}
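
Both Jobs above (the post-delete cleanup Job and the pre-install copy Job) run outside the normal release lifecycle via Helm hook annotations. The hook metadata falls outside the visible hunk lines, so the following is only a sketch of the standard pattern these templates imply; the Job name and delete policy are assumptions:

```yaml
# Sketch of the assumed hook wiring; the `helm.sh/hook` annotation keys are standard Helm.
apiVersion: batch/v1
kind: Job
metadata:
  name: example-pre-install-secret                # hypothetical name
  annotations:
    "helm.sh/hook": pre-install                   # run before release resources are created
    "helm.sh/hook-delete-policy": hook-succeeded  # clean up the Job object once it completes
spec:
  template:
    spec:
      restartPolicy: Never
      containers:
        - name: copy-pre-install-secret           # container name taken from the hunk above
          image: rancher/rancher-agent:v2.4.8     # default per the alertmanager.secret.image values below
```

Because the secret is created by a hook rather than tracked as a release resource, `helm upgrade` never touches it, which is exactly the behavior described in the CHANGELOG entry above.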
@@ -1728,7 +1779,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/tem
diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/values.yaml packages/rancher-monitoring/charts/values.yaml
--- packages/rancher-monitoring/charts-original/values.yaml
+++ packages/rancher-monitoring/charts/values.yaml
@@ -2,13 +2,271 @@
@@ -2,13 +2,273 @@
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
@@ -1758,6 +1809,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
+ component: kube-controller-manager
+ clients:
+ port: 10011
+ useLocalhost: true
+ nodeSelector:
+ node-role.kubernetes.io/controlplane: "true"
+ tolerations:
@@ -1772,6 +1824,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
+ component: kube-scheduler
+ clients:
+ port: 10012
+ useLocalhost: true
+ nodeSelector:
+ node-role.kubernetes.io/controlplane: "true"
+ tolerations:
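
The kube-controller-manager and kube-scheduler blocks added above share one shape: a scrape client pinned to controlplane nodes that reaches the component over `127.0.0.1`, which is where hardened RKE clusters bind these components. Reassembled with guessed indentation (the parent key, the nesting of `nodeSelector`/`tolerations` under `clients`, and the toleration entries are assumptions; the excerpt truncates after `tolerations:`):

```yaml
# Reconstruction of one client block from the flattened fragments above.
component: kube-scheduler
clients:
  port: 10012                # port the scrape client listens on
  useLocalhost: true         # scrape the component on 127.0.0.1 from the node itself
  nodeSelector:
    node-role.kubernetes.io/controlplane: "true"
  tolerations:               # assumed: tolerate controlplane taints so clients can schedule there
    - operator: Exists
      effect: NoExecute
```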
@@ -2002,7 +2055,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Provide a k8s version to auto dashboard import script example: kubeTargetVersionOverride: 1.16.6
##
@@ -76,8 +334,19 @@
@@ -76,8 +336,19 @@
##
global:
@@ -2022,7 +2075,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
pspEnabled: true
pspAnnotations: {}
## Specify pod annotations
@@ -130,6 +399,22 @@
@@ -130,6 +401,22 @@
## ref: https://prometheus.io/docs/alerting/configuration/#configuration-file
## https://prometheus.io/webtools/alerting/routing-tree-editor/
##
@@ -2045,7 +2098,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
config:
global:
resolve_timeout: 5m
@@ -145,6 +430,8 @@
@@ -145,6 +432,8 @@
receiver: 'null'
receivers:
- name: 'null'
@@ -2054,7 +2107,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Pass the Alertmanager configuration directives through Helm's templating
## engine. If the Alertmanager configuration contains Alertmanager templates,
@@ -160,25 +447,76 @@
@@ -160,25 +449,76 @@
## ref: https://prometheus.io/docs/alerting/notifications/
## https://prometheus.io/docs/alerting/notification_examples/
##
@@ -2150,7 +2203,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
ingress:
enabled: false
@@ -208,6 +546,21 @@
@@ -208,6 +548,25 @@
## Configuration for Alertmanager secret
##
secret:
@@ -2168,11 +2221,15 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
+ repository: rancher/rancher-agent
+ tag: v2.4.8
+ pullPolicy: IfNotPresent
+
+ securityContext:
+ runAsNonRoot: true
+ runAsUser: 1000
+
annotations: {}
## Configuration for creating an Ingress that will map to each Alertmanager replica service
@@ -334,7 +687,7 @@
@@ -334,7 +693,7 @@
## Image of Alertmanager
##
image:
@@ -2181,7 +2238,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
tag: v0.21.0
sha: ""
@@ -410,9 +763,13 @@
@@ -410,9 +769,13 @@
## Define resources requests and limits for single Pods.
## ref: https://kubernetes.io/docs/user-guide/compute-resources/
##
@@ -2198,7 +2255,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node.
## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided.
@@ -487,6 +844,9 @@
@@ -487,6 +850,9 @@
enabled: true
namespaceOverride: ""
@@ -2208,7 +2265,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Deploy default dashboards.
##
defaultDashboardsEnabled: true
@@ -530,6 +890,7 @@
@@ -530,6 +896,7 @@
dashboards:
enabled: true
label: grafana_dashboard
@@ -2216,7 +2273,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Annotations for Grafana dashboard configmaps
##
@@ -575,6 +936,19 @@
@@ -575,6 +942,19 @@
##
service:
portName: service
@@ -2236,7 +2293,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## If true, create a serviceMonitor for grafana
##
@@ -600,6 +974,14 @@
@@ -600,6 +980,14 @@
# targetLabel: nodename
# replacement: $1
# action: replace
@@ -2251,7 +2308,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Component scraping the kube api server
##
@@ -756,7 +1138,7 @@
@@ -756,7 +1144,7 @@
## Component scraping the kube controller manager
##
kubeControllerManager:
@@ -2260,7 +2317,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## If your kube controller manager is not deployed as a pod, specify IPs it can be found on
##
@@ -889,7 +1271,7 @@
@@ -889,7 +1277,7 @@
## Component scraping etcd
##
kubeEtcd:
@@ -2269,7 +2326,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## If your etcd is not deployed as a pod, specify IPs it can be found on
##
@@ -949,7 +1331,7 @@
@@ -949,7 +1337,7 @@
## Component scraping kube scheduler
##
kubeScheduler:
@@ -2278,7 +2335,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## If your kube scheduler is not deployed as a pod, specify IPs it can be found on
##
@@ -1002,7 +1384,7 @@
@@ -1002,7 +1390,7 @@
## Component scraping kube proxy
##
kubeProxy:
@@ -2287,7 +2344,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## If your kube proxy is not deployed as a pod, specify IPs it can be found on
##
@@ -1076,6 +1458,13 @@
@@ -1076,6 +1464,13 @@
create: true
podSecurityPolicy:
enabled: true
@@ -2301,7 +2358,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Deploy node exporter as a daemonset to all nodes
##
@@ -1125,6 +1514,16 @@
@@ -1125,6 +1520,16 @@
extraArgs:
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
@@ -2318,7 +2375,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Manages Prometheus and Alertmanager components
##
@@ -1138,7 +1537,7 @@
@@ -1138,7 +1543,7 @@
tlsProxy:
enabled: true
image:
@@ -2327,7 +2384,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
tag: v1.5.2
sha: ""
pullPolicy: IfNotPresent
@@ -1156,7 +1555,7 @@
@@ -1156,7 +1561,7 @@
patch:
enabled: true
image:
@@ -2336,7 +2393,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
tag: v1.2.1
sha: ""
pullPolicy: IfNotPresent
@@ -1285,13 +1684,13 @@
@@ -1285,13 +1690,13 @@
## Resource limits & requests
##
@@ -2357,7 +2414,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
# Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico),
# because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working
@@ -1335,7 +1734,7 @@
@@ -1335,7 +1740,7 @@
## Prometheus-operator image
##
image:
@@ -2366,7 +2423,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
tag: v0.38.1
sha: ""
pullPolicy: IfNotPresent
@@ -1343,14 +1742,14 @@
@@ -1343,14 +1748,14 @@
## Configmap-reload image to use for reloading configmaps
##
configmapReloadImage:
@@ -2383,7 +2440,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
tag: v0.38.1
sha: ""
@@ -1366,14 +1765,6 @@
@@ -1366,14 +1771,6 @@
##
secretFieldSelector: ""
@@ -2398,7 +2455,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Deploy a Prometheus instance
##
prometheus:
@@ -1614,7 +2005,7 @@
@@ -1614,7 +2011,7 @@
## Image of Prometheus.
##
image:
@@ -2407,7 +2464,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
tag: v2.18.2
sha: ""
@@ -1666,6 +2057,11 @@
@@ -1666,6 +2063,11 @@
##
externalUrl: ""
@@ -2419,7 +2476,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## Define which Nodes the Pods are scheduled on.
## ref: https://kubernetes.io/docs/user-guide/node-selection/
##
@@ -1698,7 +2094,7 @@
@@ -1698,7 +2100,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the PrometheusRule resources created
##
@@ -2428,7 +2485,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## PrometheusRules to be selected for target discovery.
## If {}, select all ServiceMonitors
@@ -1723,7 +2119,7 @@
@@ -1723,7 +2125,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the servicemonitors created
##
@@ -2437,7 +2494,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## ServiceMonitors to be selected for target discovery.
## If {}, select all ServiceMonitors
@@ -1743,7 +2139,7 @@
@@ -1743,7 +2145,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the podmonitors created
##
@@ -2446,7 +2503,7 @@ diff -x '*.tgz' -x '*.lock' -uNr packages/rancher-monitoring/charts-original/val
## PodMonitors to be selected for target discovery.
## If {}, select all PodMonitors
@@ -1840,9 +2236,13 @@
@@ -1840,9 +2242,13 @@
## Resource limits & requests
##