From 5f1dde5a21e57a923b261b455c3aa465b6fc9221 Mon Sep 17 00:00:00 2001
From: Jiaqi Luo <6218999+jiaqiluo@users.noreply.github.com>
Date: Fri, 19 Nov 2021 12:12:52 -0700
Subject: [PATCH] (dev-v2.6-archive) [rancher-monitoring-crd] code refactor

(partially cherry picked from commit cfc243395342ab5d003bfc3cfe8a56559f9f541a)
---
 packages/rancher-backup-crd/package.yaml | 4 +-
 packages/rancher-backup/package.yaml | 4 +-
 .../rancher-cis-benchmark/charts/values.yaml | 2 +-
 .../generated-changes/patch/Chart.yaml.patch | 10 +-
 .../rancher-external-ip-webhook/package.yaml | 2 +-
 .../generated-changes/patch/Chart.yaml.patch | 6 +-
 .../generated-changes/patch/README.md.patch | 24 +
 .../patch/templates/_pod.tpl.patch | 16 +-
 .../image-renderer-deployment.yaml.patch | 2 +-
 .../templates/podsecuritypolicy.yaml.patch | 4 +-
 .../patch/templates/tests/test.yaml.patch | 11 +
 .../generated-changes/patch/values.yaml.patch | 37 +-
 packages/rancher-grafana/package.yaml | 3 +-
 .../patch/templates/deployment.yaml.patch | 22 +-
 .../patch/templates/role.yaml.patch | 9 +
 .../generated-changes/patch/values.yaml.patch | 4 +-
 .../rancher-kube-state-metrics/package.yaml | 3 +-
 .../generated-changes/exclude/Chart.lock | 10 +-
 .../exclude/hack/sync_grafana_dashboards.py | 44 +-
 .../exclude/hack/sync_prometheus_rules.py | 45 +-
 .../prometheus/rules/alertmanager.rules.yaml | 63 ---
 .../templates/prometheus/rules/etcd.yaml | 179 ------
 .../prometheus/rules/general.rules.yaml | 56 --
 .../templates/prometheus/rules/k8s.rules.yaml | 83 ---
 .../rules/kube-apiserver.rules.yaml | 39 --
 .../kube-prometheus-node-alerting.rules.yaml | 47 --
 .../kube-prometheus-node-recording.rules.yaml | 41 --
 .../rules/kube-scheduler.rules.yaml | 63 ---
 .../prometheus/rules/kubernetes-absent.yaml | 159 ------
 .../prometheus/rules/kubernetes-apps.yaml | 200 -------
 .../rules/kubernetes-resources.yaml | 121 ----
 .../prometheus/rules/kubernetes-storage.yaml | 72 ---
 .../prometheus/rules/kubernetes-system.yaml | 184 ------
 .../prometheus/rules/node-network.yaml | 57 --
 .../templates/prometheus/rules/node-time.yaml | 37 --
 .../prometheus/rules/node.rules.yaml | 202 -------
 .../prometheus/rules/prometheus-operator.yaml | 49 --
 .../prometheus/rules/prometheus.rules.yaml | 139 -----
 .../unittests/alertmanager/ingress_test.yaml | 97 ++++
 .../generated-changes/overlay/app-README.md | 14 +
 .../rancher-monitoring/clusterrole.yaml | 1 -
 .../generated-changes/patch/Chart.yaml.patch | 20 +-
 .../generated-changes/patch/README.md.patch | 2 +-
 .../charts/windowsExporter/values.yaml.patch | 11 -
 .../alertmanager/alertmanager.yaml.patch | 2 +-
 .../templates/alertmanager/secret.yaml.patch | 2 +-
 .../serviceMonitor.yaml.patch | 10 +
 .../grafana/configmap-dashboards.yaml.patch | 2 +-
 .../grafana/configmaps-datasources.yaml.patch | 2 +-
 .../alertmanager-overview.yaml.patch | 23 +
 .../dashboards-1.14/apiserver.yaml.patch | 2 +-
 .../dashboards-1.14/cluster-total.yaml.patch | 11 +-
 .../controller-manager.yaml.patch | 33 +-
 .../grafana/dashboards-1.14/etcd.yaml.patch | 2 +-
 .../dashboards-1.14/k8s-coredns.yaml.patch | 2 +-
 .../k8s-resources-cluster.yaml.patch | 11 +-
 .../k8s-resources-namespace.yaml.patch | 2 +-
 .../k8s-resources-node.yaml.patch | 2 +-
 .../k8s-resources-pod.yaml.patch | 2 +-
 .../k8s-resources-workload.yaml.patch | 2 +-
 ...s-resources-workloads-namespace.yaml.patch | 2 +-
 .../dashboards-1.14/kubelet.yaml.patch | 527 +++++++++---------
 .../namespace-by-pod.yaml.patch | 11 +-
 .../namespace-by-workload.yaml.patch | 11 +-
 .../node-cluster-rsrc-use.yaml.patch | 2 +-
 .../dashboards-1.14/node-rsrc-use.yaml.patch | 2 +-
 .../grafana/dashboards-1.14/nodes.yaml.patch | 2 +-
 .../persistentvolumesusage.yaml.patch | 2 +-
 .../dashboards-1.14/pod-total.yaml.patch | 11 +-
 .../prometheus-remote-write.yaml.patch | 2 +-
 .../dashboards-1.14/prometheus.yaml.patch | 2 +-
 .../grafana/dashboards-1.14/proxy.yaml.patch | 2 +-
 .../dashboards-1.14/scheduler.yaml.patch | 59 +-
 .../dashboards-1.14/statefulset.yaml.patch | 2 +-
 .../dashboards-1.14/workload-total.yaml.patch | 2 +-
 .../prometheus-operator/deployment.yaml.patch | 18 +-
 .../prometheus/prometheus.yaml.patch | 29 +-
 .../rules-1.14/alertmanager.rules.yaml.patch | 18 +
 .../rules-1.14/k8s.rules.yaml.patch | 6 +-
 .../rules-1.14/kubernetes-storage.yaml.patch | 22 +-
 .../generated-changes/patch/values.yaml.patch | 124 +++--
 packages/rancher-monitoring/package.yaml | 5 +-
 .../crd-template/templates/jobs.yaml | 23 +-
 .../crd-template/templates/manifest.yaml | 10 +-
 .../templates/crd-template/values.yaml | 4 +
 .../exclude/ci/port-values.yaml | 3 +
 .../generated-changes/patch/Chart.yaml.patch | 13 +-
 .../patch/templates/daemonset.yaml.patch | 4 +-
 .../generated-changes/patch/values.yaml.patch | 4 +-
 packages/rancher-node-exporter/package.yaml | 3 +-
 .../exclude/ci/default-values.yaml | 0
 .../exclude/ci/external-rules-values.yaml | 9 +
 .../generated-changes/patch/Chart.yaml.patch | 4 +-
 .../patch/templates/deployment.yaml.patch | 8 +-
 .../generated-changes/patch/values.yaml.patch | 6 +-
 .../rancher-prometheus-adapter/package.yaml | 3 +-
 packages/rancher-pushprox/charts/Chart.yaml | 2 +-
 .../charts/templates/_helpers.tpl | 1 -
 packages/rancher-pushprox/package.yaml | 2 +-
 .../charts/values.yaml | 2 +-
 .../rancher-windows-exporter/package.yaml | 1 +
 101 files changed, 936 insertions(+), 2335 deletions(-)
 create mode 100644 packages/rancher-grafana/generated-changes/patch/README.md.patch
 create mode 100644 packages/rancher-grafana/generated-changes/patch/templates/tests/test.yaml.patch
 create mode 100644 packages/rancher-kube-state-metrics/generated-changes/patch/templates/role.yaml.patch
 delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/alertmanager.rules.yaml
 delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/etcd.yaml
 delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/general.rules.yaml
 delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/k8s.rules.yaml
 delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-apiserver.rules.yaml
 delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-alerting.rules.yaml
 delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-recording.rules.yaml
 delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-scheduler.rules.yaml
 delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-absent.yaml
 delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-apps.yaml
 delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-resources.yaml
 delete mode 100644
packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-storage.yaml delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-system.yaml delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-network.yaml delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-time.yaml delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node.rules.yaml delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus-operator.yaml delete mode 100644 packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus.rules.yaml create mode 100644 packages/rancher-monitoring/generated-changes/exclude/unittests/alertmanager/ingress_test.yaml delete mode 100644 packages/rancher-monitoring/generated-changes/patch/charts/windowsExporter/values.yaml.patch create mode 100644 packages/rancher-monitoring/generated-changes/patch/templates/exporters/kube-state-metrics/serviceMonitor.yaml.patch create mode 100644 packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/alertmanager-overview.yaml.patch create mode 100644 packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/alertmanager.rules.yaml.patch create mode 100644 packages/rancher-node-exporter/generated-changes/exclude/ci/port-values.yaml create mode 100644 packages/rancher-prometheus-adapter/generated-changes/exclude/ci/default-values.yaml create mode 100644 packages/rancher-prometheus-adapter/generated-changes/exclude/ci/external-rules-values.yaml diff --git a/packages/rancher-backup-crd/package.yaml b/packages/rancher-backup-crd/package.yaml index 6a8744c0f..40845d254 100644 --- a/packages/rancher-backup-crd/package.yaml +++ b/packages/rancher-backup-crd/package.yaml @@ -1,2 +1,2 @@ -url: https://github.com/rancher/backup-restore-operator/releases/download/v2.1.0-rc1/rancher-backup-crd-2.1.0-rc1.tgz -version: 2.1.0 +url: https://github.com/rancher/backup-restore-operator/releases/download/v2.0.1/rancher-backup-crd-2.0.1.tgz +version: 2.0.1 diff --git a/packages/rancher-backup/package.yaml b/packages/rancher-backup/package.yaml index 6ce9512be..56fc5017f 100644 --- a/packages/rancher-backup/package.yaml +++ b/packages/rancher-backup/package.yaml @@ -1,2 +1,2 @@ -url: https://github.com/rancher/backup-restore-operator/releases/download/v2.1.0-rc1/rancher-backup-2.1.0-rc1.tgz -version: 2.1.0 +url: https://github.com/rancher/backup-restore-operator/releases/download/v2.0.1/rancher-backup-2.0.1.tgz +version: 2.0.1 diff --git a/packages/rancher-cis-benchmark/charts/values.yaml b/packages/rancher-cis-benchmark/charts/values.yaml index 69393ec52..77d8ce534 100644 --- a/packages/rancher-cis-benchmark/charts/values.yaml +++ b/packages/rancher-cis-benchmark/charts/values.yaml @@ -8,7 +8,7 @@ image: tag: v1.0.6 securityScan: repository: rancher/security-scan - tag: v0.2.5-rc2 + tag: v0.2.5-rc1 sonobuoy: repository: rancher/mirrored-sonobuoy-sonobuoy tag: v0.53.2 diff --git a/packages/rancher-external-ip-webhook/generated-changes/patch/Chart.yaml.patch b/packages/rancher-external-ip-webhook/generated-changes/patch/Chart.yaml.patch index ed437c3fb..565e57981 100644 --- a/packages/rancher-external-ip-webhook/generated-changes/patch/Chart.yaml.patch +++ 
b/packages/rancher-external-ip-webhook/generated-changes/patch/Chart.yaml.patch @@ -1,7 +1,7 @@ --- charts-original/Chart.yaml +++ charts/Chart.yaml -@@ -4,12 +4,12 @@ - catalog.cattle.io/kube-version: < 1.22.0 +@@ -3,12 +3,12 @@ + catalog.cattle.io/display-name: External IP Webhook catalog.cattle.io/namespace: cattle-externalip-system catalog.cattle.io/os: linux - catalog.cattle.io/release-name: rancher-externalip-webhook @@ -9,14 +9,14 @@ + catalog.cattle.io/release-name: rancher-external-ip-webhook + catalog.cattle.io/ui-component: rancher-external-ip-webhook apiVersion: v1 - appVersion: v1.0.1-rc2 + appVersion: v1.0.1-rc1 description: | - Deploy the externalip-webhook to mitigate k8s CVE-2020-8554 + Deploy the external-ip-webhook to mitigate k8s CVE-2020-8554 home: https://github.com/rancher/externalip-webhook keywords: - cve -@@ -20,7 +20,7 @@ +@@ -19,7 +19,7 @@ maintainers: - email: raul@rancher.com name: rawmind0 @@ -24,4 +24,4 @@ +name: rancher-external-ip-webhook sources: - https://github.com/rancher/externalip-webhook - version: 1.0.1-rc2 + version: 1.0.1-rc1 diff --git a/packages/rancher-external-ip-webhook/package.yaml b/packages/rancher-external-ip-webhook/package.yaml index c983ee406..27d97515c 100644 --- a/packages/rancher-external-ip-webhook/package.yaml +++ b/packages/rancher-external-ip-webhook/package.yaml @@ -1,2 +1,2 @@ -url: https://github.com/rancher/externalip-webhook/releases/download/v1.0.1-rc2/rancher-externalip-webhook-1.0.1-rc2.tgz +url: https://github.com/rancher/externalip-webhook/releases/download/v1.0.1-rc1/rancher-externalip-webhook-1.0.1-rc1.tgz version: 100.0.1 diff --git a/packages/rancher-grafana/generated-changes/patch/Chart.yaml.patch b/packages/rancher-grafana/generated-changes/patch/Chart.yaml.patch index b1f759d6c..fd3d660a9 100644 --- a/packages/rancher-grafana/generated-changes/patch/Chart.yaml.patch +++ b/packages/rancher-grafana/generated-changes/patch/Chart.yaml.patch @@ -10,9 +10,9 @@ apiVersion: v2 -name: grafana +name: rancher-grafana - version: 6.11.0 --appVersion: 7.5.5 -+appVersion: 7.5.8 + version: 6.16.14 +-appVersion: 8.2.1 ++appVersion: 7.5.11 kubeVersion: '^1.8.0-0' description: The leading tool for querying and visualizing time series and metrics. 
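Note: the Chart.yaml patches above follow the usual rancher/charts repackaging pattern: the upstream chart is renamed to a rancher-* name and annotated with catalog.cattle.io/* keys that drive the Rancher catalog UI. A minimal sketch of the chart header these hunks produce for the webhook chart, assembled only from the field values visible above (fields not shown in the hunks are omitted):

  apiVersion: v1
  name: rancher-external-ip-webhook
  version: 1.0.1-rc1
  appVersion: v1.0.1-rc1
  description: |
    Deploy the external-ip-webhook to mitigate k8s CVE-2020-8554
  home: https://github.com/rancher/externalip-webhook
  annotations:
    catalog.cattle.io/display-name: External IP Webhook
    catalog.cattle.io/namespace: cattle-externalip-system
    catalog.cattle.io/os: linux
    catalog.cattle.io/release-name: rancher-external-ip-webhook
    catalog.cattle.io/ui-component: rancher-external-ip-webhook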
home: https://grafana.net diff --git a/packages/rancher-grafana/generated-changes/patch/README.md.patch b/packages/rancher-grafana/generated-changes/patch/README.md.patch new file mode 100644 index 000000000..beaff22f4 --- /dev/null +++ b/packages/rancher-grafana/generated-changes/patch/README.md.patch @@ -0,0 +1,24 @@ +--- charts-original/README.md ++++ charts/README.md +@@ -59,8 +59,8 @@ + | `securityContext` | Deployment securityContext | `{"runAsUser": 472, "runAsGroup": 472, "fsGroup": 472}` | + | `priorityClassName` | Name of Priority Class to assign pods | `nil` | + | `image.repository` | Image repository | `grafana/grafana` | +-| `image.tag` | Image tag (`Must be >= 5.0.0`) | `8.0.3` | +-| `image.sha` | Image sha (optional) | `80c6d6ac633ba5ab3f722976fb1d9a138f87ca6a9934fcd26a5fc28cbde7dbfa` | ++| `image.tag` | Image tag (`Must be >= 5.0.0`) | `7.5.11` | ++| `image.sha` | Image sha (optional) | `` | + | `image.pullPolicy` | Image pull policy | `IfNotPresent` | + | `image.pullSecrets` | Image pull secrets | `{}` | + | `service.enabled` | Enable grafana service | `true` | +@@ -188,8 +188,8 @@ + | `rbac.extraRoleRules` | Additional rules to add to the Role | [] | + | `rbac.extraClusterRoleRules` | Additional rules to add to the ClusterRole | [] | + | `command` | Define command to be executed by grafana container at startup | `nil` | +-| `testFramework.enabled` | Whether to create test-related resources | `true` | +-| `testFramework.image` | `test-framework` image repository. | `bats/bats` | ++| `testFramework.enabled` | Whether to create test-related resources | `false` | ++| `testFramework.image` | `test-framework` image repository. | `rancher/mirrored-bats-bats` | + | `testFramework.tag` | `test-framework` image tag. | `v1.1.0` | + | `testFramework.imagePullPolicy` | `test-framework` image pull policy. | `IfNotPresent` | + | `testFramework.securityContext` | `test-framework` securityContext | `{}` | diff --git a/packages/rancher-grafana/generated-changes/patch/templates/_pod.tpl.patch b/packages/rancher-grafana/generated-changes/patch/templates/_pod.tpl.patch index 50d29485e..df4dc850c 100644 --- a/packages/rancher-grafana/generated-changes/patch/templates/_pod.tpl.patch +++ b/packages/rancher-grafana/generated-changes/patch/templates/_pod.tpl.patch @@ -5,7 +5,7 @@ {{- define "grafana.pod" -}} {{- if .Values.schedulerName }} schedulerName: "{{ .Values.schedulerName }}" -@@ -21,9 +20,9 @@ +@@ -22,9 +21,9 @@ {{- if ( and .Values.persistence.enabled .Values.initChownData.enabled ) }} - name: init-chown-data {{- if .Values.initChownData.image.sha }} @@ -17,7 +17,7 @@ {{- end }} imagePullPolicy: {{ .Values.initChownData.image.pullPolicy }} securityContext: -@@ -42,9 +41,9 @@ +@@ -43,9 +42,9 @@ {{- if .Values.dashboards }} - name: download-dashboards {{- if .Values.downloadDashboardsImage.sha }} @@ -29,7 +29,7 @@ {{- end }} imagePullPolicy: {{ .Values.downloadDashboardsImage.pullPolicy }} command: ["/bin/sh"] -@@ -79,11 +78,16 @@ +@@ -80,11 +79,16 @@ {{- if .Values.sidecar.datasources.enabled }} - name: {{ template "grafana.name" . }}-sc-datasources {{- if .Values.sidecar.image.sha }} @@ -48,7 +48,7 @@ env: - name: METHOD value: LIST -@@ -118,9 +122,9 @@ +@@ -119,9 +123,9 @@ {{- if .Values.sidecar.notifiers.enabled }} - name: {{ template "grafana.name" . 
}}-sc-notifiers {{- if .Values.sidecar.image.sha }} @@ -60,7 +60,7 @@ {{- end }} imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} env: -@@ -163,9 +167,9 @@ +@@ -165,9 +169,9 @@ {{- if .Values.sidecar.dashboards.enabled }} - name: {{ template "grafana.name" . }}-sc-dashboard {{- if .Values.sidecar.image.sha }} @@ -72,7 +72,7 @@ {{- end }} imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} env: -@@ -205,9 +209,9 @@ +@@ -207,9 +211,9 @@ {{- end}} - name: {{ .Chart.Name }} {{- if .Values.image.sha }} @@ -84,7 +84,7 @@ {{- end }} imagePullPolicy: {{ .Values.image.pullPolicy }} {{- if .Values.command }} -@@ -307,7 +311,7 @@ +@@ -315,7 +319,7 @@ {{- end }} ports: - name: {{ .Values.service.portName }} @@ -93,7 +93,7 @@ protocol: TCP - name: {{ .Values.podPortName }} containerPort: 3000 -@@ -388,17 +392,17 @@ +@@ -402,17 +406,17 @@ {{- with .Values.extraContainers }} {{ tpl . $ | indent 2 }} {{- end }} diff --git a/packages/rancher-grafana/generated-changes/patch/templates/image-renderer-deployment.yaml.patch b/packages/rancher-grafana/generated-changes/patch/templates/image-renderer-deployment.yaml.patch index 09f917df0..f31b66d10 100644 --- a/packages/rancher-grafana/generated-changes/patch/templates/image-renderer-deployment.yaml.patch +++ b/packages/rancher-grafana/generated-changes/patch/templates/image-renderer-deployment.yaml.patch @@ -21,7 +21,7 @@ {{- end }} imagePullPolicy: {{ .Values.imageRenderer.image.pullPolicy }} {{- if .Values.imageRenderer.command }} -@@ -97,16 +99,16 @@ +@@ -101,16 +103,16 @@ resources: {{ toYaml . | indent 12 }} {{- end }} diff --git a/packages/rancher-grafana/generated-changes/patch/templates/podsecuritypolicy.yaml.patch b/packages/rancher-grafana/generated-changes/patch/templates/podsecuritypolicy.yaml.patch index 3c492293c..439672d3e 100644 --- a/packages/rancher-grafana/generated-changes/patch/templates/podsecuritypolicy.yaml.patch +++ b/packages/rancher-grafana/generated-changes/patch/templates/podsecuritypolicy.yaml.patch @@ -1,7 +1,7 @@ --- charts-original/templates/podsecuritypolicy.yaml +++ charts/templates/podsecuritypolicy.yaml -@@ -6,13 +6,9 @@ - namespace: {{ template "grafana.namespace" . }} +@@ -5,13 +5,9 @@ + name: {{ template "grafana.fullname" . }} labels: {{- include "grafana.labels" . | nindent 4 }} - annotations: diff --git a/packages/rancher-grafana/generated-changes/patch/templates/tests/test.yaml.patch b/packages/rancher-grafana/generated-changes/patch/templates/tests/test.yaml.patch new file mode 100644 index 000000000..22c24869f --- /dev/null +++ b/packages/rancher-grafana/generated-changes/patch/templates/tests/test.yaml.patch @@ -0,0 +1,11 @@ +--- charts-original/templates/tests/test.yaml ++++ charts/templates/tests/test.yaml +@@ -33,7 +33,7 @@ + {{- end }} + containers: + - name: {{ .Release.Name }}-test +- image: "{{ .Values.testFramework.image}}:{{ .Values.testFramework.tag }}" ++ image: "{{ template "system_default_registry" . 
}}{{ .Values.testFramework.image}}:{{ .Values.testFramework.tag }}" + imagePullPolicy: "{{ .Values.testFramework.imagePullPolicy}}" + command: ["/opt/bats/bin/bats", "-t", "/tests/run.sh"] + volumeMounts: diff --git a/packages/rancher-grafana/generated-changes/patch/values.yaml.patch b/packages/rancher-grafana/generated-changes/patch/values.yaml.patch index 9dd0a08e2..936efc134 100644 --- a/packages/rancher-grafana/generated-changes/patch/values.yaml.patch +++ b/packages/rancher-grafana/generated-changes/patch/values.yaml.patch @@ -1,12 +1,10 @@ --- charts-original/values.yaml +++ charts/values.yaml -@@ -1,9 +1,25 @@ +@@ -1,9 +1,23 @@ +global: + cattle: + systemDefaultRegistry: "" + -+autoscaling: -+ enabled: false rbac: create: true ## Use an existing ClusterRole/Role (depending on rbac.namespaced false/true) @@ -27,22 +25,24 @@ namespaced: false extraRoleRules: [] # - apiGroups: [] -@@ -68,8 +84,8 @@ +@@ -69,8 +83,8 @@ # schedulerName: "default-scheduler" image: - repository: grafana/grafana -- tag: 7.5.5 +- tag: 8.2.1 + repository: rancher/mirrored-grafana-grafana -+ tag: 7.5.8 ++ tag: 7.5.11 sha: "" pullPolicy: IfNotPresent -@@ -82,12 +98,15 @@ +@@ -82,13 +96,16 @@ + # - myRegistrKeySecretName testFramework: - enabled: true +- enabled: true - image: "bats/bats" ++ enabled: false + image: "rancher/mirrored-bats-bats" tag: "v1.1.0" imagePullPolicy: IfNotPresent @@ -56,7 +56,7 @@ runAsUser: 472 runAsGroup: 472 fsGroup: 472 -@@ -115,8 +134,8 @@ +@@ -116,8 +133,8 @@ # priorityClassName: downloadDashboardsImage: @@ -67,7 +67,16 @@ sha: "" pullPolicy: IfNotPresent -@@ -301,7 +320,7 @@ +@@ -189,7 +206,7 @@ + labels: {} + path: / + +- # pathType is only for k8s >= 1.1= ++ # pathType is only for k8s >= 1.18 + pathType: Prefix + + hosts: +@@ -303,7 +320,7 @@ ## initChownData container image ## image: @@ -76,18 +85,16 @@ tag: "1.31.1" sha: "" pullPolicy: IfNotPresent -@@ -602,8 +621,8 @@ +@@ -614,7 +631,7 @@ ## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards sidecar: image: - repository: quay.io/kiwigrid/k8s-sidecar -- tag: 1.10.7 + repository: rancher/mirrored-kiwigrid-k8s-sidecar -+ tag: 1.12.2 + tag: 1.12.3 sha: "" imagePullPolicy: IfNotPresent - resources: {} -@@ -690,9 +709,9 @@ +@@ -702,9 +719,9 @@ replicas: 1 image: # image-renderer Image repository diff --git a/packages/rancher-grafana/package.yaml b/packages/rancher-grafana/package.yaml index bbb109e4a..558aa312e 100644 --- a/packages/rancher-grafana/package.yaml +++ b/packages/rancher-grafana/package.yaml @@ -1,4 +1,5 @@ url: https://github.com/grafana/helm-charts.git subdirectory: charts/grafana -commit: bf7e1110a5ee9258190d0377fea319bb8e764e62 +commit: ad033c2a2b1f81284a924fb8627ca00700952fc5 version: 100.0.0 +doNotRelease: true diff --git a/packages/rancher-kube-state-metrics/generated-changes/patch/templates/deployment.yaml.patch b/packages/rancher-kube-state-metrics/generated-changes/patch/templates/deployment.yaml.patch index 881673c4d..0a3f3e1bd 100644 --- a/packages/rancher-kube-state-metrics/generated-changes/patch/templates/deployment.yaml.patch +++ b/packages/rancher-kube-state-metrics/generated-changes/patch/templates/deployment.yaml.patch @@ -8,27 +8,27 @@ {{- end }} {{- if .Values.priorityClassName }} priorityClassName: {{ .Values.priorityClassName }} -@@ -173,7 +174,7 @@ +@@ -107,7 +108,7 @@ readOnly: true - {{- end }} + {{- end }} imagePullPolicy: {{ .Values.image.pullPolicy }} - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + 
image: "{{ template "system_default_registry" . }}{{ .Values.image.repository }}:{{ .Values.image.tag }}" ports: - - containerPort: 8080 - {{- if .Values.selfMonitor.enabled }} -@@ -207,12 +208,12 @@ + - containerPort: {{ .Values.service.port | default 8080}} + {{- if .Values.selfMonitor.enabled }} +@@ -141,12 +142,12 @@ affinity: {{ toYaml .Values.affinity | indent 8 }} - {{- end }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} - {{- if .Values.nodeSelector }} + {{- if .Values.nodeSelector }} - nodeSelector: {{ toYaml .Values.nodeSelector | indent 8 }} - {{- end }} + {{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} - {{- if .Values.tolerations }} + {{- if .Values.tolerations }} - tolerations: {{ toYaml .Values.tolerations | indent 8 }} - {{- end }} - {{- if .Values.kubeconfig.enabled}} + {{- end }} + {{- if .Values.kubeconfig.enabled}} diff --git a/packages/rancher-kube-state-metrics/generated-changes/patch/templates/role.yaml.patch b/packages/rancher-kube-state-metrics/generated-changes/patch/templates/role.yaml.patch new file mode 100644 index 000000000..7a53465e7 --- /dev/null +++ b/packages/rancher-kube-state-metrics/generated-changes/patch/templates/role.yaml.patch @@ -0,0 +1,9 @@ +--- charts-original/templates/role.yaml ++++ charts/templates/role.yaml +@@ -1,3 +1,6 @@ ++{{- if not (kindIs "slice" .Values.collectors) }} ++{{- fail "Collectors need to be a List since kube-state-metrics chart 3.2.2. Please check README for more information."}} ++{{- end }} + {{- if and (eq .Values.rbac.create true) (not .Values.rbac.useExistingRole) -}} + {{- range (split "," .Values.namespaces) }} + --- diff --git a/packages/rancher-kube-state-metrics/generated-changes/patch/values.yaml.patch b/packages/rancher-kube-state-metrics/generated-changes/patch/values.yaml.patch index 9bbec02b8..87357b0f6 100644 --- a/packages/rancher-kube-state-metrics/generated-changes/patch/values.yaml.patch +++ b/packages/rancher-kube-state-metrics/generated-changes/patch/values.yaml.patch @@ -10,10 +10,10 @@ image: - repository: k8s.gcr.io/kube-state-metrics/kube-state-metrics + repository: rancher/mirrored-kube-state-metrics-kube-state-metrics - tag: v2.0.0 + tag: v2.2.0 pullPolicy: IfNotPresent -@@ -84,6 +88,7 @@ +@@ -86,6 +90,7 @@ securityContext: enabled: true diff --git a/packages/rancher-kube-state-metrics/package.yaml b/packages/rancher-kube-state-metrics/package.yaml index b4475ee6f..9a1a19116 100644 --- a/packages/rancher-kube-state-metrics/package.yaml +++ b/packages/rancher-kube-state-metrics/package.yaml @@ -1,4 +1,5 @@ url: https://github.com/prometheus-community/helm-charts.git subdirectory: charts/kube-state-metrics -commit: 086f1f7f0870e110abf30aa6bfe7c141e83cc950 +commit: 3f371027f2c384cb2e58b46b2249b6bfa200b1e7 version: 100.0.0 +doNotRelease: true diff --git a/packages/rancher-monitoring/generated-changes/exclude/Chart.lock b/packages/rancher-monitoring/generated-changes/exclude/Chart.lock index b6d6f97d9..89fa5d500 100644 --- a/packages/rancher-monitoring/generated-changes/exclude/Chart.lock +++ b/packages/rancher-monitoring/generated-changes/exclude/Chart.lock @@ -1,12 +1,12 @@ dependencies: - name: kube-state-metrics repository: https://prometheus-community.github.io/helm-charts - version: 3.1.1 + version: 3.5.2 - name: prometheus-node-exporter repository: https://prometheus-community.github.io/helm-charts - version: 1.18.1 + version: 2.0.4 - name: grafana repository: https://grafana.github.io/helm-charts - version: 6.12.0 -digest: 
sha256:11886645ff1ade77d0fefdca90afba4a92f2b535997280074a59828e8d1dab4e -generated: "2021-06-09T16:56:40.364303181+02:00" + version: 6.16.10 +digest: sha256:94dad976ca1630e9e3cd006fadb255783387b53bd9d0d19e105bd39d8e8e34be +generated: "2021-09-28T10:26:46.319411+07:00" diff --git a/packages/rancher-monitoring/generated-changes/exclude/hack/sync_grafana_dashboards.py b/packages/rancher-monitoring/generated-changes/exclude/hack/sync_grafana_dashboards.py index 860f5a703..3efc77378 100644 --- a/packages/rancher-monitoring/generated-changes/exclude/hack/sync_grafana_dashboards.py +++ b/packages/rancher-monitoring/generated-changes/exclude/hack/sync_grafana_dashboards.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 """Fetch dashboards from provided urls into this chart.""" import json +import re import textwrap from os import makedirs, path @@ -26,16 +27,18 @@ def change_style(style, representer): # Source files list charts = [ { - 'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml', + 'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml', 'destination': '../templates/grafana/dashboards-1.14', 'type': 'yaml', - 'min_kubernetes': '1.14.0-0' + 'min_kubernetes': '1.14.0-0', + 'multicluster_key': '.Values.grafana.sidecar.dashboards.multicluster.global.enabled', }, { 'source': 'https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/grafana.json', 'destination': '../templates/grafana/dashboards-1.14', 'type': 'json', - 'min_kubernetes': '1.14.0-0' + 'min_kubernetes': '1.14.0-0', + 'multicluster_key': '(or .Values.grafana.sidecar.dashboards.multicluster.global.enabled .Values.grafana.sidecar.dashboards.multicluster.etcd.enabled)' }, ] @@ -64,7 +67,7 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-promet apiVersion: v1 kind: ConfigMap metadata: - namespace: {{ template "kube-prometheus-stack.namespace" . }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} name: {{ printf "%%s-%%s" (include "kube-prometheus-stack.fullname" $) "%(name)s" | trunc 63 | trimSuffix "-" }} annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} @@ -103,20 +106,29 @@ def yaml_str_repr(struct, indent=2): text = textwrap.indent(text, ' ' * indent) return text - -def patch_json_for_multicluster_configuration(content): +def patch_dashboards_json(content, multicluster_key): try: content_struct = json.loads(content) + + # multicluster overwrite_list = [] for variable in content_struct['templating']['list']: if variable['name'] == 'cluster': variable['hide'] = ':multicluster:' overwrite_list.append(variable) content_struct['templating']['list'] = overwrite_list + + # fix drilldown links. 
See https://github.com/kubernetes-monitoring/kubernetes-mixin/issues/659 + for row in content_struct['rows']: + for panel in row['panels']: + for style in panel.get('styles', []): + if 'linkUrl' in style and style['linkUrl'].startswith('./d'): + style['linkUrl'] = style['linkUrl'].replace('./d', '/d') + content_array = [] original_content_lines = content.split('\n') for i, line in enumerate(json.dumps(content_struct, indent=4).split('\n')): - if ('[]' not in line and '{}' not in line) or line == original_content_lines[i]: + if (' []' not in line and ' {}' not in line) or line == original_content_lines[i]: content_array.append(line) continue @@ -136,7 +148,7 @@ def patch_json_for_multicluster_configuration(content): if multicluster != -1: content = ''.join(( content[:multicluster-1], - '\{\{ if .Values.grafana.sidecar.dashboards.multicluster \}\}0\{\{ else \}\}2\{\{ end \}\}', + '\{\{ if %s \}\}0\{\{ else \}\}2\{\{ end \}\}' % multicluster_key, content[multicluster + 15:] )) except (ValueError, KeyError): @@ -145,7 +157,12 @@ def patch_json_for_multicluster_configuration(content): return content -def write_group_to_file(resource_name, content, url, destination, min_kubernetes, max_kubernetes): +def patch_json_set_timezone_as_variable(content): + # content is no more in json format, so we have to replace using regex + return re.sub(r'"timezone"\s*:\s*"(?:\\.|[^\"])*"', '"timezone": "\{\{ .Values.grafana.defaultDashboardsTimezone \}\}"', content, flags=re.IGNORECASE) + + +def write_group_to_file(resource_name, content, url, destination, min_kubernetes, max_kubernetes, multicluster_key): # initialize header lines = header % { 'name': resource_name, @@ -155,7 +172,8 @@ def write_group_to_file(resource_name, content, url, destination, min_kubernetes 'max_kubernetes': max_kubernetes } - content = patch_json_for_multicluster_configuration(content) + content = patch_dashboards_json(content, multicluster_key) + content = patch_json_set_timezone_as_variable(content) filename_struct = {resource_name + '.json': (LiteralStr(content))} # rules themselves @@ -196,17 +214,17 @@ def main(): groups = yaml_text['items'] for group in groups: for resource, content in group['data'].items(): - write_group_to_file(resource.replace('.json', ''), content, chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes']) + write_group_to_file(resource.replace('.json', ''), content, chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'], chart['multicluster_key']) elif chart['type'] == 'json': json_text = json.loads(raw_text) # is it already a dashboard structure or is it nested (etcd case)? 
flat_structure = bool(json_text.get('annotations')) if flat_structure: resource = path.basename(chart['source']).replace('.json', '') - write_group_to_file(resource, json.dumps(json_text, indent=4), chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes']) + write_group_to_file(resource, json.dumps(json_text, indent=4), chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'], chart['multicluster_key']) else: for resource, content in json_text.items(): - write_group_to_file(resource.replace('.json', ''), json.dumps(content, indent=4), chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes']) + write_group_to_file(resource.replace('.json', ''), json.dumps(content, indent=4), chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'], chart['multicluster_key']) print("Finished") diff --git a/packages/rancher-monitoring/generated-changes/exclude/hack/sync_prometheus_rules.py b/packages/rancher-monitoring/generated-changes/exclude/hack/sync_prometheus_rules.py index 59c711357..22fba0171 100644 --- a/packages/rancher-monitoring/generated-changes/exclude/hack/sync_prometheus_rules.py +++ b/packages/rancher-monitoring/generated-changes/exclude/hack/sync_prometheus_rules.py @@ -25,7 +25,37 @@ def change_style(style, representer): # Source files list charts = [ { - 'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml', + 'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/alertmanager-prometheusRule.yaml', + 'destination': '../templates/prometheus/rules-1.14', + 'min_kubernetes': '1.14.0-0' + }, + { + 'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml', + 'destination': '../templates/prometheus/rules-1.14', + 'min_kubernetes': '1.14.0-0' + }, + { + 'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml', + 'destination': '../templates/prometheus/rules-1.14', + 'min_kubernetes': '1.14.0-0' + }, + { + 'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-state-metrics-prometheusRule.yaml', + 'destination': '../templates/prometheus/rules-1.14', + 'min_kubernetes': '1.14.0-0' + }, + { + 'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/node-exporter-prometheusRule.yaml', + 'destination': '../templates/prometheus/rules-1.14', + 'min_kubernetes': '1.14.0-0' + }, + { + 'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-prometheusRule.yaml', + 'destination': '../templates/prometheus/rules-1.14', + 'min_kubernetes': '1.14.0-0' + }, + { + 'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-operator-prometheusRule.yaml', 'destination': '../templates/prometheus/rules-1.14', 'min_kubernetes': '1.14.0-0' }, @@ -34,18 +64,6 @@ charts = [ 'destination': '../templates/prometheus/rules-1.14', 'min_kubernetes': '1.14.0-0' }, - { - 'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml', - 'destination': '../templates/prometheus/rules', - 'min_kubernetes': '1.10.0-0', - 'max_kubernetes': '1.14.0-0' - }, - { - 'source': 
'https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/etcd3_alert.rules.yml', - 'destination': '../templates/prometheus/rules', - 'min_kubernetes': '1.10.0-0', - 'max_kubernetes': '1.14.0-0' - }, ] # Additional conditions map @@ -93,6 +111,7 @@ alert_condition_map = { 'NodeExporterDown': '.Values.nodeExporter.enabled', 'CoreDNSDown': '.Values.kubeDns.enabled', 'AlertmanagerDown': '.Values.alertmanager.enabled', + 'AggregatedAPIDown': 'semverCompare ">=1.18.0-0" $kubeTargetVersion', } replacement_map = { diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/alertmanager.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/alertmanager.rules.yaml deleted file mode 100644 index 71159849c..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/alertmanager.rules.yaml +++ /dev/null @@ -1,63 +0,0 @@ -{{- /* -Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }} -{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }} -{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }} -{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager.rules" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . | indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: alertmanager.rules - rules: - - alert: AlertmanagerConfigInconsistent - annotations: - message: The configuration of the instances of the Alertmanager cluster `{{`{{`}}$labels.service{{`}}`}}` are out of sync. - expr: count_values("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="{{ $operatorJob }}",namespace="{{ $namespace }}",controller="alertmanager"}) by (name, job, namespace, controller), "service", "$1", "name", "(.*)") != 1 - for: 5m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: AlertmanagerFailedReload - annotations: - message: Reloading Alertmanager's configuration has failed for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}. 
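Note: the rule templates in this patch (both the deleted rules/ files below and their rules-1.14 replacements) rely on Helm's backtick-string trick to emit literal Prometheus template braces: {{`{{`}} is a Helm action that prints the string "{{", so the braces survive `helm template` and are expanded later by Prometheus/Alertmanager. A minimal sketch of the pattern, reusing the AlertmanagerFailedReload annotation from the deleted file:

  # In the chart source:
  annotations:
    message: Reloading Alertmanager's configuration has failed for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}.
  # After rendering, Prometheus receives:
  #   message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod }}.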
- expr: alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 0 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: AlertmanagerMembersInconsistent - annotations: - message: Alertmanager has not found all other members of the cluster. - expr: |- - alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} - != on (service) GROUP_LEFT() - count by (service) (alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}) - for: 5m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/etcd.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/etcd.yaml deleted file mode 100644 index ce4e87bf4..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/etcd.yaml +++ /dev/null @@ -1,179 +0,0 @@ -{{- /* -Generated from 'etcd' group from https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/etcd3_alert.rules.yml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeEtcd.enabled .Values.defaultRules.rules.etcd }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "etcd" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . | indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: etcd - rules: - - alert: etcdInsufficientMembers - annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).' - expr: sum(up{job=~".*etcd.*"} == bool 1) by (job) < ((count(up{job=~".*etcd.*"}) by (job) + 1) / 2) - for: 3m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: etcdNoLeader - annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member {{`{{`}} $labels.instance {{`}}`}} has no leader.' 
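Note: the etcdInsufficientMembers expression above encodes the quorum threshold: `== bool 1` converts each member's up metric into a 0/1 value so the sum counts healthy members, and ((count + 1) / 2) is the minimum count needed for quorum. A worked instance for a 3-member cluster (the cluster size is illustrative):

  # threshold = (3 + 1) / 2 = 2, so the alert fires once fewer than
  # 2 members are up, i.e. exactly when the cluster has lost quorum.
  - alert: etcdInsufficientMembers
    expr: sum(up{job=~".*etcd.*"} == bool 1) by (job) < ((count(up{job=~".*etcd.*"}) by (job) + 1) / 2)
    for: 3m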
- expr: etcd_server_has_leader{job=~".*etcd.*"} == 0 - for: 1m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: etcdHighNumberOfLeaderChanges - annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": instance {{`{{`}} $labels.instance {{`}}`}} has seen {{`{{`}} $value {{`}}`}} leader changes within the last hour.' - expr: rate(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}[15m]) > 3 - for: 15m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: etcdHighNumberOfFailedGRPCRequests - annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' - expr: |- - 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method) - / - sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method) - > 1 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: etcdHighNumberOfFailedGRPCRequests - annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' - expr: |- - 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method) - / - sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method) - > 5 - for: 5m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: etcdGRPCRequestsSlow - annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": gRPC requests to {{`{{`}} $labels.grpc_method {{`}}`}} are taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' - expr: |- - histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le)) - > 0.15 - for: 10m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: etcdMemberCommunicationSlow - annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member communication with {{`{{`}} $labels.To {{`}}`}} is taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' - expr: |- - histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m])) - > 0.15 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: etcdHighNumberOfFailedProposals - annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last hour on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' 
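Note: etcdHighNumberOfFailedGRPCRequests above is a two-tier alert: the same error-ratio expression appears twice, once with a lower threshold and longer hold time for a warning, and once with a higher threshold and shorter hold time for a critical page. Skeleton of the pattern as it appears in the deleted file (second expression elided, it differs only in the threshold):

  - alert: etcdHighNumberOfFailedGRPCRequests
    expr: 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method) / sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method) > 1
    for: 10m
    labels:
      severity: warning
  - alert: etcdHighNumberOfFailedGRPCRequests
    expr: # same failure-percentage ratio as above, but > 5
    for: 5m
    labels:
      severity: critical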
- expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 - for: 15m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: etcdHighFsyncDurations - annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' - expr: |- - histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) - > 0.5 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: etcdHighCommitDurations - annotations: - message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile commit durations {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' - expr: |- - histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m])) - > 0.25 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: etcdHighNumberOfFailedHTTPRequests - annotations: - message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}' - expr: |- - sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m])) - BY (method) > 0.01 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: etcdHighNumberOfFailedHTTPRequests - annotations: - message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' - expr: |- - sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m])) - BY (method) > 0.05 - for: 10m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: etcdHTTPRequestsSlow - annotations: - message: etcd instance {{`{{`}} $labels.instance {{`}}`}} HTTP requests to {{`{{`}} $labels.method {{`}}`}} are slow. - expr: |- - histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m])) - > 0.15 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/general.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/general.rules.yaml deleted file mode 100644 index cde6feb5c..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/general.rules.yaml +++ /dev/null @@ -1,56 +0,0 @@ -{{- /* -Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! 
In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.general }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "general.rules" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . | indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: general.rules - rules: - - alert: TargetDown - annotations: - message: '{{`{{`}} $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}} targets are down.' - expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: Watchdog - annotations: - message: 'This is an alert meant to ensure that the entire alerting pipeline is functional. - - This alert is always firing, therefore it should always be firing in Alertmanager - - and always fire against a receiver. There are integrations with various notification - - mechanisms that send a notification when this alert is not firing. For example the - - "DeadMansSnitch" integration in PagerDuty. - - ' - expr: vector(1) - labels: - severity: none -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/k8s.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/k8s.rules.yaml deleted file mode 100644 index 08aa7fe2b..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/k8s.rules.yaml +++ /dev/null @@ -1,83 +0,0 @@ -{{- /* -Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8s }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: k8s.rules - rules: - - expr: sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace) - record: namespace:container_cpu_usage_seconds_total:sum_rate - - expr: sum(container_memory_usage_bytes{job="kubelet", image!="", container_name!=""}) by (namespace) - record: namespace:container_memory_usage_bytes:sum - - expr: |- - sum by (namespace, pod_name, container_name) ( - rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m]) - ) - record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate - - expr: |- - sum by(namespace) ( - kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} - * on (endpoint, instance, job, namespace, pod, service) - group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1) - ) - record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum - - expr: |- - sum by (namespace) ( - kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} - * on (endpoint, instance, job, namespace, pod, service) - group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1) - ) - record: namespace_name:kube_pod_container_resource_requests_cpu_cores:sum - - expr: |- - sum( - label_replace( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, - "replicaset", "$1", "owner_name", "(.*)" - ) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job="kube-state-metrics"}, - "workload", "$1", "owner_name", "(.*)" - ) - ) by (namespace, workload, pod) - labels: - workload_type: deployment - record: mixin_pod_workload - - expr: |- - sum( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, - "workload", "$1", "owner_name", "(.*)" - ) - ) by (namespace, workload, pod) - labels: - workload_type: daemonset - record: mixin_pod_workload - - expr: |- - sum( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, - "workload", "$1", "owner_name", "(.*)" - ) - ) by (namespace, workload, pod) - labels: - workload_type: statefulset - record: mixin_pod_workload -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-apiserver.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-apiserver.rules.yaml deleted file mode 100644 index e3a929692..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-apiserver.rules.yaml +++ /dev/null @@ -1,39 +0,0 @@ -{{- /* -Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! 
In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserver }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver.rules" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . | indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: kube-apiserver.rules - rules: - - expr: histogram_quantile(0.99, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.99' - record: cluster_quantile:apiserver_request_latencies:histogram_quantile - - expr: histogram_quantile(0.9, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.9' - record: cluster_quantile:apiserver_request_latencies:histogram_quantile - - expr: histogram_quantile(0.5, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.5' - record: cluster_quantile:apiserver_request_latencies:histogram_quantile -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-alerting.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-alerting.rules.yaml deleted file mode 100644 index a8d5400cb..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-alerting.rules.yaml +++ /dev/null @@ -1,47 +0,0 @@ -{{- /* -Generated from 'kube-prometheus-node-alerting.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeAlerting }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-node-alerting.rules" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: kube-prometheus-node-alerting.rules - rules: - - alert: NodeDiskRunningFull - annotations: - message: Device {{`{{`}} $labels.device {{`}}`}} of node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} will be full within the next 24 hours. - expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)' - for: 30m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: NodeDiskRunningFull - annotations: - message: Device {{`{{`}} $labels.device {{`}}`}} of node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} will be full within the next 2 hours. - expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)' - for: 10m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-recording.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-recording.rules.yaml deleted file mode 100644 index 87f072fd0..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-recording.rules.yaml +++ /dev/null @@ -1,41 +0,0 @@ -{{- /* -Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeRecording }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-node-recording.rules" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: kube-prometheus-node-recording.rules - rules: - - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance) - record: instance:node_cpu:rate:sum - - expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance) - record: instance:node_filesystem_usage:sum - - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) - record: instance:node_network_receive_bytes:rate:sum - - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) - record: instance:node_network_transmit_bytes:rate:sum - - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance) - record: instance:node_cpu:ratio - - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) - record: cluster:node_cpu:sum_rate5m - - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) - record: cluster:node_cpu:ratio -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-scheduler.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-scheduler.rules.yaml deleted file mode 100644 index 46c8d1d4a..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-scheduler.rules.yaml +++ /dev/null @@ -1,63 +0,0 @@ -{{- /* -Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeScheduler }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-scheduler.rules" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: kube-scheduler.rules - rules: - - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.99' - record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile - - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.99' - record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile - - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.99' - record: cluster_quantile:scheduler_binding_latency:histogram_quantile - - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.9' - record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile - - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.9' - record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile - - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.9' - record: cluster_quantile:scheduler_binding_latency:histogram_quantile - - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.5' - record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile - - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.5' - record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile - - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.5' - record: cluster_quantile:scheduler_binding_latency:histogram_quantile -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-absent.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-absent.yaml deleted file mode 100644 index 5c1ebce9e..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-absent.yaml +++ /dev/null @@ -1,159 +0,0 @@ -{{- /* -Generated from 'kubernetes-absent' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! 
In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesAbsent }} -{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }} -{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }} -{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }} -{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-absent" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . | indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: kubernetes-absent - rules: -{{- if .Values.alertmanager.enabled }} - - alert: AlertmanagerDown - annotations: - message: Alertmanager has disappeared from Prometheus target discovery. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerdown - expr: absent(up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 1) - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} -{{- if .Values.kubeDns.enabled }} - - alert: CoreDNSDown - annotations: - message: CoreDNS has disappeared from Prometheus target discovery. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-corednsdown - expr: absent(up{job="kube-dns"} == 1) - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} -{{- if .Values.kubeApiServer.enabled }} - - alert: KubeAPIDown - annotations: - message: KubeAPI has disappeared from Prometheus target discovery. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapidown - expr: absent(up{job="apiserver"} == 1) - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} -{{- if .Values.kubeControllerManager.enabled }} - - alert: KubeControllerManagerDown - annotations: - message: KubeControllerManager has disappeared from Prometheus target discovery. 
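Every alert in the kubernetes-absent group above leans on the same PromQL idiom: `absent(up{job="..."} == 1)` returns a one-element vector when no healthy target matches, so the alert fires whether the job's targets are down or were never discovered at all. A minimal standalone sketch, assuming a hypothetical job name:

```yaml
# Minimal PrometheusRule built on the absent() idiom from the group above.
# The job label "my-component" is a hypothetical example, not from this chart.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: example-absent-rules
spec:
  groups:
    - name: example-absent
      rules:
        - alert: MyComponentDown
          expr: absent(up{job="my-component"} == 1)
          # `up == 0` samples drop out of `up == 1`, and undiscovered targets
          # produce no series at all; absent() fires in both cases.
          for: 15m
          labels:
            severity: critical
```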
- runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecontrollermanagerdown - expr: absent(up{job="kube-controller-manager"} == 1) - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} -{{- if .Values.kubeScheduler.enabled }} - - alert: KubeSchedulerDown - annotations: - message: KubeScheduler has disappeared from Prometheus target discovery. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeschedulerdown - expr: absent(up{job="kube-scheduler"} == 1) - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} -{{- if .Values.kubeStateMetrics.enabled }} - - alert: KubeStateMetricsDown - annotations: - message: KubeStateMetrics has disappeared from Prometheus target discovery. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricsdown - expr: absent(up{job="kube-state-metrics"} == 1) - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} -{{- if .Values.prometheusOperator.kubeletService.enabled }} - - alert: KubeletDown - annotations: - message: Kubelet has disappeared from Prometheus target discovery. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletdown - expr: absent(up{job="kubelet"} == 1) - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} -{{- if .Values.nodeExporter.enabled }} - - alert: NodeExporterDown - annotations: - message: NodeExporter has disappeared from Prometheus target discovery. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeexporterdown - expr: absent(up{job="node-exporter"} == 1) - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} - - alert: PrometheusDown - annotations: - message: Prometheus has disappeared from Prometheus target discovery. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusdown - expr: absent(up{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 1) - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- if .Values.prometheusOperator.enabled }} - - alert: PrometheusOperatorDown - annotations: - message: PrometheusOperator has disappeared from Prometheus target discovery. 
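The `additionalRuleLabels` conditional repeated under every alert above splices operator-supplied labels into each rendered rule. A sketch of the round trip, with illustrative values that are not chart defaults:

```yaml
# values.yaml (illustrative keys):
defaultRules:
  additionalRuleLabels:
    environment: prod
    team: platform

# `{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}` then
# renders each alert's label block as:
#   labels:
#     severity: critical
#     environment: prod
#     team: platform
```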
- runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusoperatordown - expr: absent(up{job="{{ $operatorJob }}",namespace="{{ $namespace }}"} == 1) - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-apps.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-apps.yaml deleted file mode 100644 index e7a41ca2a..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-apps.yaml +++ /dev/null @@ -1,200 +0,0 @@ -{{- /* -Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesApps }} -{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . | indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: kubernetes-apps - rules: - - alert: KubePodCrashLooping - annotations: - message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 5 minutes. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping - expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) * 60 * 5 > 0 - for: 1h - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubePodNotReady - annotations: - message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than an hour. 
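In the KubePodCrashLooping expression above, `rate(...[15m])` gives restarts per second averaged over 15 minutes, and `* 60 * 5` rescales that to the restarts-per-five-minutes figure interpolated into the message. The selector below is the chart's own (namespace filter dropped); the sample numbers are illustrative:

```yaml
# rate(kube_pod_container_status_restarts_total[15m])  -> restarts / second
# * 60 * 5                                             -> restarts / 5 minutes
# e.g. 3 restarts in the 15m window:
#   3 / 900 s ≈ 0.00333/s; 0.00333 * 300 ≈ 1.0 restart per 5 minutes -> > 0
- expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0
```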
- runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodnotready - expr: sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown"}) > 0 - for: 1h - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeDeploymentGenerationMismatch - annotations: - message: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentgenerationmismatch - expr: |- - kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - != - kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeDeploymentReplicasMismatch - annotations: - message: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than an hour. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentreplicasmismatch - expr: |- - kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - != - kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - for: 1h - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeStatefulSetReplicasMismatch - annotations: - message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetreplicasmismatch - expr: |- - kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - != - kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeStatefulSetGenerationMismatch - annotations: - message: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetgenerationmismatch - expr: |- - kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - != - kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeStatefulSetUpdateNotRolledOut - annotations: - message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out. 
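The generation-mismatch alerts above hinge on a kube-state-metrics detail: `..._metadata_generation` increments on every spec change, while `..._status_observed_generation` only advances once the controller has processed it, so a sustained inequality means the rollout was seen but never converged. A commented sketch of the Deployment variant, with an illustrative alert name and the namespace filter dropped for brevity:

```yaml
- alert: ExampleDeploymentGenerationMismatch   # illustrative name
  expr: |-
    kube_deployment_status_observed_generation{job="kube-state-metrics"}
    !=
    kube_deployment_metadata_generation{job="kube-state-metrics"}
  for: 15m   # tolerate the short lag while the controller catches up
```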
- runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetupdatenotrolledout - expr: |- - max without (revision) ( - kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - unless - kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - ) - * - ( - kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - != - kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - ) - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeDaemonSetRolloutStuck - annotations: - message: Only {{`{{`}} $value {{`}}`}}% of the desired Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are scheduled and ready. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetrolloutstuck - expr: |- - kube_daemonset_status_number_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - / - kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} * 100 < 100 - for: 15m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeDaemonSetNotScheduled - annotations: - message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.' - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetnotscheduled - expr: |- - kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - - - kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeDaemonSetMisScheduled - annotations: - message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.' - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetmisscheduled - expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeCronJobRunning - annotations: - message: CronJob {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.cronjob {{`}}`}} is taking more than 1h to complete. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecronjobrunning - expr: time() - kube_cronjob_next_schedule_time{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 3600 - for: 1h - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeJobCompletion - annotations: - message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than one hour to complete. 
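KubeDaemonSetRolloutStuck above turns the ready/desired ratio into the percentage quoted in its message: `ready / desired * 100 < 100` fires while any desired pod is unready. A sketch with an illustrative reading, namespace filter omitted:

```yaml
# e.g. 9 ready of 10 desired -> 90, rendering "Only 90% of the desired Pods ..."
- expr: |-
    kube_daemonset_status_number_ready{job="kube-state-metrics"}
    /
    kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} * 100 < 100
  for: 15m   # suppress transient dips during normal rollouts
```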
- runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobcompletion - expr: kube_job_spec_completions{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - kube_job_status_succeeded{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0 - for: 1h - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeJobFailed - annotations: - message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobfailed - expr: kube_job_status_failed{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0 - for: 1h - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-resources.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-resources.yaml deleted file mode 100644 index b34b442f3..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-resources.yaml +++ /dev/null @@ -1,121 +0,0 @@ -{{- /* -Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesResources }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-resources" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . | indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: kubernetes-resources - rules: - - alert: KubeCPUOvercommit - annotations: - message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit - expr: |- - sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum) - / - sum(node:node_num_cpu:sum) - > - (count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum) - for: 5m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeMemOvercommit - annotations: - message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure. 
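The KubeCPUOvercommit threshold above, `(count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum)`, is the share of cluster CPU that survives losing one node, so the alert fires when requested CPU could no longer be rescheduled after a single node failure. Worked numbers, illustrative:

```yaml
# 4-node cluster: threshold = (4-1)/4 = 0.75
#   requests/capacity = 0.80 -> 0.80 > 0.75 -> overcommitted, alert
#   requests/capacity = 0.70 -> still fits on 3 nodes, no alert
- expr: |-
    sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum)
    /
    sum(node:node_num_cpu:sum)
    >
    (count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum)
```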
- runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit - expr: |- - sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum) - / - sum(node_memory_MemTotal_bytes) - > - (count(node:node_num_cpu:sum)-1) - / - count(node:node_num_cpu:sum) - for: 5m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeCPUOvercommit - annotations: - message: Cluster has overcommitted CPU resource requests for Namespaces. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit - expr: |- - sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"}) - / - sum(node:node_num_cpu:sum) - > 1.5 - for: 5m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeMemOvercommit - annotations: - message: Cluster has overcommitted memory resource requests for Namespaces. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit - expr: |- - sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"}) - / - sum(node_memory_MemTotal_bytes{job="node-exporter"}) - > 1.5 - for: 5m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeQuotaExceeded - annotations: - message: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} printf "%0.0f" $value {{`}}`}}% of its {{`{{`}} $labels.resource {{`}}`}} quota. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubequotaexceeded - expr: |- - 100 * kube_resourcequota{job="kube-state-metrics", type="used"} - / ignoring(instance, job, type) - (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) - > 90 - for: 15m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: CPUThrottlingHigh - annotations: - message: '{{`{{`}} printf "%0.0f" $value {{`}}`}}% throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container_name {{`}}`}} in pod {{`{{`}} $labels.pod_name {{`}}`}}.' - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-cputhrottlinghigh - expr: |- - 100 * sum(increase(container_cpu_cfs_throttled_periods_total{container_name!="", }[5m])) by (container_name, pod_name, namespace) - / - sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container_name, pod_name, namespace) - > 25 - for: 15m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-storage.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-storage.yaml deleted file mode 100644 index 6469fffc5..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-storage.yaml +++ /dev/null @@ -1,72 +0,0 @@ -{{- /* -Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! 
In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesStorage }} -{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-storage" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . | indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: kubernetes-storage - rules: - - alert: KubePersistentVolumeUsageCritical - annotations: - message: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} printf "%0.2f" $value {{`}}`}}% free. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeusagecritical - expr: |- - 100 * kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"} - / - kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"} - < 3 - for: 1m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubePersistentVolumeFullInFourDays - annotations: - message: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} printf "%0.2f" $value {{`}}`}}% is available. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefullinfourdays - expr: |- - 100 * ( - kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"} - / - kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"} - ) < 15 - and - predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}[6h], 4 * 24 * 3600) < 0 - for: 5m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubePersistentVolumeErrors - annotations: - message: The persistent volume {{`{{`}} $labels.persistentvolume {{`}}`}} has status {{`{{`}} $labels.phase {{`}}`}}. 
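KubePersistentVolumeFullInFourDays above pairs a static guard with a trend: `predict_linear(...[6h], 4 * 24 * 3600)` fits a linear regression over six hours of free-space samples and extrapolates four days ahead, and a negative projection combined with `< 15`% free triggers the alert. A minimal sketch of the trend half, namespace selector trimmed:

```yaml
- expr: |-
    predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[6h], 4 * 24 * 3600) < 0
  # 4 * 24 * 3600 = 345600 s: project the 6h trend four days forward;
  # < 0 means available bytes are expected to hit zero inside that window.
```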
- runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeerrors - expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0 - for: 5m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-system.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-system.yaml deleted file mode 100644 index da232057b..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-system.yaml +++ /dev/null @@ -1,184 +0,0 @@ -{{- /* -Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . | indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: kubernetes-system - rules: - - alert: KubeNodeNotReady - annotations: - message: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than an hour.' - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubenodenotready - expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 - for: 1h - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeVersionMismatch - annotations: - message: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeversionmismatch - expr: count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1 - for: 1h - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeClientErrors - annotations: - message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} printf "%0.0f" $value {{`}}`}}% errors.' 
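KubeVersionMismatch above normalizes version strings before counting them: the inner `label_replace` keeps only the `vMAJOR.MINOR.PATCH` prefix of `gitVersion`, `count by (gitVersion)` collapses components per distinct version, and the outer `count(...) > 1` fires when more than one version is live. An annotated copy of the expression, with hypothetical build strings:

```yaml
# label_replace rewrites gitVersion in place, keeping the semver prefix:
#   "v1.13.5-eks-6bad6d" -> "v1.13.5"   (illustrative inputs)
#   "v1.13.7"            -> "v1.13.7"
# count by (gitVersion): one series per distinct version;
# outer count(...) > 1:  alert when components disagree.
- expr: count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
```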
- runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors - expr: |- - (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job) - / - sum(rate(rest_client_requests_total[5m])) by (instance, job)) - * 100 > 1 - for: 15m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeClientErrors - annotations: - message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} printf "%0.0f" $value {{`}}`}} errors / second. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors - expr: sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1 - for: 15m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeletTooManyPods - annotations: - message: Kubelet {{`{{`}} $labels.instance {{`}}`}} is running {{`{{`}} $value {{`}}`}} Pods, close to the limit of 110. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubelettoomanypods - expr: kubelet_running_pod_count{job="kubelet"} > 110 * 0.9 - for: 15m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeAPILatencyHigh - annotations: - message: The API server has a 99th percentile latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh - expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeAPILatencyHigh - annotations: - message: The API server has a 99th percentile latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh - expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4 - for: 10m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeAPIErrorsHigh - annotations: - message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh - expr: |- - sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) - / - sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3 - for: 10m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeAPIErrorsHigh - annotations: - message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests. 
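The two KubeAPILatencyHigh entries above show the chart's severity-tiering pattern: one precomputed series, two thresholds. The series they read, `cluster_quantile:apiserver_request_latencies:histogram_quantile`, comes from the kube-apiserver.rules group removed earlier in this patch, where microsecond buckets are divided by 1e+06 into seconds. A stripped-down sketch of the tiering, with illustrative alert names and the selector reduced to the quantile label:

```yaml
- alert: ExampleLatencyWarning
  expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{quantile="0.99"} > 1
  labels:
    severity: warning
- alert: ExampleLatencyCritical   # same expression, stricter threshold
  expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{quantile="0.99"} > 4
  labels:
    severity: critical
```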
- runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh - expr: |- - sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) - / - sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeAPIErrorsHigh - annotations: - message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh - expr: |- - sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) - / - sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10 - for: 10m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeAPIErrorsHigh - annotations: - message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh - expr: |- - sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) - / - sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeClientCertificateExpiration - annotations: - message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration - expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: KubeClientCertificateExpiration - annotations: - message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours. 
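In the KubeClientCertificateExpiration pair above, `histogram_quantile(0.01, ...)` picks the 1st percentile of remaining certificate lifetime, i.e. the soonest-expiring client certificates, and the thresholds are plain seconds: 604800 = 7 days for the warning and 86400 = 24 hours for the critical variant. The `..._count > 0` conjunct skips clusters that expose no such metrics. An annotated copy of the warning expression:

```yaml
- expr: |-
    apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0
    and
    histogram_quantile(0.01,
      sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))
    ) < 604800   # 7 * 24 * 3600 s; the critical variant uses 86400 (24 h)
```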
- runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration - expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-network.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-network.yaml deleted file mode 100644 index c75f1ae07..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-network.yaml +++ /dev/null @@ -1,57 +0,0 @@ -{{- /* -Generated from 'node-network' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.network }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-network" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: node-network - rules: - - alert: NetworkReceiveErrors - annotations: - message: Network interface "{{`{{`}} $labels.device {{`}}`}}" showing receive errors on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}" - expr: rate(node_network_receive_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0 - for: 2m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: NetworkTransmitErrors - annotations: - message: Network interface "{{`{{`}} $labels.device {{`}}`}}" showing transmit errors on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}" - expr: rate(node_network_transmit_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0 - for: 2m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: NodeNetworkInterfaceFlapping - annotations: - message: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing it's up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}" - expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 - for: 2m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-time.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-time.yaml deleted file mode 100644 index b7a2fc92f..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-time.yaml +++ /dev/null @@ -1,37 +0,0 @@ -{{- /* -Generated from 'node-time' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.time }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-time" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: node-time - rules: - - alert: ClockSkewDetected - annotations: - message: Clock skew detected on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}. Ensure NTP is configured correctly on this host. - expr: abs(node_timex_offset_seconds{job="node-exporter"}) > 0.03 - for: 2m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node.rules.yaml deleted file mode 100644 index 2bc7af3a9..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node.rules.yaml +++ /dev/null @@ -1,202 +0,0 @@ -{{- /* -Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.node }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: node.rules - rules: - - expr: sum(min(kube_pod_info) by (node)) - record: ':kube_pod_info_node_count:' - - expr: max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod) - record: 'node_namespace_pod:kube_pod_info:' - - expr: |- - count by (node) (sum by (node, cpu) ( - node_cpu_seconds_total{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - )) - record: node:node_num_cpu:sum - - expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])) - record: :node_cpu_utilisation:avg1m - - expr: |- - 1 - avg by (node) ( - rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info:) - record: node:node_cpu_utilisation:avg1m - - expr: |- - node:node_cpu_utilisation:avg1m - * - node:node_num_cpu:sum - / - scalar(sum(node:node_num_cpu:sum)) - record: node:cluster_cpu_utilisation:ratio - - expr: |- - sum(node_load1{job="node-exporter"}) - / - sum(node:node_num_cpu:sum) - record: ':node_cpu_saturation_load1:' - - expr: |- - sum by (node) ( - node_load1{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - / - node:node_num_cpu:sum - record: 'node:node_cpu_saturation_load1:' - - expr: |- - 1 - - sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) - / - sum(node_memory_MemTotal_bytes{job="node-exporter"}) - record: ':node_memory_utilisation:' - - expr: sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) - record: :node_memory_MemFreeCachedBuffers_bytes:sum - - expr: sum(node_memory_MemTotal_bytes{job="node-exporter"}) - record: :node_memory_MemTotal_bytes:sum - - expr: |- - sum by (node) ( - (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_memory_bytes_available:sum - - expr: |- - sum by (node) ( - node_memory_MemTotal_bytes{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_memory_bytes_total:sum - - expr: |- - (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) - / - node:node_memory_bytes_total:sum - record: node:node_memory_utilisation:ratio - - expr: |- - (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) - / - scalar(sum(node:node_memory_bytes_total:sum)) - record: node:cluster_memory_utilisation:ratio - - expr: |- - 1e3 * sum( - (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) - + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) - ) - record: :node_memory_swap_io_bytes:sum_rate - - expr: |- - 1 - - sum by (node) ( - (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - / - sum by (node) ( - 
node_memory_MemTotal_bytes{job="node-exporter"} - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: 'node:node_memory_utilisation:' - - expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum) - record: 'node:node_memory_utilisation_2:' - - expr: |- - 1e3 * sum by (node) ( - (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) - + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_memory_swap_io_bytes:sum_rate - - expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) - record: :node_disk_utilisation:avg_irate - - expr: |- - avg by (node) ( - irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_disk_utilisation:avg_irate - - expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) - record: :node_disk_saturation:avg_irate - - expr: |- - avg by (node) ( - irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_disk_saturation:avg_irate - - expr: |- - max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} - - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) - / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) - record: 'node:node_filesystem_usage:' - - expr: max by (instance, namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) - record: 'node:node_filesystem_avail:' - - expr: |- - sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + - sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) - record: :node_net_utilisation:sum_irate - - expr: |- - sum by (node) ( - (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) + - irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_net_utilisation:sum_irate - - expr: |- - sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + - sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) - record: :node_net_saturation:sum_irate - - expr: |- - sum by (node) ( - (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) + - irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) - * on (namespace, pod) group_left(node) - node_namespace_pod:kube_pod_info: - ) - record: node:node_net_saturation:sum_irate - - expr: |- - max( - max( - kube_pod_info{job="kube-state-metrics", host_ip!=""} - ) by (node, host_ip) - * on (host_ip) group_right (node) - label_replace( - (max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*" - ) - ) by (node) - record: 'node:node_inodes_total:' - - expr: |- - max( - max( - kube_pod_info{job="kube-state-metrics", host_ip!=""} - ) by (node, host_ip) - * 
on (host_ip) group_right (node) - label_replace( - (max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*" - ) - ) by (node) - record: 'node:node_inodes_free:' -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus-operator.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus-operator.yaml deleted file mode 100644 index a8a8915b6..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus-operator.yaml +++ /dev/null @@ -1,49 +0,0 @@ -{{- /* -Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheusOperator }} -{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }} -{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus-operator" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . | indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: prometheus-operator - rules: - - alert: PrometheusOperatorReconcileErrors - annotations: - message: Errors while reconciling {{`{{`}} $labels.controller {{`}}`}} in {{`{{`}} $labels.namespace {{`}}`}} Namespace. - expr: rate(prometheus_operator_reconcile_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: PrometheusOperatorNodeLookupErrors - annotations: - message: Errors while reconciling Prometheus in {{`{{`}} $labels.namespace {{`}}`}} Namespace. 
- expr: rate(prometheus_operator_node_address_lookup_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus.rules.yaml deleted file mode 100644 index 0480c83b5..000000000 --- a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus.rules.yaml +++ /dev/null @@ -1,139 +0,0 @@ -{{- /* -Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml -Do not change in-place! In order to change this file first read following link: -https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack -*/ -}} -{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheus }} -{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }} -{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }} - namespace: {{ template "kube-prometheus-stack.namespace" . }} - labels: - app: {{ template "kube-prometheus-stack.name" . }} -{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} -{{- if .Values.defaultRules.labels }} -{{ toYaml .Values.defaultRules.labels | indent 4 }} -{{- end }} -{{- if .Values.defaultRules.annotations }} - annotations: -{{ toYaml .Values.defaultRules.annotations | indent 4 }} -{{- end }} -spec: - groups: - - name: prometheus.rules - rules: - - alert: PrometheusConfigReloadFailed - annotations: - description: Reloading Prometheus' configuration has failed for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} - summary: Reloading Prometheus' configuration failed - expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 0 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: PrometheusNotificationQueueRunningFull - annotations: - description: Prometheus' alert notification queue is running full for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} - summary: Prometheus' alert notification queue is running full - expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: PrometheusErrorSendingAlerts - annotations: - description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}} - summary: Errors while sending alert from Prometheus - expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.01 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: PrometheusErrorSendingAlerts - annotations: - description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}} - summary: Errors while sending alerts from Prometheus - expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.03 - for: 10m - labels: - severity: critical -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: PrometheusNotConnectedToAlertmanagers - annotations: - description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} is not connected to any Alertmanagers - summary: Prometheus is not connected to any Alertmanagers - expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} < 1 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: PrometheusTSDBReloadsFailing - annotations: - description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value 
| humanize{{`}}`}} reload failures over the last four hours.' - summary: Prometheus has issues reloading data blocks from disk - expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0 - for: 12h - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: PrometheusTSDBCompactionsFailing - annotations: - description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last four hours.' - summary: Prometheus has issues compacting sample blocks - expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0 - for: 12h - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: PrometheusTSDBWALCorruptions - annotations: - description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} has a corrupted write-ahead log (WAL).' - summary: Prometheus write-ahead log is corrupted - expr: prometheus_tsdb_wal_corruptions_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} > 0 - for: 4h - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: PrometheusNotIngestingSamples - annotations: - description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} isn't ingesting samples. - summary: Prometheus isn't ingesting samples - expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} - - alert: PrometheusTargetScrapesDuplicate - annotations: - description: '{{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has many samples rejected due to duplicate timestamps but different values' - summary: Prometheus has many samples rejected - expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 - for: 10m - labels: - severity: warning -{{- if .Values.defaultRules.additionalRuleLabels }} -{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} -{{- end }} -{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/unittests/alertmanager/ingress_test.yaml b/packages/rancher-monitoring/generated-changes/exclude/unittests/alertmanager/ingress_test.yaml new file mode 100644 index 000000000..986c7563f --- /dev/null +++ b/packages/rancher-monitoring/generated-changes/exclude/unittests/alertmanager/ingress_test.yaml @@ -0,0 +1,97 @@ +suite: test ingress +templates: + - alertmanager/ingress.yaml +tests: + - it: should be empty if alertmanager is not enabled + set: + alertmanager.enabled: false + alertmanager.ingress.enabled: true + asserts: + - hasDocuments: + count: 0 + - it: should be empty if ingress is not enabled + set: + alertmanager.enabled: true + alertmanager.ingress.enabled: false + asserts: + - hasDocuments: + count: 0 + - it: should have apiVersion extensions/v1beta1 for k8s < 1.16 + set: + alertmanager.enabled: true + 
alertmanager.ingress.enabled: true + capabilities: + majorVersion: 1 + minorVersion: 15 + asserts: + - hasDocuments: + count: 1 + - isKind: + of: Ingress + - isAPIVersion: + of: extensions/v1beta1 + - it: should have apiVersion networking.k8s.io/v1beta1 for k8s >= 1.16 < 1.19 + set: + alertmanager.enabled: true + alertmanager.ingress.enabled: true + capabilities: + majorVersion: 1 + minorVersion: 16 + apiVersions: + - networking.k8s.io/v1beta1 + asserts: + - hasDocuments: + count: 1 + - isKind: + of: Ingress + - isAPIVersion: + of: networking.k8s.io/v1beta1 + - it: should fall back to apiVersion networking.k8s.io/v1beta1 for k8s < 1.19 even when networking.k8s.io/v1 is available + set: + alertmanager.enabled: true + alertmanager.ingress.enabled: true + capabilities: + majorVersion: 1 + minorVersion: 10 + apiVersions: + - networking.k8s.io/v1 + - networking.k8s.io/v1beta1 + asserts: + - hasDocuments: + count: 1 + - isKind: + of: Ingress + - isAPIVersion: + of: networking.k8s.io/v1beta1 + - it: should have apiVersion networking.k8s.io/v1 for k8s >= 1.22 + set: + alertmanager.enabled: true + alertmanager.ingress.enabled: true + capabilities: + majorVersion: 1 + minorVersion: 22 + apiVersions: + - networking.k8s.io/v1 + asserts: + - hasDocuments: + count: 1 + - isKind: + of: Ingress + - isAPIVersion: + of: networking.k8s.io/v1 + + - it: should have explicit pathType ImplementationSpecific for networking.k8s.io/v1 by default + set: + alertmanager.enabled: true + alertmanager.ingress.enabled: true + capabilities: + majorVersion: 1 + minorVersion: 19 + apiVersions: + - networking.k8s.io/v1 + asserts: + - hasDocuments: + count: 1 + - equal: + path: spec.rules[0].http.paths[0].pathType + value: ImplementationSpecific diff --git a/packages/rancher-monitoring/generated-changes/overlay/app-README.md b/packages/rancher-monitoring/generated-changes/overlay/app-README.md index af77e04ec..fab28916f 100644 --- a/packages/rancher-monitoring/generated-changes/overlay/app-README.md +++ b/packages/rancher-monitoring/generated-changes/overlay/app-README.md @@ -13,3 +13,17 @@ The chart installs the following components: - [Prometheus Adapter](https://github.com/helm/charts/tree/master/stable/prometheus-adapter) - The adapter allows a user to expose custom metrics, resource metrics, and external metrics on the default [Prometheus](https://prometheus.io/) instance to the Kubernetes API Server. For more information, review the Helm README of this chart. + +## Upgrading from 100.0.0+up16.6.0 to 100.1.0+up19.0.3 + +### Notable changes: +Grafana: +- `sidecar.dashboards.searchNamespace`, `sidecar.datasources.searchNamespace` and `sidecar.notifiers.searchNamespace` now accept a list of namespaces. + +Kube-state-metrics: +- The type of `collectors` has changed from a dictionary to a list. +- `kubeStateMetrics.serviceMonitor.namespaceOverride` was replaced by `kube-state-metrics.namespaceOverride` (see the values sketch below). + +### Known issues: +- Occasionally, the upgrade fails with errors related to the webhook `prometheusrulemutate.monitoring.coreos.com`. This is a known upstream issue; the workaround is to trigger the upgrade again.
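As a rough sketch of the values migration described in the notes above — the key names are taken from the notes themselves, while the namespace names and collector entries below are hypothetical examples, not chart defaults:

```yaml
# Illustrative values.yaml fragment for 100.1.0+up19.0.3 (hypothetical values)
grafana:
  sidecar:
    dashboards:
      # was a single namespace string (e.g. searchNamespace: cattle-dashboards);
      # it now accepts a list of namespaces
      searchNamespace:
        - cattle-dashboards
        - team-dashboards   # hypothetical additional namespace
kube-state-metrics:
  # replaces the removed kubeStateMetrics.serviceMonitor.namespaceOverride
  namespaceOverride: cattle-monitoring-system
  # collectors is now a list of collector names rather than a dictionary of toggles
  collectors:
    - pods
    - deployments
```

The rename is also enforced at render time: the kube-state-metrics ServiceMonitor patch further down in this commit adds a `fail` guard that aborts the upgrade with an explicit message when the old `kubeStateMetrics.serviceMonitor.namespaceOverride` key is still set. The intermittent webhook failure described under known issues is tracked in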
[32416](https://github.com/rancher/rancher/issues/32416#issuecomment-828881726) + diff --git a/packages/rancher-monitoring/generated-changes/overlay/templates/rancher-monitoring/clusterrole.yaml b/packages/rancher-monitoring/generated-changes/overlay/templates/rancher-monitoring/clusterrole.yaml index a115de7ca..be556cb4e 100644 --- a/packages/rancher-monitoring/generated-changes/overlay/templates/rancher-monitoring/clusterrole.yaml +++ b/packages/rancher-monitoring/generated-changes/overlay/templates/rancher-monitoring/clusterrole.yaml @@ -43,7 +43,6 @@ metadata: rbac.authorization.k8s.io/aggregate-to-edit: "true" {{- end }} rules: -rules: - apiGroups: - monitoring.coreos.com resources: diff --git a/packages/rancher-monitoring/generated-changes/patch/Chart.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/Chart.yaml.patch index 89c47b28a..7b1e315b0 100644 --- a/packages/rancher-monitoring/generated-changes/patch/Chart.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/Chart.yaml.patch @@ -1,6 +1,6 @@ --- charts-original/Chart.yaml +++ charts/Chart.yaml -@@ -1,3 +1,35 @@ +@@ -1,3 +1,38 @@ +apiVersion: v2 +description: Collects several related Helm charts, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator. +icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png @@ -20,12 +20,15 @@ + - name: Arvind + email: arvind.iyengar@suse.com + url: "" ++ - name: Jack ++ email: jiaqi.luo@suse.com ++ url: "https://github.com/jiaqiluo" +name: rancher-monitoring +sources: + - https://github.com/prometheus-community/helm-charts + - https://github.com/prometheus-operator/kube-prometheus -+version: 16.6.0 -+appVersion: 0.48.0 ++version: 19.0.3 ++appVersion: 0.50.0 +kubeVersion: ">=1.16.0-0" +home: https://github.com/prometheus-operator/kube-prometheus +keywords: @@ -36,12 +39,12 @@ annotations: artifacthub.io/links: | - name: Chart Source -@@ -5,8 +37,16 @@ +@@ -5,8 +40,19 @@ - name: Upstream Project url: https://github.com/prometheus-operator/kube-prometheus artifacthub.io/operator: "true" -apiVersion: v2 --appVersion: 0.48.0 +-appVersion: 0.50.0 + catalog.cattle.io/certified: rancher + catalog.cattle.io/namespace: cattle-monitoring-system + catalog.cattle.io/release-name: rancher-monitoring @@ -52,10 +55,13 @@ + catalog.cattle.io/auto-install: rancher-monitoring-crd=match + catalog.cattle.io/requests-cpu: "4500m" + catalog.cattle.io/requests-memory: "4000Mi" ++ catalog.cattle.io/rancher-version: ">= 2.6.0-0 <=2.6.99-0" ++ catalog.cattle.io/kube-version: ">=1.16.0-0" ++ catalog.cattle.io/upstream-version: "19.0.3" dependencies: - condition: grafana.enabled name: grafana -@@ -71,34 +111,6 @@ +@@ -71,34 +117,6 @@ - condition: rkeScheduler.enabled name: rkeScheduler repository: file://./charts/rkeScheduler @@ -90,4 +96,4 @@ -- https://github.com/prometheus-community/helm-charts -- https://github.com/prometheus-operator/kube-prometheus -type: application --version: 16.6.0 +-version: 19.0.3 diff --git a/packages/rancher-monitoring/generated-changes/patch/README.md.patch b/packages/rancher-monitoring/generated-changes/patch/README.md.patch index 927bd6ed9..340d746e8 100644 --- a/packages/rancher-monitoring/generated-changes/patch/README.md.patch +++ b/packages/rancher-monitoring/generated-changes/patch/README.md.patch @@ -1,6 +1,6 @@ --- charts-original/README.md +++ 
charts/README.md -@@ -193,7 +193,39 @@ +@@ -230,7 +230,39 @@ helm show values prometheus-community/kube-prometheus-stack ``` diff --git a/packages/rancher-monitoring/generated-changes/patch/charts/windowsExporter/values.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/charts/windowsExporter/values.yaml.patch deleted file mode 100644 index 3c3857f21..000000000 --- a/packages/rancher-monitoring/generated-changes/patch/charts/windowsExporter/values.yaml.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- charts-original/charts/windowsExporter/values.yaml -+++ charts/charts/windowsExporter/values.yaml -@@ -25,7 +25,7 @@ - port: 9796 - image: - repository: rancher/windows_exporter-package -- tag: v0.0.2 -+ tag: v0.0.3 - os: "windows" - - # Specify the IP addresses of nodes that you want to collect metrics from diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/alertmanager/alertmanager.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/alertmanager/alertmanager.yaml.patch index 98457ddbe..e09bf5a8a 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/alertmanager/alertmanager.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/alertmanager/alertmanager.yaml.patch @@ -25,7 +25,7 @@ {{- end }} paused: {{ .Values.alertmanager.alertmanagerSpec.paused }} @@ -104,8 +106,8 @@ - - {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-alertmanager]} + - {key: alertmanager, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-alertmanager]} {{- end }} {{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 4 }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/alertmanager/secret.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/alertmanager/secret.yaml.patch index bc412ea90..d285cb60a 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/alertmanager/secret.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/alertmanager/secret.yaml.patch @@ -20,7 +20,7 @@ {{ toYaml .Values.alertmanager.secret.annotations | indent 4 }} {{- end }} labels: -@@ -20,4 +26,4 @@ +@@ -24,4 +30,4 @@ {{- range $key, $val := .Values.alertmanager.templateFiles }} {{ $key }}: {{ $val | b64enc | quote }} {{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/exporters/kube-state-metrics/serviceMonitor.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/exporters/kube-state-metrics/serviceMonitor.yaml.patch new file mode 100644 index 000000000..1ba06c443 --- /dev/null +++ b/packages/rancher-monitoring/generated-changes/patch/templates/exporters/kube-state-metrics/serviceMonitor.yaml.patch @@ -0,0 +1,10 @@ +--- charts-original/templates/exporters/kube-state-metrics/serviceMonitor.yaml ++++ charts/templates/exporters/kube-state-metrics/serviceMonitor.yaml +@@ -1,4 +1,7 @@ + {{- if .Values.kubeStateMetrics.enabled }} ++{{- if .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }} ++{{- fail "kubeStateMetrics.serviceMonitor.namespaceOverride was removed. Please use kube-state-metrics.namespaceOverride instead." 
}} ++{{- end }} + apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/configmap-dashboards.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/configmap-dashboards.yaml.patch index ebe82603c..f61eb104e 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/configmap-dashboards.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/configmap-dashboards.yaml.patch @@ -4,7 +4,7 @@ kind: ConfigMap metadata: name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) $dashboardName | trunc 63 | trimSuffix "-" }} -- namespace: {{ template "kube-prometheus-stack.namespace" $ }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" $ }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} labels: {{- if $.Values.grafana.sidecar.dashboards.label }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/configmaps-datasources.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/configmaps-datasources.yaml.patch index 149de4213..5483a89ca 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/configmaps-datasources.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/configmaps-datasources.yaml.patch @@ -4,7 +4,7 @@ kind: ConfigMap metadata: name: {{ template "kube-prometheus-stack.fullname" . }}-grafana-datasource -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ default .Values.grafana.sidecar.datasources.searchNamespace (include "kube-prometheus-stack.namespace" .) }} {{- if .Values.grafana.sidecar.datasources.annotations }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/alertmanager-overview.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/alertmanager-overview.yaml.patch new file mode 100644 index 000000000..338bc5a1f --- /dev/null +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/alertmanager-overview.yaml.patch @@ -0,0 +1,23 @@ +--- charts-original/templates/grafana/dashboards-1.14/alertmanager-overview.yaml ++++ charts/templates/grafana/dashboards-1.14/alertmanager-overview.yaml +@@ -5,10 +5,11 @@ + */ -}} + {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} + {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} ++{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }} + apiVersion: v1 + kind: ConfigMap + metadata: +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . 
}} ++ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "alertmanager-overview" | trunc 63 | trimSuffix "-" }} + annotations: + {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} +@@ -607,4 +608,5 @@ + "uid": "alertmanager-overview", + "version": 0 + } +-{{- end }} +\ No newline at end of file ++{{- end }} ++{{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/apiserver.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/apiserver.yaml.patch index 6a2e763ec..06c5a5ca3 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/apiserver.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/apiserver.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "apiserver" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/cluster-total.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/cluster-total.yaml.patch index 6355241d7..b12fc84f8 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/cluster-total.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/cluster-total.yaml.patch @@ -4,8 +4,17 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "cluster-total" | trunc 63 | trimSuffix "-" }} annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} +@@ -1831,7 +1831,7 @@ + "options": [ + + ], +- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", ++ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/controller-manager.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/controller-manager.yaml.patch index cf6f2788a..96d13e702 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/controller-manager.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/controller-manager.yaml.patch @@ -10,7 +10,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . 
}} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "controller-manager" | trunc 63 | trimSuffix "-" }} annotations: @@ -32,29 +32,29 @@ "steppedLine": false, "targets": [ { -- "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", -+ "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)", +- "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)", ++ "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", @@ -282,7 +287,7 @@ "steppedLine": false, "targets": [ { -- "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", -+ "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)", +- "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)", ++ "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", @@ -388,7 +393,7 @@ "steppedLine": false, "targets": [ { -- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name, le))", +- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . 
}}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", @@ -494,28 +499,28 @@ "steppedLine": false, "targets": [ @@ -133,12 +133,21 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -1100,7 +1105,7 @@ + "options": [ + + ], +- "query": "label_values(up{job=\"kube-controller-manager\"}, cluster)", ++ "query": "label_values(up{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, @@ -1126,7 +1131,7 @@ "options": [ ], -- "query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)", -+ "query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, instance)", +- "query": "label_values(up{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)", ++ "query": "label_values(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, instance)", "refresh": 2, "regex": "", "sort": 1, diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/etcd.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/etcd.yaml.patch index 2d2b35a18..324cf8fdb 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/etcd.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/etcd.yaml.patch @@ -10,7 +10,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "etcd" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-coredns.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-coredns.yaml.patch index 35ee92b1b..0233264a4 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-coredns.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-coredns.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . 
}} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-coredns" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml.patch index d5119291a..2571162ef 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml.patch @@ -4,8 +4,17 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }} annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} +@@ -2973,7 +2973,7 @@ + "options": [ + + ], +- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", ++ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml.patch index 3ce475b39..fdb109a6e 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-node.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-node.yaml.patch index 262ac9081..112aa7495 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-node.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-node.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . 
}} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-node" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml.patch index 68241b00b..1b47db48a 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-pod" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml.patch index 5d91141fc..b377b2206 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml.patch index 2f7abde96..a0d7afeff 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . 
}} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/kubelet.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/kubelet.yaml.patch index c3331e5ac..d8ccb5a6f 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/kubelet.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/kubelet.yaml.patch @@ -10,268 +10,277 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "kubelet" | trunc 63 | trimSuffix "-" }} annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} -@@ -107,7 +108,7 @@ - "tableColumn": "", - "targets": [ - { -- "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})", -+ "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", -@@ -191,7 +192,7 @@ - "tableColumn": "", - "targets": [ - { -- "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", -+ "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -275,7 +276,7 @@ - "tableColumn": "", - "targets": [ - { -- "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", -+ "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -359,7 +360,7 @@ - "tableColumn": "", - "targets": [ - { -- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})", -+ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -443,7 +444,7 @@ - "tableColumn": "", - "targets": [ - { -- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})", -+ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -527,7 +528,7 @@ - "tableColumn": "", - "targets": [ - { -- "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))", -+ "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -609,7 +610,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", -+ "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", -@@ -702,7 +703,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", -+ "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", -@@ -808,7 +809,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", -@@ -914,14 +915,14 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", -+ "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} pod", - "refId": "A" - }, - { -- "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", -+ "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} worker", -@@ -1014,14 +1015,14 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} pod", - "refId": "A" - }, - { -- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} worker", -@@ -1129,7 +1130,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", -+ "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}", -@@ -1224,7 +1225,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", -+ "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}", -@@ -1332,7 +1333,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}", -@@ -1438,7 +1439,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", -+ "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}operation_type{{`}}`}}", -@@ -1531,7 +1532,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", -@@ -1638,7 +1639,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)", -+ "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -1731,7 +1732,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -1837,7 +1838,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -1943,28 +1944,28 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))", -+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "2xx", - "refId": "A" - }, - { -- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))", -+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "3xx", - "refId": "B" - }, - { -- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))", -+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "4xx", - "refId": "C" - }, - { -- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))", -+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "5xx", -@@ -2070,7 +2071,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", -@@ -2176,7 +2177,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", -+ "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -2269,7 +2270,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])", -+ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -2362,7 +2363,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", -+ "expr": "go_goroutines{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -2482,7 +2483,7 @@ +@@ -87,7 +88,7 @@ + "pluginVersion": "7", + "targets": [ + { +- "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})", ++ "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", +@@ -144,7 +145,7 @@ + "pluginVersion": "7", + "targets": [ + { +- "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", ++ "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -201,7 +202,7 @@ + "pluginVersion": "7", + "targets": [ + { +- "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", ++ "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -258,7 +259,7 @@ + "pluginVersion": "7", + "targets": [ + { +- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})", ++ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -315,7 +316,7 @@ + "pluginVersion": "7", + "targets": [ + { +- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})", ++ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -372,7 +373,7 @@ + "pluginVersion": "7", + "targets": [ + { +- "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))", ++ "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -431,7 +432,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", ++ "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", +@@ -526,7 +527,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", ++ "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", +@@ -621,7 +622,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", +@@ -716,14 +717,14 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", ++ "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} pod", + "refId": "A" + }, + { +- "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", ++ "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} worker", +@@ -818,14 +819,14 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} pod", + "refId": "A" + }, + { +- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} worker", +@@ -922,7 +923,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", ++ "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}", +@@ -1019,7 +1020,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", ++ "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}", +@@ -1116,7 +1117,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}", +@@ -1211,7 +1212,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", ++ "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}operation_type{{`}}`}}", +@@ -1306,7 +1307,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", +@@ -1402,7 +1403,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)", ++ "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -1497,7 +1498,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -1592,7 +1593,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -1687,28 +1688,28 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))", ++ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" + }, + { +- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))", ++ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { +- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))", ++ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { +- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))", ++ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", +@@ -1803,7 +1804,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", +@@ -1898,7 +1899,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", ++ "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -1993,7 +1994,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])", ++ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -2088,7 +2089,7 @@ + "steppedLine": false, + "targets": [ + { +- "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}", ++ "expr": "go_goroutines{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -2177,7 +2178,7 @@ + "options": [ + + ], +- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)", ++ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, +@@ -2203,7 +2204,7 @@ "options": [ ], @@ -280,7 +289,7 @@ "refresh": 2, "regex": "", "sort": 1, -@@ -2530,4 +2531,5 @@ +@@ -2251,4 +2252,5 @@ "uid": "3138fa155d5915769fbded898ac09fd9", "version": 0 } diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-pod.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-pod.yaml.patch index e2e2d07d0..553e76836 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-pod.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-pod.yaml.patch @@ -4,8 +4,17 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-pod" | trunc 63 | trimSuffix "-" }} annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} +@@ -1301,7 +1301,7 @@ + "options": [ + + ], +- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", ++ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-workload.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-workload.yaml.patch index 35ebc6fcf..08a6bef8a 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-workload.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-workload.yaml.patch @@ -4,8 +4,17 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-workload" | trunc 63 | trimSuffix "-" }} annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} +@@ -1541,7 +1541,7 @@ + "options": [ + + ], +- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", ++ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml.patch index cd85c57ef..7c7c58418 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/node-rsrc-use.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/node-rsrc-use.yaml.patch index f514e83ec..427519860 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/node-rsrc-use.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/node-rsrc-use.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . 
}} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-rsrc-use" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/nodes.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/nodes.yaml.patch index 263330bc3..0eef4b398 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/nodes.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/nodes.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml.patch index e7c77d5e0..af4cceb4c 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "persistentvolumesusage" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/pod-total.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/pod-total.yaml.patch index 96bbe5747..560d7d8f2 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/pod-total.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/pod-total.yaml.patch @@ -4,8 +4,17 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pod-total" | trunc 63 | trimSuffix "-" }} annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} +@@ -1033,7 +1033,7 @@ + "options": [ + + ], +- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)", ++ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml.patch index dc6c3bd10..bc67b2883 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus-remote-write" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/prometheus.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/prometheus.yaml.patch index 0971545bb..432cca4c9 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/prometheus.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/prometheus.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/proxy.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/proxy.yaml.patch index 3d47af0fe..121ae00f0 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/proxy.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/proxy.yaml.patch @@ -10,7 +10,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "proxy" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/scheduler.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/scheduler.yaml.patch index dac1a5db5..db5a1375f 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/scheduler.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/scheduler.yaml.patch @@ -10,7 +10,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . 
}} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "scheduler" | trunc 63 | trimSuffix "-" }} annotations: @@ -32,68 +32,68 @@ "steppedLine": false, "targets": [ { -- "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", -+ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", +- "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)", ++ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} e2e", + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e", "refId": "A" }, { -- "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", -+ "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", +- "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)", ++ "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} binding", + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding", "refId": "B" }, { -- "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", -+ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", +- "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)", ++ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm", + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm", "refId": "C" }, { -- "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", -+ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\", instance=~\"$instance\"}[5m])) by (instance)", +- "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)", ++ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} volume", + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume", @@ -290,28 +295,28 @@ "steppedLine": false, "targets": [ { -- "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", +- "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} e2e", + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e", "refId": "A" }, { -- "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", +- "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} binding", + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding", "refId": "B" }, { -- "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", +- "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm", + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm", "refId": "C" }, { -- "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", +- "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{`{{`}}instance{{`}}`}} volume", + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume", @@ -417,28 +422,28 @@ "steppedLine": false, "targets": [ @@ -172,6 +172,15 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", +@@ -1023,7 +1028,7 @@ + "options": [ + + ], +- "query": "label_values(up{job=\"kube-scheduler\"}, cluster)", ++ "query": "label_values(up{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, @@ -1049,7 +1054,7 @@ "options": [ diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/statefulset.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/statefulset.yaml.patch index a607cdf2e..a61e33419 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/statefulset.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/statefulset.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "statefulset" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/workload-total.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/workload-total.yaml.patch index b8a643abd..9858e8aee 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/workload-total.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/workload-total.yaml.patch @@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} +- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . 
}} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "workload-total" | trunc 63 | trimSuffix "-" }} annotations: diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus-operator/deployment.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus-operator/deployment.yaml.patch index 0165ec39d..4335f8b1f 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus-operator/deployment.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus-operator/deployment.yaml.patch @@ -22,9 +22,21 @@ - - --prometheus-config-reloader={{ .Values.prometheusOperator.prometheusConfigReloaderImage.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.tag }} + - --prometheus-config-reloader={{ template "system_default_registry" . }}{{ .Values.prometheusOperator.prometheusConfigReloaderImage.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.tag }} {{- end }} - - --config-reloader-cpu={{ .Values.prometheusOperator.configReloaderCpu }} - - --config-reloader-memory={{ .Values.prometheusOperator.configReloaderMemory }} -@@ -130,16 +130,16 @@ + - --config-reloader-cpu-request={{ .Values.prometheusOperator.configReloaderCpu }} + - --config-reloader-cpu-limit={{ .Values.prometheusOperator.configReloaderCpu }} +@@ -81,9 +81,9 @@ + - --prometheus-instance-namespaces={{ .Values.prometheusOperator.prometheusInstanceNamespaces | join "," }} + {{- end }} + {{- if .Values.prometheusOperator.thanosImage.sha }} +- - --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }} ++ - --thanos-default-base-image={{ template "system_default_registry" . }}{{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }} + {{- else }} +- - --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }} ++ - --thanos-default-base-image={{ template "system_default_registry" . 
}}{{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }} + {{- end }} + {{- if .Values.prometheusOperator.thanosRulerInstanceNamespaces }} + - --thanos-ruler-instance-namespaces={{ .Values.prometheusOperator.thanosRulerInstanceNamespaces | join "," }} +@@ -137,16 +137,16 @@ hostNetwork: true dnsPolicy: ClusterFirstWithHostNet {{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/prometheus.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/prometheus.yaml.patch index bc1274417..f40c3f8e3 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/prometheus.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/prometheus.yaml.patch @@ -1,15 +1,24 @@ --- charts-original/templates/prometheus/prometheus.yaml +++ charts/templates/prometheus/prometheus.yaml -@@ -32,7 +32,7 @@ - {{ toYaml .Values.prometheus.prometheusSpec.apiserverConfig | indent 4}} +@@ -33,13 +33,13 @@ {{- end }} {{- if .Values.prometheus.prometheusSpec.image }} -- image: {{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }} -+ image: {{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }} + {{- if and .Values.prometheus.prometheusSpec.image.tag .Values.prometheus.prometheusSpec.image.sha }} +- image: "{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}" ++ image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}" + {{- else if .Values.prometheus.prometheusSpec.image.sha }} +- image: "{{ .Values.prometheus.prometheusSpec.image.repository }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}" ++ image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}" + {{- else if .Values.prometheus.prometheusSpec.image.tag }} +- image: "{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}" ++ image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}" + {{- else }} +- image: "{{ .Values.prometheus.prometheusSpec.image.repository }}" ++ image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}" + {{- end }} version: {{ .Values.prometheus.prometheusSpec.image.tag }} {{- if .Values.prometheus.prometheusSpec.image.sha }} - sha: {{ .Values.prometheus.prometheusSpec.image.sha }} -@@ -56,11 +56,13 @@ +@@ -64,11 +64,13 @@ externalUrl: "{{ tpl .Values.prometheus.prometheusSpec.externalUrl . }}" {{- else if and .Values.prometheus.ingress.enabled .Values.prometheus.ingress.hosts }} externalUrl: "http://{{ tpl (index .Values.prometheus.ingress.hosts 0) . 
}}{{ .Values.prometheus.prometheusSpec.routePrefix }}" @@ -24,7 +33,7 @@ {{ toYaml .Values.prometheus.prometheusSpec.nodeSelector | indent 4 }} {{- end }} paused: {{ .Values.prometheus.prometheusSpec.paused }} -@@ -232,8 +234,8 @@ +@@ -244,8 +246,8 @@ - {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-prometheus]} {{- end }} {{- end }} @@ -34,7 +43,7 @@ {{ toYaml .Values.prometheus.prometheusSpec.tolerations | indent 4 }} {{- end }} {{- if .Values.prometheus.prometheusSpec.topologySpreadConstraints }} -@@ -266,7 +268,7 @@ +@@ -284,7 +286,7 @@ {{- end }} {{- if .Values.prometheus.prometheusSpec.containers }} containers: @@ -43,7 +52,7 @@ {{- end }} {{- if .Values.prometheus.prometheusSpec.initContainers }} initContainers: -@@ -282,6 +284,7 @@ +@@ -300,6 +302,7 @@ {{- if .Values.prometheus.prometheusSpec.disableCompaction }} disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }} {{- end }} @@ -51,7 +60,7 @@ portName: {{ .Values.prometheus.prometheusSpec.portName }} {{- end }} {{- if .Values.prometheus.prometheusSpec.volumes }} -@@ -326,3 +329,4 @@ +@@ -356,3 +359,4 @@ {{- if .Values.prometheus.prometheusSpec.allowOverlappingBlocks }} allowOverlappingBlocks: {{ .Values.prometheus.prometheusSpec.allowOverlappingBlocks }} {{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/alertmanager.rules.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/alertmanager.rules.yaml.patch new file mode 100644 index 000000000..ac73ebd2d --- /dev/null +++ b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/alertmanager.rules.yaml.patch @@ -0,0 +1,18 @@ +--- charts-original/templates/prometheus/rules-1.14/alertmanager.rules.yaml ++++ charts/templates/prometheus/rules-1.14/alertmanager.rules.yaml +@@ -7,6 +7,7 @@ + {{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }} + {{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }} + {{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} ++{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }} + apiVersion: monitoring.coreos.com/v1 + kind: PrometheusRule + metadata: +@@ -172,4 +173,5 @@ + {{- if .Values.defaultRules.additionalRuleLabels }} + {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} + {{- end }} +-{{- end }} +\ No newline at end of file ++{{- end }} ++{{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/k8s.rules.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/k8s.rules.yaml.patch index fa81d2ae6..f647a1983 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/k8s.rules.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/k8s.rules.yaml.patch @@ -4,12 +4,12 @@ rules: - expr: |- sum by (cluster, namespace, pod, container) ( -- rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) -+ rate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . 
}}", metrics_path="/metrics/cadvisor", image!=""}[5m]) +- irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) ++ irate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) ) - record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate + record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate - expr: |- - container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + container_memory_working_set_bytes{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/kubernetes-storage.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/kubernetes-storage.yaml.patch index f6b8f2f45..c44a7c489 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/kubernetes-storage.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/kubernetes-storage.yaml.patch @@ -1,18 +1,25 @@ --- charts-original/templates/prometheus/rules-1.14/kubernetes-storage.yaml +++ charts/templates/prometheus/rules-1.14/kubernetes-storage.yaml -@@ -31,9 +31,9 @@ +@@ -31,13 +31,12 @@ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefillingup summary: PersistentVolume is filling up. expr: |- -- kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} +- ( +- kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} +- / +- kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} +- ) < 0.03 + kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} - / -- kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} ++ / + kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} - < 0.03 ++ < 0.03 + and +- kubelet_volume_stats_used_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0 ++ kubelet_volume_stats_used_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0 for: 1m labels: -@@ -48,12 +48,12 @@ + severity: critical +@@ -51,14 +50,14 @@ summary: PersistentVolume is filling up. expr: |- ( @@ -23,6 +30,9 @@ + kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} ) < 0.15 and +- kubelet_volume_stats_used_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0 ++ kubelet_volume_stats_used_bytes{job="{{ include "exporter.kubelet.jobName" . 
}}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0 + and - predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 + predict_linear(kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 for: 1h diff --git a/packages/rancher-monitoring/generated-changes/patch/values.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/values.yaml.patch index 62f839a48..ab5dd038e 100644 --- a/packages/rancher-monitoring/generated-changes/patch/values.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/values.yaml.patch @@ -450,7 +450,7 @@ + tag: v1.20.2 + pullPolicy: IfNotPresent rbac: -+ ## Create RBAC resources for ServiceAccounts and users ++ ## Create RBAC resources for ServiceAccounts and users + ## create: true + @@ -463,7 +463,7 @@ pspEnabled: true pspAnnotations: {} ## Specify pod annotations -@@ -187,25 +625,76 @@ +@@ -187,25 +625,77 @@ ## ref: https://prometheus.io/docs/alerting/notifications/ ## https://prometheus.io/docs/alerting/notification_examples/ ## @@ -477,7 +477,7 @@ - # {{- $root := . -}} - # {{ range .Alerts }} - # *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}` -- # *Cluster:* {{ template "cluster" $root }} +- # *Cluster:* {{ template "cluster" $root }} - # *Description:* {{ .Annotations.description }} - # *Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:> - # *Runbook:* <{{ .Annotations.runbook }}|:spiral_note_pad:> @@ -486,6 +486,7 @@ - # {{ end }} - # {{ end }} - # {{ end }} ++ + templateFiles: + rancher_defaults.tmpl: |- + {{- define "slack.rancher.text" -}} @@ -559,7 +560,7 @@ ingress: enabled: false -@@ -395,7 +884,7 @@ +@@ -397,7 +887,7 @@ ## Image of Alertmanager ## image: @@ -568,7 +569,7 @@ tag: v0.22.2 sha: "" -@@ -507,9 +996,13 @@ +@@ -509,9 +999,13 @@ ## Define resources requests and limits for single Pods. ## ref: https://kubernetes.io/docs/user-guide/compute-resources/ ## @@ -585,7 +586,7 @@ ## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node. ## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided. -@@ -613,6 +1106,30 @@ +@@ -625,6 +1119,30 @@ enabled: true namespaceOverride: "" @@ -616,7 +617,7 @@ ## ForceDeployDatasources Create datasource configmap even if grafana deployment has been disabled ## forceDeployDatasources: false -@@ -625,6 +1142,18 @@ +@@ -637,6 +1155,18 @@ ## defaultDashboardsEnabled: true @@ -632,10 +633,10 @@ + # Ignore if useExistingNamespace is true + cleanupOnUninstall: false + - adminPassword: prom-operator - - ingress: -@@ -664,6 +1193,7 @@ + ## Timezone for the default dashboards + ## Other options are: browser or a specific timezone, i.e. Europe/Luxembourg + ## +@@ -681,6 +1211,7 @@ dashboards: enabled: true label: grafana_dashboard @@ -643,7 +644,7 @@ ## Annotations for Grafana dashboard configmaps ## -@@ -716,7 +1246,60 @@ +@@ -739,7 +1270,60 @@ ## Passed to grafana subchart and used by servicemonitor below ## service: @@ -667,7 +668,7 @@ + image: + repository: rancher/mirrored-library-nginx + tag: 1.21.1-alpine -+ ++ + ## Enable an Specify container in extraContainers. 
This is meant to allow adding an authentication proxy to a grafana pod + extraContainers: | + - name: grafana-proxy @@ -705,11 +706,10 @@ ## If true, create a serviceMonitor for grafana ## -@@ -746,6 +1329,14 @@ - # targetLabel: nodename +@@ -773,6 +1357,17 @@ # replacement: $1 # action: replace -+ + + resources: + limits: + memory: 200Mi @@ -717,10 +717,14 @@ + requests: + memory: 100Mi + cpu: 100m - ++ ++ testFramework: ++ enabled: false ++ ## Component scraping the kube api server ## -@@ -907,7 +1498,7 @@ + kubeApiServer: +@@ -952,7 +1547,7 @@ ## Component scraping the kube controller manager ## kubeControllerManager: @@ -729,7 +733,7 @@ ## If your kube controller manager is not deployed as a pod, specify IPs it can be found on ## -@@ -1054,7 +1645,7 @@ +@@ -1110,7 +1705,7 @@ ## Component scraping etcd ## kubeEtcd: @@ -738,7 +742,7 @@ ## If your etcd is not deployed as a pod, specify IPs it can be found on ## -@@ -1119,7 +1710,7 @@ +@@ -1177,7 +1772,7 @@ ## Component scraping kube scheduler ## kubeScheduler: @@ -747,7 +751,7 @@ ## If your kube scheduler is not deployed as a pod, specify IPs it can be found on ## -@@ -1177,7 +1768,7 @@ +@@ -1237,7 +1832,7 @@ ## Component scraping kube proxy ## kubeProxy: @@ -756,7 +760,7 @@ ## If your kube proxy is not deployed as a pod, specify IPs it can be found on ## -@@ -1266,6 +1857,13 @@ +@@ -1337,6 +1932,13 @@ create: true podSecurityPolicy: enabled: true @@ -770,7 +774,7 @@ ## Deploy node exporter as a daemonset to all nodes ## -@@ -1319,6 +1917,16 @@ +@@ -1392,6 +1994,16 @@ extraArgs: - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$ @@ -787,7 +791,7 @@ ## Manages Prometheus and Alertmanager components ## -@@ -1331,8 +1939,8 @@ +@@ -1404,8 +2016,8 @@ enabled: true # Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants tlsMinVersion: VersionTLS13 @@ -798,16 +802,19 @@ ## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted ## rules from making their way into prometheus and potentially preventing the container from starting -@@ -1349,7 +1957,7 @@ +@@ -1422,9 +2034,9 @@ patch: enabled: true image: -- repository: jettech/kube-webhook-certgen -+ repository: rancher/mirrored-jettech-kube-webhook-certgen - tag: v1.5.2 - sha: "" +- repository: k8s.gcr.io/ingress-nginx/kube-webhook-certgen ++ repository: rancher/mirrored-ingress-nginx-kube-webhook-certgen + tag: v1.0 +- sha: "f3b6b39a6062328c095337b4cadcefd1612348fdd5190b1dcbcb9b9e90bd8068" ++ sha: "" pullPolicy: IfNotPresent -@@ -1498,13 +2106,13 @@ + resources: {} + ## Provide a priority class name to the webhook patching job +@@ -1571,13 +2183,13 @@ ## Resource limits & requests ## @@ -828,25 +835,34 @@ # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico), # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working -@@ -1557,7 +2165,7 @@ +@@ -1630,7 +2242,7 @@ ## Prometheus-operator image ## image: - repository: quay.io/prometheus-operator/prometheus-operator + repository: rancher/mirrored-prometheus-operator-prometheus-operator - tag: v0.48.0 + tag: v0.50.0 sha: "" pullPolicy: IfNotPresent -@@ -1573,7 
-@@ -1573,7 +2181,7 @@
+@@ -1646,7 +2258,7 @@
   ## Prometheus-config-reloader image to use for config and rule reloading
   ##
   prometheusConfigReloaderImage:
 -    repository: quay.io/prometheus-operator/prometheus-config-reloader
 +    repository: rancher/mirrored-prometheus-operator-prometheus-config-reloader
-     tag: v0.48.0
+     tag: v0.50.0
     sha: ""
@@ -1661,7 +2273,7 @@
   ## Thanos side-car image when configured
   ##
   thanosImage:
-    repository: quay.io/thanos/thanos
+    repository: rancher/mirrored-thanos-thanos
     tag: v0.17.2
     sha: ""
@@ -1781,7 +2393,7 @@
     port: 9090
     ## To be used with a proxy extraContainer port
-    targetPort: 9090
+    targetPort: 8081
     ## List of IP addresses at which the Prometheus server service is available
     ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
@@ -2054,7 +2666,7 @@
     ## Image of Prometheus.
     ##
     image:
-      repository: quay.io/prometheus/prometheus
+      repository: rancher/mirrored-prometheus-prometheus
       tag: v2.28.1
       sha: ""
@@ -2149,7 +2761,7 @@
     ## prometheus resource to be created with selectors based on values in the helm deployment,
     ## which will also match the PrometheusRule resources created
     ##
-    ruleSelectorNilUsesHelmValues: true
+    ruleSelectorNilUsesHelmValues: false
     ## PrometheusRules to be selected for target discovery.
     ## If {}, select all PrometheusRules
@@ -2174,7 +2786,7 @@
     ## prometheus resource to be created with selectors based on values in the helm deployment,
     ## which will also match the servicemonitors created
     ##
-    serviceMonitorSelectorNilUsesHelmValues: true
+    serviceMonitorSelectorNilUsesHelmValues: false
     ## ServiceMonitors to be selected for target discovery.
     ## If {}, select all ServiceMonitors
@@ -2197,7 +2809,7 @@
     ## prometheus resource to be created with selectors based on values in the helm deployment,
     ## which will also match the podmonitors created
     ##
-    podMonitorSelectorNilUsesHelmValues: true
+    podMonitorSelectorNilUsesHelmValues: false
     ## PodMonitors to be selected for target discovery.
     ## If {}, select all PodMonitors
@@ -2328,9 +2940,13 @@
     ## Resource limits & requests
     ##
-    resources: {}
-    # requests:
-    #   memory: 400Mi
+    resources:
+      limits:
+        memory: 3000Mi
+        cpu: 1000m
+      requests:
+        memory: 750Mi
+        cpu: 750m
     ## Prometheus StorageSpec for persistent data
     ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/storage.md
@@ -2353,7 +2969,13 @@
     #   medium: Memory
     # Additional volumes on the output StatefulSet definition.
-    volumes: []
+    volumes:
+      - name: nginx-home
+        emptyDir: {}
+      - name: prometheus-nginx
+        configMap:
+          name: prometheus-nginx-proxy-config
+          defaultMode: 438
     # Additional VolumeMounts on the output StatefulSet definition.
     volumeMounts: []
@@ -2475,9 +3097,34 @@
     #   fileName: "objstore.yaml"
     # objectStorageConfigFile: /var/secrets/object-store.yaml
+    proxy:
+      image:
+        repository: rancher/mirrored-library-nginx
+        tag: 1.21.1-alpine
+
     ## Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to a Prometheus pod.
     ##  if using proxy extraContainer  update targetPort with proxy container port
-    containers: []
+    containers: |
+      - name: prometheus-proxy
     ## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes
     ## (permissions, dir tree) on mounted volumes before starting prometheus
@@ -2485,7 +3132,7 @@
     ## PortName to use for Prometheus.
     ##
diff --git a/packages/rancher-monitoring/package.yaml b/packages/rancher-monitoring/package.yaml
index 60478ef7f..658c588f9 100644
--- a/packages/rancher-monitoring/package.yaml
+++ b/packages/rancher-monitoring/package.yaml
@@ -1,10 +1,11 @@
 url: https://github.com/prometheus-community/helm-charts.git
 subdirectory: charts/kube-prometheus-stack
-commit: ba91bdb2d79ca4419cf72078f5f4bfcc426d4599
-version: 100.0.1
+commit: c6208979d494156a3869d2e5faab669ce4301c68
+version: 100.1.0
 additionalCharts:
 - workingDir: charts-crd
   crdOptions:
     templateDirectory: crd-template
     crdDirectory: crd-manifest
     addCRDValidationToMainChart: true
+    useTarArchive: true
diff --git a/packages/rancher-monitoring/templates/crd-template/templates/jobs.yaml b/packages/rancher-monitoring/templates/crd-template/templates/jobs.yaml
index 6167ddbe2..55ed791ae 100644
--- a/packages/rancher-monitoring/templates/crd-template/templates/jobs.yaml
+++ b/packages/rancher-monitoring/templates/crd-template/templates/jobs.yaml
@@ -49,8 +49,10 @@ spec:
         - -c
         - >
           echo "Applying CRDs...";
-          kubectl apply -f /etc/config/crd-manifest.yaml;
-
+          mkdir -p /etc/crd;
+          base64 -d /etc/config/crd-manifest.tgz.b64 | tar -xzv -C /etc/crd;
+          kubectl apply -Rf /etc/crd;
+
           echo "Waiting for CRDs to be recognized before finishing installation...";
 {{- range $path, $_ := (.Files.Glob "crd-manifest/**.yaml") }}
@@ -68,7 +70,13 @@ spec:
           mountPath: /etc/config
       restartPolicy: OnFailure
       nodeSelector: {{ include "linux-node-selector" . | nindent 8 }}
+      {{- if .Values.nodeSelector }}
+      {{- toYaml .Values.nodeSelector | nindent 8 }}
+      {{- end }}
       tolerations: {{ include "linux-node-tolerations" . | nindent 8 }}
+      {{- if .Values.tolerations }}
+      {{- toYaml .Values.tolerations | nindent 8 }}
+      {{- end }}
       volumes:
       - name: crd-manifest
         configMap:
@@ -121,14 +129,23 @@ spec:
         - /bin/sh
         - -c
         - >
-          kubectl delete -f /etc/config/crd-manifest.yaml
+          echo "Deleting CRDs...";
+          mkdir -p /etc/crd;
+          base64 -d /etc/config/crd-manifest.tgz.b64 | tar -xzv -C /etc/crd;
+          kubectl delete -Rf /etc/crd;
       volumeMounts:
       - name: crd-manifest
         readOnly: true
         mountPath: /etc/config
       restartPolicy: OnFailure
       nodeSelector: {{ include "linux-node-selector" . | nindent 8 }}
+      {{- if .Values.nodeSelector }}
+      {{- toYaml .Values.nodeSelector | nindent 8 }}
+      {{- end }}
       tolerations: {{ include "linux-node-tolerations" . | nindent 8 }}
+      {{- if .Values.tolerations }}
+      {{- toYaml .Values.tolerations | nindent 8 }}
+      {{- end }}
       volumes:
       - name: crd-manifest
         configMap:
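The apply/delete jobs above now treat the CRD manifest as a base64-encoded tar archive rather than one concatenated YAML document. A minimal sketch of the round-trip, assuming a local crd-manifest/ directory of CRD YAMLs (the file and directory names on the build-time side are illustrative; the in-cluster half is exactly what the job scripts run):

    # build time (what useTarArchive in package.yaml enables)
    tar -czf crd-manifest.tgz crd-manifest/
    base64 crd-manifest.tgz > crd-manifest.tgz.b64    # payload that lands in the ConfigMap

    # install job
    mkdir -p /etc/crd
    base64 -d /etc/config/crd-manifest.tgz.b64 | tar -xzv -C /etc/crd
    kubectl apply -Rf /etc/crd                        # -R recurses into the extracted tree

The uninstall job mirrors this with kubectl delete -Rf. Keeping each CRD as its own file inside the archive avoids templating one giant crd-manifest.yaml string, which is what the manifest.yaml change below removes.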
diff --git a/packages/rancher-monitoring/templates/crd-template/templates/manifest.yaml b/packages/rancher-monitoring/templates/crd-template/templates/manifest.yaml
index 31016b6ef..8dc9dfb44 100644
--- a/packages/rancher-monitoring/templates/crd-template/templates/manifest.yaml
+++ b/packages/rancher-monitoring/templates/crd-template/templates/manifest.yaml
@@ -4,11 +4,5 @@ metadata:
   name: {{ .Chart.Name }}-manifest
   namespace: {{ .Release.Namespace }}
 data:
-  crd-manifest.yaml: |
-    {{- $currentScope := . -}}
-    {{- $crds := (.Files.Glob "crd-manifest/**.yaml") -}}
-    {{- range $path, $_ := $crds -}}
-    {{- with $currentScope -}}
-    {{ .Files.Get $path | nindent 4 }}
-    ---
-    {{- end -}}{{- end -}}
+  crd-manifest.tgz.b64:
+    {{- .Files.Get "files/crd-manifest.tgz" | b64enc | indent 4 }}
diff --git a/packages/rancher-monitoring/templates/crd-template/values.yaml b/packages/rancher-monitoring/templates/crd-template/values.yaml
index 129d13914..a42750182 100644
--- a/packages/rancher-monitoring/templates/crd-template/values.yaml
+++ b/packages/rancher-monitoring/templates/crd-template/values.yaml
@@ -9,3 +9,7 @@ global:
   image:
     repository: rancher/shell
     tag: v0.1.8
+
+nodeSelector: {}
+
+tolerations: []
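The empty nodeSelector and tolerations defaults above keep existing behaviour while letting users pin the CRD install/uninstall jobs to particular nodes. A hedged example of an override (the chart path, release name, and the selector/toleration shown are illustrative, not defaults):

    cat > crd-values.yaml <<'EOF'
    nodeSelector:
      kubernetes.io/os: linux
    tolerations:
    - key: node-role.kubernetes.io/controlplane
      operator: Exists
      effect: NoSchedule
    EOF
    helm upgrade --install rancher-monitoring-crd ./charts-crd -f crd-values.yaml

Because jobs.yaml appends these values after the linux-node-selector and linux-node-tolerations includes, user-supplied entries extend the built-in Linux scheduling constraints rather than replacing them.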
diff --git a/packages/rancher-node-exporter/generated-changes/exclude/ci/port-values.yaml b/packages/rancher-node-exporter/generated-changes/exclude/ci/port-values.yaml
new file mode 100644
index 000000000..dbfb4b67f
--- /dev/null
+++ b/packages/rancher-node-exporter/generated-changes/exclude/ci/port-values.yaml
@@ -0,0 +1,3 @@
+service:
+  targetPort: 9102
+  port: 9102
diff --git a/packages/rancher-node-exporter/generated-changes/patch/Chart.yaml.patch b/packages/rancher-node-exporter/generated-changes/patch/Chart.yaml.patch
index 8a570e2ae..436ee0b91 100644
--- a/packages/rancher-node-exporter/generated-changes/patch/Chart.yaml.patch
+++ b/packages/rancher-node-exporter/generated-changes/patch/Chart.yaml.patch
@@ -7,11 +7,16 @@
 +  catalog.rancher.io/release-name: rancher-node-exporter
 +  catalog.cattle.io/hidden: "true"
 +  catalog.cattle.io/os: linux
- apiVersion: v1
- appVersion: 1.1.2
+ apiVersion: v2
+ appVersion: 1.2.2
  description: A Helm chart for prometheus node-exporter
 -name: prometheus-node-exporter
 +name: rancher-node-exporter
- version: 1.18.1
+ version: 2.2.0
+ type: application
  home: https://github.com/prometheus/node_exporter/
- sources:
+@@ -16,3 +22,4 @@
+   name: gianrubio
+ - name: vsliouniaev
+ - name: bismarck
++
diff --git a/packages/rancher-node-exporter/generated-changes/patch/templates/daemonset.yaml.patch b/packages/rancher-node-exporter/generated-changes/patch/templates/daemonset.yaml.patch
index c2844c29c..84a966ffe 100644
--- a/packages/rancher-node-exporter/generated-changes/patch/templates/daemonset.yaml.patch
+++ b/packages/rancher-node-exporter/generated-changes/patch/templates/daemonset.yaml.patch
@@ -1,6 +1,6 @@
 --- charts-original/templates/daemonset.yaml
 +++ charts/templates/daemonset.yaml
-@@ -35,7 +35,7 @@
+@@ -36,7 +36,7 @@
      {{- end }}
        containers:
          - name: node-exporter
@@ -9,7 +9,7 @@
            imagePullPolicy: {{ .Values.image.pullPolicy }}
            args:
              - --path.procfs=/host/proc
-@@ -133,18 +133,18 @@
+@@ -134,18 +134,18 @@
      affinity:
  {{ toYaml .Values.affinity | indent 8 }}
  {{- end }}
diff --git a/packages/rancher-node-exporter/generated-changes/patch/values.yaml.patch b/packages/rancher-node-exporter/generated-changes/patch/values.yaml.patch
index af47a4e7a..4a8d34565 100644
--- a/packages/rancher-node-exporter/generated-changes/patch/values.yaml.patch
+++ b/packages/rancher-node-exporter/generated-changes/patch/values.yaml.patch
@@ -12,10 +12,10 @@
  image:
 -  repository: quay.io/prometheus/node-exporter
 +  repository: rancher/mirrored-prometheus-node-exporter
-  tag: v1.1.2
+  tag: v1.2.2
   pullPolicy: IfNotPresent
-@@ -126,6 +131,8 @@
+@@ -135,6 +140,8 @@
  tolerations:
    - effect: NoSchedule
      operator: Exists
diff --git a/packages/rancher-node-exporter/package.yaml b/packages/rancher-node-exporter/package.yaml
index ab4afc92a..02ffc8499 100644
--- a/packages/rancher-node-exporter/package.yaml
+++ b/packages/rancher-node-exporter/package.yaml
@@ -1,4 +1,5 @@
 url: https://github.com/prometheus-community/helm-charts.git
 subdirectory: charts/prometheus-node-exporter
-commit: a05f7b8888d6174827b815aa097d64b94f00af3e
+commit: cd13facd9c6042f7f15978024572cf297fbac6c0
 version: 100.0.0
+doNotRelease: true
diff --git a/packages/rancher-prometheus-adapter/generated-changes/exclude/ci/default-values.yaml b/packages/rancher-prometheus-adapter/generated-changes/exclude/ci/default-values.yaml
new file mode 100644
index 000000000..e69de29bb
diff --git a/packages/rancher-prometheus-adapter/generated-changes/exclude/ci/external-rules-values.yaml b/packages/rancher-prometheus-adapter/generated-changes/exclude/ci/external-rules-values.yaml
new file mode 100644
index 000000000..2dafb5621
--- /dev/null
+++ b/packages/rancher-prometheus-adapter/generated-changes/exclude/ci/external-rules-values.yaml
@@ -0,0 +1,9 @@
+rules:
+  external:
+  - seriesQuery: '{__name__=~"^some_metric_count$"}'
+    resources:
+      template: <<.Resource>>
+    name:
+      matches: ""
+      as: "my_custom_metric"
+    metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
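The external-rules CI values above exercise prometheus-adapter's external metrics path: series matching seriesQuery are renamed to my_custom_metric and served through the external.metrics.k8s.io API, with metricsQuery expanding roughly to sum(some_metric_count{...}) by (namespace). A quick way to verify the wiring on a live cluster (a sketch; the namespace is illustrative, and the metric only appears if Prometheus actually scrapes a matching series):

    kubectl get --raw \
      '/apis/external.metrics.k8s.io/v1beta1/namespaces/default/my_custom_metric' | jq .

An HPA can then reference my_custom_metric as an External metric without knowing the underlying series name.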
diff --git a/packages/rancher-prometheus-adapter/generated-changes/patch/Chart.yaml.patch b/packages/rancher-prometheus-adapter/generated-changes/patch/Chart.yaml.patch
index 2d97a05e5..57dbb424d 100644
--- a/packages/rancher-prometheus-adapter/generated-changes/patch/Chart.yaml.patch
+++ b/packages/rancher-prometheus-adapter/generated-changes/patch/Chart.yaml.patch
@@ -10,6 +10,6 @@
  apiVersion: v1
 -name: prometheus-adapter
 +name: rancher-prometheus-adapter
- version: 2.14.0
- appVersion: v0.8.4
+ version: 2.17.0
+ appVersion: v0.9.0
  description: A Helm chart for k8s prometheus adapter
diff --git a/packages/rancher-prometheus-adapter/generated-changes/patch/templates/deployment.yaml.patch b/packages/rancher-prometheus-adapter/generated-changes/patch/templates/deployment.yaml.patch
index a6d4a4b0a..187b7e6f5 100644
--- a/packages/rancher-prometheus-adapter/generated-changes/patch/templates/deployment.yaml.patch
+++ b/packages/rancher-prometheus-adapter/generated-changes/patch/templates/deployment.yaml.patch
@@ -1,6 +1,6 @@
 --- charts-original/templates/deployment.yaml
 +++ charts/templates/deployment.yaml
-@@ -40,7 +40,7 @@
+@@ -41,7 +41,7 @@
  {{- end}}
        containers:
        - name: {{ .Chart.Name }}
@@ -9,7 +9,7 @@
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          args:
          - /adapter
-@@ -102,13 +102,17 @@
+@@ -105,8 +105,10 @@
            name: volume-serving-cert
            readOnly: true
  {{- end }}
@@ -22,6 +22,10 @@
        affinity:
  {{- toYaml .Values.affinity | nindent 8 }}
        priorityClassName: {{ .Values.priorityClassName }}
+@@ -114,8 +116,10 @@
+       securityContext:
+ {{- toYaml .Values.podSecurityContext | nindent 8 }}
+ {{- end }}
 -      tolerations:
 -{{- toYaml .Values.tolerations | nindent 8 }}
 +      tolerations: {{ include "linux-node-tolerations" . | nindent 8 }}
diff --git a/packages/rancher-prometheus-adapter/generated-changes/patch/values.yaml.patch b/packages/rancher-prometheus-adapter/generated-changes/patch/values.yaml.patch
index d28b16a4e..be5b3d20e 100644
--- a/packages/rancher-prometheus-adapter/generated-changes/patch/values.yaml.patch
+++ b/packages/rancher-prometheus-adapter/generated-changes/patch/values.yaml.patch
@@ -9,8 +9,8 @@
  affinity: {}
 
  image:
--  repository: directxman12/k8s-prometheus-adapter-amd64
-+  repository: rancher/mirrored-directxman12-k8s-prometheus-adapter
-  tag: v0.8.4
+-  repository: k8s.gcr.io/prometheus-adapter/prometheus-adapter
++  repository: rancher/mirrored-prometheus-adapter-prometheus-adapter
+  tag: v0.9.0
   pullPolicy: IfNotPresent
diff --git a/packages/rancher-prometheus-adapter/package.yaml b/packages/rancher-prometheus-adapter/package.yaml
index f7e775914..916792a7e 100644
--- a/packages/rancher-prometheus-adapter/package.yaml
+++ b/packages/rancher-prometheus-adapter/package.yaml
@@ -1,4 +1,5 @@
 url: https://github.com/prometheus-community/helm-charts.git
 subdirectory: charts/prometheus-adapter
-commit: 50b719af447594abbae7beeb4d6458a19f8e9689
+commit: 843d2f3d302ca24d745abf539a86ec680ae4f8c0
 version: 100.0.0
+doNotRelease: true
diff --git a/packages/rancher-pushprox/charts/Chart.yaml b/packages/rancher-pushprox/charts/Chart.yaml
index c54b3c733..11ff9e404 100644
--- a/packages/rancher-pushprox/charts/Chart.yaml
+++ b/packages/rancher-pushprox/charts/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v1
-version: 0.1.4
+version: 0.1.5
 appVersion: 0.1.0
 annotations:
   catalog.rancher.io/certified: rancher
diff --git a/packages/rancher-pushprox/charts/templates/_helpers.tpl b/packages/rancher-pushprox/charts/templates/_helpers.tpl
index 458ad21cd..ba2e225a6 100644
--- a/packages/rancher-pushprox/charts/templates/_helpers.tpl
+++ b/packages/rancher-pushprox/charts/templates/_helpers.tpl
@@ -82,7 +82,6 @@ k8s-app: {{ template "pushProxy.proxy.name" . }}
 {{- define "pushProxy.serviceMonitor.labels" -}}
 app: {{ template "pushprox.serviceMonitor.name" . }}
-release: {{ .Release.Name | quote }}
 {{ template "pushProxy.commonLabels" . }}
 {{- end -}}
diff --git a/packages/rancher-pushprox/package.yaml b/packages/rancher-pushprox/package.yaml
index 3b5eacdf6..98913ea9a 100644
--- a/packages/rancher-pushprox/package.yaml
+++ b/packages/rancher-pushprox/package.yaml
@@ -1,2 +1,2 @@
 url: local
-version: 100.0.0
+version: 100.0.1
diff --git a/packages/rancher-windows-exporter/charts/values.yaml b/packages/rancher-windows-exporter/charts/values.yaml
index aa1fd1973..7085ebf2f 100755
--- a/packages/rancher-windows-exporter/charts/values.yaml
+++ b/packages/rancher-windows-exporter/charts/values.yaml
@@ -25,7 +25,7 @@ clients:
   port: 9796
   image:
     repository: rancher/windows_exporter-package
-    tag: v0.0.2
+    tag: v0.0.3
   os: "windows"
 
 # Specify the IP addresses of nodes that you want to collect metrics from
diff --git a/packages/rancher-windows-exporter/package.yaml b/packages/rancher-windows-exporter/package.yaml
index 3b5eacdf6..7a5d89cef 100644
--- a/packages/rancher-windows-exporter/package.yaml
+++ b/packages/rancher-windows-exporter/package.yaml
@@ -1,2 +1,3 @@
 url: local
 version: 100.0.0
+doNotRelease: true