(dev-v2.6-archive) [rancher-monitoring-crd] code refactor

(partially cherry picked from commit cfc2433953)
pull/1680/head
Jiaqi Luo 2021-11-19 12:12:52 -07:00 committed by Arvind Iyengar
parent 7f2f418b2c
commit 5f1dde5a21
101 changed files with 936 additions and 2335 deletions

View File

@@ -1,2 +1,2 @@
url: https://github.com/rancher/backup-restore-operator/releases/download/v2.1.0-rc1/rancher-backup-crd-2.1.0-rc1.tgz
url: https://github.com/rancher/backup-restore-operator/releases/download/v2.0.1/rancher-backup-crd-2.0.1.tgz
version: 2.1.0
version: 2.0.1

View File

@@ -1,2 +1,2 @@
url: https://github.com/rancher/backup-restore-operator/releases/download/v2.1.0-rc1/rancher-backup-2.1.0-rc1.tgz
url: https://github.com/rancher/backup-restore-operator/releases/download/v2.0.1/rancher-backup-2.0.1.tgz
version: 2.1.0
version: 2.0.1

View File

@@ -8,7 +8,7 @@ image:
tag: v1.0.6
securityScan:
repository: rancher/security-scan
tag: v0.2.5-rc2
tag: v0.2.5-rc1
sonobuoy:
repository: rancher/mirrored-sonobuoy-sonobuoy
tag: v0.53.2

View File

@@ -1,7 +1,7 @@
--- charts-original/Chart.yaml
+++ charts/Chart.yaml
@@ -4,12 +4,12 @@
@@ -3,12 +3,12 @@
catalog.cattle.io/kube-version: < 1.22.0
catalog.cattle.io/display-name: External IP Webhook
catalog.cattle.io/namespace: cattle-externalip-system
catalog.cattle.io/os: linux
- catalog.cattle.io/release-name: rancher-externalip-webhook
@@ -9,14 +9,14 @@
+ catalog.cattle.io/release-name: rancher-external-ip-webhook
+ catalog.cattle.io/ui-component: rancher-external-ip-webhook
apiVersion: v1
appVersion: v1.0.1-rc2
appVersion: v1.0.1-rc1
description: |
- Deploy the externalip-webhook to mitigate k8s CVE-2020-8554
+ Deploy the external-ip-webhook to mitigate k8s CVE-2020-8554
home: https://github.com/rancher/externalip-webhook
keywords:
- cve
@@ -20,7 +20,7 @@
@@ -19,7 +19,7 @@
maintainers:
- email: raul@rancher.com
name: rawmind0
@@ -24,4 +24,4 @@
+name: rancher-external-ip-webhook
sources:
- https://github.com/rancher/externalip-webhook
version: 1.0.1-rc2
version: 1.0.1-rc1

View File

@@ -1,2 +1,2 @@
url: https://github.com/rancher/externalip-webhook/releases/download/v1.0.1-rc2/rancher-externalip-webhook-1.0.1-rc2.tgz
url: https://github.com/rancher/externalip-webhook/releases/download/v1.0.1-rc1/rancher-externalip-webhook-1.0.1-rc1.tgz
version: 100.0.1

View File

@ -10,9 +10,9 @@
apiVersion: v2 apiVersion: v2
-name: grafana -name: grafana
+name: rancher-grafana +name: rancher-grafana
version: 6.11.0 version: 6.16.14
-appVersion: 7.5.5 -appVersion: 8.2.1
+appVersion: 7.5.8 +appVersion: 7.5.11
kubeVersion: '^1.8.0-0' kubeVersion: '^1.8.0-0'
description: The leading tool for querying and visualizing time series and metrics. description: The leading tool for querying and visualizing time series and metrics.
home: https://grafana.net home: https://grafana.net

View File

@@ -0,0 +1,24 @@
--- charts-original/README.md
+++ charts/README.md
@@ -59,8 +59,8 @@
| `securityContext` | Deployment securityContext | `{"runAsUser": 472, "runAsGroup": 472, "fsGroup": 472}` |
| `priorityClassName` | Name of Priority Class to assign pods | `nil` |
| `image.repository` | Image repository | `grafana/grafana` |
-| `image.tag` | Image tag (`Must be >= 5.0.0`) | `8.0.3` |
-| `image.sha` | Image sha (optional) | `80c6d6ac633ba5ab3f722976fb1d9a138f87ca6a9934fcd26a5fc28cbde7dbfa` |
+| `image.tag` | Image tag (`Must be >= 5.0.0`) | `7.5.11` |
+| `image.sha` | Image sha (optional) | `` |
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
| `image.pullSecrets` | Image pull secrets | `{}` |
| `service.enabled` | Enable grafana service | `true` |
@@ -188,8 +188,8 @@
| `rbac.extraRoleRules` | Additional rules to add to the Role | [] |
| `rbac.extraClusterRoleRules` | Additional rules to add to the ClusterRole | [] |
| `command` | Define command to be executed by grafana container at startup | `nil` |
-| `testFramework.enabled` | Whether to create test-related resources | `true` |
-| `testFramework.image` | `test-framework` image repository. | `bats/bats` |
+| `testFramework.enabled` | Whether to create test-related resources | `false` |
+| `testFramework.image` | `test-framework` image repository. | `rancher/mirrored-bats-bats` |
| `testFramework.tag` | `test-framework` image tag. | `v1.1.0` |
| `testFramework.imagePullPolicy` | `test-framework` image pull policy. | `IfNotPresent` |
| `testFramework.securityContext` | `test-framework` securityContext | `{}` |

View File

@@ -5,7 +5,7 @@
{{- define "grafana.pod" -}}
{{- if .Values.schedulerName }}
schedulerName: "{{ .Values.schedulerName }}"
@@ -21,9 +20,9 @@
@@ -22,9 +21,9 @@
{{- if ( and .Values.persistence.enabled .Values.initChownData.enabled ) }}
- name: init-chown-data
{{- if .Values.initChownData.image.sha }}
@@ -17,7 +17,7 @@
{{- end }}
imagePullPolicy: {{ .Values.initChownData.image.pullPolicy }}
securityContext:
@@ -42,9 +41,9 @@
@@ -43,9 +42,9 @@
{{- if .Values.dashboards }}
- name: download-dashboards
{{- if .Values.downloadDashboardsImage.sha }}
@@ -29,7 +29,7 @@
{{- end }}
imagePullPolicy: {{ .Values.downloadDashboardsImage.pullPolicy }}
command: ["/bin/sh"]
@@ -79,11 +78,16 @@
@@ -80,11 +79,16 @@
{{- if .Values.sidecar.datasources.enabled }}
- name: {{ template "grafana.name" . }}-sc-datasources
{{- if .Values.sidecar.image.sha }}
@@ -48,7 +48,7 @@
env:
- name: METHOD
value: LIST
@@ -118,9 +122,9 @@
@@ -119,9 +123,9 @@
{{- if .Values.sidecar.notifiers.enabled }}
- name: {{ template "grafana.name" . }}-sc-notifiers
{{- if .Values.sidecar.image.sha }}
@@ -60,7 +60,7 @@
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
@@ -163,9 +167,9 @@
@@ -165,9 +169,9 @@
{{- if .Values.sidecar.dashboards.enabled }}
- name: {{ template "grafana.name" . }}-sc-dashboard
{{- if .Values.sidecar.image.sha }}
@@ -72,7 +72,7 @@
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
@@ -205,9 +209,9 @@
@@ -207,9 +211,9 @@
{{- end}}
- name: {{ .Chart.Name }}
{{- if .Values.image.sha }}
@@ -84,7 +84,7 @@
{{- end }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
{{- if .Values.command }}
@@ -307,7 +311,7 @@
@@ -315,7 +319,7 @@
{{- end }}
ports:
- name: {{ .Values.service.portName }}
@@ -93,7 +93,7 @@
protocol: TCP
- name: {{ .Values.podPortName }}
containerPort: 3000
@@ -388,17 +392,17 @@
@@ -402,17 +406,17 @@
{{- with .Values.extraContainers }}
{{ tpl . $ | indent 2 }}
{{- end }}

View File

@@ -21,7 +21,7 @@
{{- end }}
imagePullPolicy: {{ .Values.imageRenderer.image.pullPolicy }}
{{- if .Values.imageRenderer.command }}
@@ -97,16 +99,16 @@
@@ -101,16 +103,16 @@
resources:
{{ toYaml . | indent 12 }}
{{- end }}

View File

@@ -1,7 +1,7 @@
--- charts-original/templates/podsecuritypolicy.yaml
+++ charts/templates/podsecuritypolicy.yaml
@@ -6,13 +5,9 @@
@@ -5,13 +5,9 @@
namespace: {{ template "grafana.namespace" . }}
name: {{ template "grafana.fullname" . }}
labels:
{{- include "grafana.labels" . | nindent 4 }}
- annotations:

View File

@@ -0,0 +1,11 @@
--- charts-original/templates/tests/test.yaml
+++ charts/templates/tests/test.yaml
@@ -33,7 +33,7 @@
{{- end }}
containers:
- name: {{ .Release.Name }}-test
- image: "{{ .Values.testFramework.image}}:{{ .Values.testFramework.tag }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.testFramework.image}}:{{ .Values.testFramework.tag }}"
imagePullPolicy: "{{ .Values.testFramework.imagePullPolicy}}"
command: ["/opt/bats/bin/bats", "-t", "/tests/run.sh"]
volumeMounts:

View File

@@ -1,12 +1,10 @@
--- charts-original/values.yaml
+++ charts/values.yaml
@@ -1,9 +1,25 @@
@@ -1,9 +1,23 @@
+global:
+ cattle:
+ systemDefaultRegistry: ""
+
+autoscaling:
+ enabled: false
rbac:
create: true
## Use an existing ClusterRole/Role (depending on rbac.namespaced false/true)
@@ -27,22 +25,24 @@
namespaced: false
extraRoleRules: []
# - apiGroups: []
@@ -68,8 +84,8 @@
@@ -69,8 +83,8 @@
# schedulerName: "default-scheduler"
image:
- repository: grafana/grafana
- tag: 7.5.5
- tag: 8.2.1
+ repository: rancher/mirrored-grafana-grafana
+ tag: 7.5.8
+ tag: 7.5.11
sha: ""
pullPolicy: IfNotPresent
@@ -82,12 +98,15 @@
@@ -82,13 +96,16 @@
# - myRegistrKeySecretName
testFramework:
enabled: true
- enabled: true
- image: "bats/bats"
+ enabled: false
+ image: "rancher/mirrored-bats-bats"
tag: "v1.1.0"
imagePullPolicy: IfNotPresent
@@ -56,7 +56,7 @@
runAsUser: 472
runAsGroup: 472
fsGroup: 472
@@ -115,8 +134,8 @@
@@ -116,8 +133,8 @@
# priorityClassName:
downloadDashboardsImage:
@@ -67,7 +67,16 @@
sha: ""
pullPolicy: IfNotPresent
@@ -301,7 +320,7 @@
@@ -189,7 +206,7 @@
labels: {}
path: /
- # pathType is only for k8s >= 1.1=
+ # pathType is only for k8s >= 1.18
pathType: Prefix
hosts:
@@ -303,7 +320,7 @@
## initChownData container image
##
image:
@@ -76,18 +85,16 @@
tag: "1.31.1"
sha: ""
pullPolicy: IfNotPresent
@@ -602,8 +621,8 @@
@@ -614,7 +631,7 @@
## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards
sidecar:
image:
- repository: quay.io/kiwigrid/k8s-sidecar
- tag: 1.10.7
+ repository: rancher/mirrored-kiwigrid-k8s-sidecar
+ tag: 1.12.2
tag: 1.12.3
sha: ""
imagePullPolicy: IfNotPresent
resources: {}
@@ -702,9 +719,9 @@
@@ -690,9 +709,9 @@
replicas: 1
image:
# image-renderer Image repository

View File

@@ -1,4 +1,5 @@
url: https://github.com/grafana/helm-charts.git
subdirectory: charts/grafana
commit: bf7e1110a5ee9258190d0377fea319bb8e764e62
commit: ad033c2a2b1f81284a924fb8627ca00700952fc5
version: 100.0.0
doNotRelease: true

View File

@@ -8,16 +8,16 @@
{{- end }}
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
@@ -173,7 +174,7 @@
@@ -107,7 +108,7 @@
readOnly: true
{{- end }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
- image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.image.repository }}:{{ .Values.image.tag }}"
ports:
- containerPort: 8080
- containerPort: {{ .Values.service.port | default 8080}}
{{- if .Values.selfMonitor.enabled }}
@@ -207,12 +208,12 @@
@@ -141,12 +142,12 @@
affinity:
{{ toYaml .Values.affinity | indent 8 }}
{{- end }}

View File

@@ -0,0 +1,9 @@
--- charts-original/templates/role.yaml
+++ charts/templates/role.yaml
@@ -1,3 +1,6 @@
+{{- if not (kindIs "slice" .Values.collectors) }}
+{{- fail "Collectors need to be a List since kube-state-metrics chart 3.2.2. Please check README for more information."}}
+{{- end }}
{{- if and (eq .Values.rbac.create true) (not .Values.rbac.useExistingRole) -}}
{{- range (split "," .Values.namespaces) }}
---

View File

@@ -10,10 +10,10 @@
image:
- repository: k8s.gcr.io/kube-state-metrics/kube-state-metrics
+ repository: rancher/mirrored-kube-state-metrics-kube-state-metrics
tag: v2.0.0
tag: v2.2.0
pullPolicy: IfNotPresent
@@ -84,6 +88,7 @@
@@ -86,6 +90,7 @@
securityContext:
enabled: true

View File

@@ -1,4 +1,5 @@
url: https://github.com/prometheus-community/helm-charts.git
subdirectory: charts/kube-state-metrics
commit: 086f1f7f0870e110abf30aa6bfe7c141e83cc950
commit: 3f371027f2c384cb2e58b46b2249b6bfa200b1e7
version: 100.0.0
doNotRelease: true

View File

@@ -1,12 +1,12 @@
dependencies:
- name: kube-state-metrics
repository: https://prometheus-community.github.io/helm-charts
version: 3.1.1
version: 3.5.2
- name: prometheus-node-exporter
repository: https://prometheus-community.github.io/helm-charts
version: 1.18.1
version: 2.0.4
- name: grafana
repository: https://grafana.github.io/helm-charts
version: 6.12.0
version: 6.16.10
digest: sha256:11886645ff1ade77d0fefdca90afba4a92f2b535997280074a59828e8d1dab4e
digest: sha256:94dad976ca1630e9e3cd006fadb255783387b53bd9d0d19e105bd39d8e8e34be
generated: "2021-06-09T16:56:40.364303181+02:00"
generated: "2021-09-28T10:26:46.319411+07:00"

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""Fetch dashboards from provided urls into this chart."""
import json
import re
import textwrap
from os import makedirs, path
@@ -26,16 +27,18 @@ def change_style(style, representer):
# Source files list
charts = [
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml',
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml',
'destination': '../templates/grafana/dashboards-1.14',
'type': 'yaml',
'min_kubernetes': '1.14.0-0'
'min_kubernetes': '1.14.0-0',
'multicluster_key': '.Values.grafana.sidecar.dashboards.multicluster.global.enabled',
},
{
'source': 'https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/grafana.json',
'destination': '../templates/grafana/dashboards-1.14',
'type': 'json',
'min_kubernetes': '1.14.0-0'
'min_kubernetes': '1.14.0-0',
'multicluster_key': '(or .Values.grafana.sidecar.dashboards.multicluster.global.enabled .Values.grafana.sidecar.dashboards.multicluster.etcd.enabled)'
},
]
@@ -64,7 +67,7 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-promet
apiVersion: v1
kind: ConfigMap
metadata:
namespace: {{ template "kube-prometheus-stack.namespace" . }}
namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
name: {{ printf "%%s-%%s" (include "kube-prometheus-stack.fullname" $) "%(name)s" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -103,20 +106,29 @@ def yaml_str_repr(struct, indent=2):
text = textwrap.indent(text, ' ' * indent)
return text
def patch_dashboards_json(content, multicluster_key):
def patch_json_for_multicluster_configuration(content):
try:
content_struct = json.loads(content)
# multicluster
overwrite_list = []
for variable in content_struct['templating']['list']:
if variable['name'] == 'cluster':
variable['hide'] = ':multicluster:'
overwrite_list.append(variable)
content_struct['templating']['list'] = overwrite_list
# fix drilldown links. See https://github.com/kubernetes-monitoring/kubernetes-mixin/issues/659
for row in content_struct['rows']:
for panel in row['panels']:
for style in panel.get('styles', []):
if 'linkUrl' in style and style['linkUrl'].startswith('./d'):
style['linkUrl'] = style['linkUrl'].replace('./d', '/d')
content_array = []
original_content_lines = content.split('\n')
for i, line in enumerate(json.dumps(content_struct, indent=4).split('\n')):
if ('[]' not in line and '{}' not in line) or line == original_content_lines[i]:
if (' []' not in line and ' {}' not in line) or line == original_content_lines[i]:
content_array.append(line)
continue
@@ -136,7 +148,7 @@ def patch_json_for_multicluster_configuration(content):
if multicluster != -1:
content = ''.join((
content[:multicluster-1],
'\{\{ if .Values.grafana.sidecar.dashboards.multicluster \}\}0\{\{ else \}\}2\{\{ end \}\}',
'\{\{ if %s \}\}0\{\{ else \}\}2\{\{ end \}\}' % multicluster_key,
content[multicluster + 15:]
))
except (ValueError, KeyError):
@@ -145,7 +157,12 @@ def patch_json_for_multicluster_configuration(content):
return content
def write_group_to_file(resource_name, content, url, destination, min_kubernetes, max_kubernetes):
def patch_json_set_timezone_as_variable(content):
# content is no more in json format, so we have to replace using regex
return re.sub(r'"timezone"\s*:\s*"(?:\\.|[^\"])*"', '"timezone": "\{\{ .Values.grafana.defaultDashboardsTimezone \}\}"', content, flags=re.IGNORECASE)
def write_group_to_file(resource_name, content, url, destination, min_kubernetes, max_kubernetes, multicluster_key):
# initialize header
lines = header % {
'name': resource_name,
@@ -155,7 +172,8 @@ def write_group_to_file(resource_name, content, url, destination, min_kubernetes
'max_kubernetes': max_kubernetes
}
content = patch_json_for_multicluster_configuration(content)
content = patch_dashboards_json(content, multicluster_key)
content = patch_json_set_timezone_as_variable(content)
filename_struct = {resource_name + '.json': (LiteralStr(content))}
# rules themselves
@@ -196,17 +214,17 @@ def main():
groups = yaml_text['items']
for group in groups:
for resource, content in group['data'].items():
write_group_to_file(resource.replace('.json', ''), content, chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'])
write_group_to_file(resource.replace('.json', ''), content, chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'], chart['multicluster_key'])
elif chart['type'] == 'json':
json_text = json.loads(raw_text)
# is it already a dashboard structure or is it nested (etcd case)?
flat_structure = bool(json_text.get('annotations'))
if flat_structure:
resource = path.basename(chart['source']).replace('.json', '')
write_group_to_file(resource, json.dumps(json_text, indent=4), chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'])
write_group_to_file(resource, json.dumps(json_text, indent=4), chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'], chart['multicluster_key'])
else:
for resource, content in json_text.items():
write_group_to_file(resource.replace('.json', ''), json.dumps(content, indent=4), chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'])
write_group_to_file(resource.replace('.json', ''), json.dumps(content, indent=4), chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'], chart['multicluster_key'])
print("Finished")

View File

@@ -25,7 +25,37 @@ def change_style(style, representer):
# Source files list
charts = [
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml',
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/alertmanager-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-state-metrics-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/node-exporter-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-operator-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
@@ -34,18 +64,6 @@ charts = [
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml',
'destination': '../templates/prometheus/rules',
'min_kubernetes': '1.10.0-0',
'max_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/etcd3_alert.rules.yml',
'destination': '../templates/prometheus/rules',
'min_kubernetes': '1.10.0-0',
'max_kubernetes': '1.14.0-0'
},
]
# Additional conditions map
@@ -93,6 +111,7 @@ alert_condition_map = {
'NodeExporterDown': '.Values.nodeExporter.enabled',
'CoreDNSDown': '.Values.kubeDns.enabled',
'AlertmanagerDown': '.Values.alertmanager.enabled',
'AggregatedAPIDown': 'semverCompare ">=1.18.0-0" $kubeTargetVersion',
}
replacement_map = {

View File

@@ -1,63 +0,0 @@
{{- /*
Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }}
{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }}
{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: alertmanager.rules
rules:
- alert: AlertmanagerConfigInconsistent
annotations:
message: The configuration of the instances of the Alertmanager cluster `{{`{{`}}$labels.service{{`}}`}}` are out of sync.
expr: count_values("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="{{ $operatorJob }}",namespace="{{ $namespace }}",controller="alertmanager"}) by (name, job, namespace, controller), "service", "$1", "name", "(.*)") != 1
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: AlertmanagerFailedReload
annotations:
message: Reloading Alertmanager's configuration has failed for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}.
expr: alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: AlertmanagerMembersInconsistent
annotations:
message: Alertmanager has not found all other members of the cluster.
expr: |-
alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
!= on (service) GROUP_LEFT()
count by (service) (alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@@ -1,179 +0,0 @@
{{- /*
Generated from 'etcd' group from https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/etcd3_alert.rules.yml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeEtcd.enabled .Values.defaultRules.rules.etcd }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "etcd" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: etcd
rules:
- alert: etcdInsufficientMembers
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).'
expr: sum(up{job=~".*etcd.*"} == bool 1) by (job) < ((count(up{job=~".*etcd.*"}) by (job) + 1) / 2)
for: 3m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdNoLeader
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member {{`{{`}} $labels.instance {{`}}`}} has no leader.'
expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
for: 1m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfLeaderChanges
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": instance {{`{{`}} $labels.instance {{`}}`}} has seen {{`{{`}} $value {{`}}`}} leader changes within the last hour.'
expr: rate(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}[15m]) > 3
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
/
sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
> 1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
/
sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
> 5
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdGRPCRequestsSlow
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": gRPC requests to {{`{{`}} $labels.grpc_method {{`}}`}} are taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le))
> 0.15
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdMemberCommunicationSlow
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member communication with {{`{{`}} $labels.To {{`}}`}} is taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.15
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedProposals
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last hour on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighFsyncDurations
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.5
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighCommitDurations
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile commit durations {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.25
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedHTTPRequests
annotations:
message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}'
expr: |-
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
BY (method) > 0.01
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedHTTPRequests
annotations:
message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
BY (method) > 0.05
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHTTPRequestsSlow
annotations:
message: etcd instance {{`{{`}} $labels.instance {{`}}`}} HTTP requests to {{`{{`}} $labels.method {{`}}`}} are slow.
expr: |-
histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
> 0.15
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@@ -1,56 +0,0 @@
{{- /*
Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.general }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "general.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: general.rules
rules:
- alert: TargetDown
annotations:
message: '{{`{{`}} $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}} targets are down.'
expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: Watchdog
annotations:
message: 'This is an alert meant to ensure that the entire alerting pipeline is functional.
This alert is always firing, therefore it should always be firing in Alertmanager
and always fire against a receiver. There are integrations with various notification
mechanisms that send a notification when this alert is not firing. For example the
"DeadMansSnitch" integration in PagerDuty.
'
expr: vector(1)
labels:
severity: none
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@@ -1,83 +0,0 @@
{{- /*
Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8s }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: k8s.rules
rules:
- expr: sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace)
record: namespace:container_cpu_usage_seconds_total:sum_rate
- expr: sum(container_memory_usage_bytes{job="kubelet", image!="", container_name!=""}) by (namespace)
record: namespace:container_memory_usage_bytes:sum
- expr: |-
sum by (namespace, pod_name, container_name) (
rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])
)
record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate
- expr: |-
sum by(namespace) (
kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}
* on (endpoint, instance, job, namespace, pod, service)
group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)
)
record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum
- expr: |-
sum by (namespace) (
kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"}
* on (endpoint, instance, job, namespace, pod, service)
group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)
)
record: namespace_name:kube_pod_container_resource_requests_cpu_cores:sum
- expr: |-
sum(
label_replace(
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
"replicaset", "$1", "owner_name", "(.*)"
) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job="kube-state-metrics"},
"workload", "$1", "owner_name", "(.*)"
)
) by (namespace, workload, pod)
labels:
workload_type: deployment
record: mixin_pod_workload
- expr: |-
sum(
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
"workload", "$1", "owner_name", "(.*)"
)
) by (namespace, workload, pod)
labels:
workload_type: daemonset
record: mixin_pod_workload
- expr: |-
sum(
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
"workload", "$1", "owner_name", "(.*)"
)
) by (namespace, workload, pod)
labels:
workload_type: statefulset
record: mixin_pod_workload
{{- end }}

View File

@@ -1,39 +0,0 @@
{{- /*
Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserver }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kube-apiserver.rules
rules:
- expr: histogram_quantile(0.99, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.99'
record: cluster_quantile:apiserver_request_latencies:histogram_quantile
- expr: histogram_quantile(0.9, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.9'
record: cluster_quantile:apiserver_request_latencies:histogram_quantile
- expr: histogram_quantile(0.5, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.5'
record: cluster_quantile:apiserver_request_latencies:histogram_quantile
{{- end }}

View File

@@ -1,47 +0,0 @@
{{- /*
Generated from 'kube-prometheus-node-alerting.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeAlerting }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-node-alerting.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kube-prometheus-node-alerting.rules
rules:
- alert: NodeDiskRunningFull
annotations:
message: Device {{`{{`}} $labels.device {{`}}`}} of node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} will be full within the next 24 hours.
expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)'
for: 30m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: NodeDiskRunningFull
annotations:
message: Device {{`{{`}} $labels.device {{`}}`}} of node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} will be full within the next 2 hours.
expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)'
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@@ -1,41 +0,0 @@
{{- /*
Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeRecording }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-node-recording.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kube-prometheus-node-recording.rules
rules:
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)
record: instance:node_cpu:rate:sum
- expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance)
record: instance:node_filesystem_usage:sum
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
record: instance:node_network_receive_bytes:rate:sum
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
record: instance:node_network_transmit_bytes:rate:sum
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
record: instance:node_cpu:ratio
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
record: cluster:node_cpu:sum_rate5m
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
record: cluster:node_cpu:ratio
{{- end }}

View File

@@ -1,63 +0,0 @@
{{- /*
Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeScheduler }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-scheduler.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kube-scheduler.rules
rules:
- expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.99'
record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
- expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.99'
record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
- expr: histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.99'
record: cluster_quantile:scheduler_binding_latency:histogram_quantile
- expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.9'
record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
- expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.9'
record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
- expr: histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.9'
record: cluster_quantile:scheduler_binding_latency:histogram_quantile
- expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.5'
record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
- expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.5'
record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
- expr: histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.5'
record: cluster_quantile:scheduler_binding_latency:histogram_quantile
{{- end }}

View File

@@ -1,159 +0,0 @@
{{- /*
Generated from 'kubernetes-absent' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesAbsent }}
{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }}
{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }}
{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-absent" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kubernetes-absent
rules:
{{- if .Values.alertmanager.enabled }}
- alert: AlertmanagerDown
annotations:
message: Alertmanager has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerdown
expr: absent(up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeDns.enabled }}
- alert: CoreDNSDown
annotations:
message: CoreDNS has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-corednsdown
expr: absent(up{job="kube-dns"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeApiServer.enabled }}
- alert: KubeAPIDown
annotations:
message: KubeAPI has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapidown
expr: absent(up{job="apiserver"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeControllerManager.enabled }}
- alert: KubeControllerManagerDown
annotations:
message: KubeControllerManager has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecontrollermanagerdown
expr: absent(up{job="kube-controller-manager"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeScheduler.enabled }}
- alert: KubeSchedulerDown
annotations:
message: KubeScheduler has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeschedulerdown
expr: absent(up{job="kube-scheduler"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeStateMetrics.enabled }}
- alert: KubeStateMetricsDown
annotations:
message: KubeStateMetrics has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricsdown
expr: absent(up{job="kube-state-metrics"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.prometheusOperator.kubeletService.enabled }}
- alert: KubeletDown
annotations:
message: Kubelet has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletdown
expr: absent(up{job="kubelet"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.nodeExporter.enabled }}
- alert: NodeExporterDown
annotations:
message: NodeExporter has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeexporterdown
expr: absent(up{job="node-exporter"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
- alert: PrometheusDown
annotations:
message: Prometheus has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusdown
expr: absent(up{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- if .Values.prometheusOperator.enabled }}
- alert: PrometheusOperatorDown
annotations:
message: PrometheusOperator has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusoperatordown
expr: absent(up{job="{{ $operatorJob }}",namespace="{{ $namespace }}"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -1,200 +0,0 @@
{{- /*
Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesApps }}
{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kubernetes-apps
rules:
- alert: KubePodCrashLooping
annotations:
message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 5 minutes.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping
expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) * 60 * 5 > 0
for: 1h
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubePodNotReady
annotations:
message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than an hour.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodnotready
expr: sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown"}) > 0
for: 1h
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeDeploymentGenerationMismatch
annotations:
message: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentgenerationmismatch
expr: |-
kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeDeploymentReplicasMismatch
annotations:
message: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than an hour.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentreplicasmismatch
expr: |-
kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for: 1h
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeStatefulSetReplicasMismatch
annotations:
message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetreplicasmismatch
expr: |-
kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeStatefulSetGenerationMismatch
annotations:
message: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetgenerationmismatch
expr: |-
kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeStatefulSetUpdateNotRolledOut
annotations:
message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetupdatenotrolledout
expr: |-
max without (revision) (
kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
unless
kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
)
*
(
kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeDaemonSetRolloutStuck
annotations:
message: Only {{`{{`}} $value {{`}}`}}% of the desired Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are scheduled and ready.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetrolloutstuck
expr: |-
kube_daemonset_status_number_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
/
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} * 100 < 100
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeDaemonSetNotScheduled
annotations:
message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetnotscheduled
expr: |-
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
-
kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeDaemonSetMisScheduled
annotations:
message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetmisscheduled
expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeCronJobRunning
annotations:
message: CronJob {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.cronjob {{`}}`}} is taking more than 1h to complete.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecronjobrunning
expr: time() - kube_cronjob_next_schedule_time{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 3600
for: 1h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeJobCompletion
annotations:
message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than one hour to complete.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobcompletion
expr: kube_job_spec_completions{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - kube_job_status_succeeded{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
for: 1h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeJobFailed
annotations:
message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobfailed
expr: kube_job_status_failed{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
for: 1h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,121 +0,0 @@
{{- /*
Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesResources }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-resources" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kubernetes-resources
rules:
- alert: KubeCPUOvercommit
annotations:
message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit
expr: |-
sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum)
/
sum(node:node_num_cpu:sum)
>
(count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum)
for: 5m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeMemOvercommit
annotations:
message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit
expr: |-
sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum)
/
sum(node_memory_MemTotal_bytes)
>
(count(node:node_num_cpu:sum)-1)
/
count(node:node_num_cpu:sum)
for: 5m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeCPUOvercommit
annotations:
message: Cluster has overcommitted CPU resource requests for Namespaces.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit
expr: |-
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
/
sum(node:node_num_cpu:sum)
> 1.5
for: 5m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeMemOvercommit
annotations:
message: Cluster has overcommitted memory resource requests for Namespaces.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit
expr: |-
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
/
sum(node_memory_MemTotal_bytes{job="node-exporter"})
> 1.5
for: 5m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeQuotaExceeded
annotations:
message: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} printf "%0.0f" $value {{`}}`}}% of its {{`{{`}} $labels.resource {{`}}`}} quota.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubequotaexceeded
expr: |-
100 * kube_resourcequota{job="kube-state-metrics", type="used"}
/ ignoring(instance, job, type)
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
> 90
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: CPUThrottlingHigh
annotations:
message: '{{`{{`}} printf "%0.0f" $value {{`}}`}}% throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container_name {{`}}`}} in pod {{`{{`}} $labels.pod_name {{`}}`}}.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-cputhrottlinghigh
expr: |-
100 * sum(increase(container_cpu_cfs_throttled_periods_total{container_name!="", }[5m])) by (container_name, pod_name, namespace)
/
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container_name, pod_name, namespace)
> 25
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,72 +0,0 @@
{{- /*
Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesStorage }}
{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-storage" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kubernetes-storage
rules:
- alert: KubePersistentVolumeUsageCritical
annotations:
message: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} printf "%0.2f" $value {{`}}`}}% free.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeusagecritical
expr: |-
100 * kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
/
kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
< 3
for: 1m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubePersistentVolumeFullInFourDays
annotations:
message: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} printf "%0.2f" $value {{`}}`}}% is available.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefullinfourdays
expr: |-
100 * (
kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
/
kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
) < 15
and
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}[6h], 4 * 24 * 3600) < 0
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubePersistentVolumeErrors
annotations:
message: The persistent volume {{`{{`}} $labels.persistentvolume {{`}}`}} has status {{`{{`}} $labels.phase {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeerrors
expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,184 +0,0 @@
{{- /*
Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kubernetes-system
rules:
- alert: KubeNodeNotReady
annotations:
message: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than an hour.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubenodenotready
expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
for: 1h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeVersionMismatch
annotations:
message: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeversionmismatch
expr: count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
for: 1h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeClientErrors
annotations:
message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} printf "%0.0f" $value {{`}}`}}% errors.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors
expr: |-
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
/
sum(rate(rest_client_requests_total[5m])) by (instance, job))
* 100 > 1
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeClientErrors
annotations:
message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} printf "%0.0f" $value {{`}}`}} errors / second.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors
expr: sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeletTooManyPods
annotations:
message: Kubelet {{`{{`}} $labels.instance {{`}}`}} is running {{`{{`}} $value {{`}}`}} Pods, close to the limit of 110.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubelettoomanypods
expr: kubelet_running_pod_count{job="kubelet"} > 110 * 0.9
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeAPILatencyHigh
annotations:
message: The API server has a 99th percentile latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh
expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeAPILatencyHigh
annotations:
message: The API server has a 99th percentile latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh
expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
expr: |-
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
expr: |-
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
expr: |-
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
expr: |-
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeClientCertificateExpiration
annotations:
message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration
expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeClientCertificateExpiration
annotations:
message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration
expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,57 +0,0 @@
{{- /*
Generated from 'node-network' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.network }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-network" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: node-network
rules:
- alert: NetworkReceiveErrors
annotations:
message: Network interface "{{`{{`}} $labels.device {{`}}`}}" showing receive errors on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}"
expr: rate(node_network_receive_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0
for: 2m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: NetworkTransmitErrors
annotations:
message: Network interface "{{`{{`}} $labels.device {{`}}`}}" showing transmit errors on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}"
expr: rate(node_network_transmit_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0
for: 2m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: NodeNetworkInterfaceFlapping
annotations:
        message: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing its up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}"
expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
for: 2m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,37 +0,0 @@
{{- /*
Generated from 'node-time' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.time }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-time" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: node-time
rules:
- alert: ClockSkewDetected
annotations:
message: Clock skew detected on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}. Ensure NTP is configured correctly on this host.
expr: abs(node_timex_offset_seconds{job="node-exporter"}) > 0.03
for: 2m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,202 +0,0 @@
{{- /*
Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.node }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: node.rules
rules:
- expr: sum(min(kube_pod_info) by (node))
record: ':kube_pod_info_node_count:'
- expr: max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod)
record: 'node_namespace_pod:kube_pod_info:'
- expr: |-
count by (node) (sum by (node, cpu) (
node_cpu_seconds_total{job="node-exporter"}
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
))
record: node:node_num_cpu:sum
- expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]))
record: :node_cpu_utilisation:avg1m
- expr: |-
1 - avg by (node) (
rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:)
record: node:node_cpu_utilisation:avg1m
- expr: |-
node:node_cpu_utilisation:avg1m
*
node:node_num_cpu:sum
/
scalar(sum(node:node_num_cpu:sum))
record: node:cluster_cpu_utilisation:ratio
- expr: |-
sum(node_load1{job="node-exporter"})
/
sum(node:node_num_cpu:sum)
record: ':node_cpu_saturation_load1:'
- expr: |-
sum by (node) (
node_load1{job="node-exporter"}
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
/
node:node_num_cpu:sum
record: 'node:node_cpu_saturation_load1:'
- expr: |-
1 -
sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
/
sum(node_memory_MemTotal_bytes{job="node-exporter"})
record: ':node_memory_utilisation:'
- expr: sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
record: :node_memory_MemFreeCachedBuffers_bytes:sum
- expr: sum(node_memory_MemTotal_bytes{job="node-exporter"})
record: :node_memory_MemTotal_bytes:sum
- expr: |-
sum by (node) (
(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_memory_bytes_available:sum
- expr: |-
sum by (node) (
node_memory_MemTotal_bytes{job="node-exporter"}
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_memory_bytes_total:sum
- expr: |-
(node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
/
node:node_memory_bytes_total:sum
record: node:node_memory_utilisation:ratio
- expr: |-
(node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
/
scalar(sum(node:node_memory_bytes_total:sum))
record: node:cluster_memory_utilisation:ratio
- expr: |-
1e3 * sum(
(rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
+ rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
)
record: :node_memory_swap_io_bytes:sum_rate
- expr: |-
1 -
sum by (node) (
(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
/
sum by (node) (
node_memory_MemTotal_bytes{job="node-exporter"}
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: 'node:node_memory_utilisation:'
- expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum)
record: 'node:node_memory_utilisation_2:'
- expr: |-
1e3 * sum by (node) (
(rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
+ rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_memory_swap_io_bytes:sum_rate
- expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
record: :node_disk_utilisation:avg_irate
- expr: |-
avg by (node) (
irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_disk_utilisation:avg_irate
- expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
record: :node_disk_saturation:avg_irate
- expr: |-
avg by (node) (
irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_disk_saturation:avg_irate
- expr: |-
max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
- node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
/ node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
record: 'node:node_filesystem_usage:'
- expr: max by (instance, namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
record: 'node:node_filesystem_avail:'
- expr: |-
sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) +
sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
record: :node_net_utilisation:sum_irate
- expr: |-
sum by (node) (
(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) +
irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_net_utilisation:sum_irate
- expr: |-
sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) +
sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
record: :node_net_saturation:sum_irate
- expr: |-
sum by (node) (
(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) +
irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_net_saturation:sum_irate
- expr: |-
max(
max(
kube_pod_info{job="kube-state-metrics", host_ip!=""}
) by (node, host_ip)
* on (host_ip) group_right (node)
label_replace(
(max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
)
) by (node)
record: 'node:node_inodes_total:'
- expr: |-
max(
max(
kube_pod_info{job="kube-state-metrics", host_ip!=""}
) by (node, host_ip)
* on (host_ip) group_right (node)
label_replace(
(max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
)
) by (node)
record: 'node:node_inodes_free:'
{{- end }}

View File

@ -1,49 +0,0 @@
{{- /*
Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheusOperator }}
{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus-operator" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: prometheus-operator
rules:
- alert: PrometheusOperatorReconcileErrors
annotations:
message: Errors while reconciling {{`{{`}} $labels.controller {{`}}`}} in {{`{{`}} $labels.namespace {{`}}`}} Namespace.
expr: rate(prometheus_operator_reconcile_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusOperatorNodeLookupErrors
annotations:
message: Errors while reconciling Prometheus in {{`{{`}} $labels.namespace {{`}}`}} Namespace.
expr: rate(prometheus_operator_node_address_lookup_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,139 +0,0 @@
{{- /*
Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheus }}
{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: prometheus.rules
rules:
- alert: PrometheusConfigReloadFailed
annotations:
description: Reloading Prometheus' configuration has failed for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}}
summary: Reloading Prometheus' configuration failed
expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusNotificationQueueRunningFull
annotations:
description: Prometheus' alert notification queue is running full for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}
summary: Prometheus' alert notification queue is running full
expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusErrorSendingAlerts
annotations:
description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}}
summary: Errors while sending alert from Prometheus
expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.01
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusErrorSendingAlerts
annotations:
description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}}
summary: Errors while sending alerts from Prometheus
expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.03
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusNotConnectedToAlertmanagers
annotations:
description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} is not connected to any Alertmanagers
summary: Prometheus is not connected to any Alertmanagers
expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} < 1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusTSDBReloadsFailing
annotations:
description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} reload failures over the last four hours.'
summary: Prometheus has issues reloading data blocks from disk
expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
for: 12h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusTSDBCompactionsFailing
annotations:
description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last four hours.'
summary: Prometheus has issues compacting sample blocks
expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
for: 12h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusTSDBWALCorruptions
annotations:
description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} has a corrupted write-ahead log (WAL).'
summary: Prometheus write-ahead log is corrupted
expr: prometheus_tsdb_wal_corruptions_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} > 0
for: 4h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusNotIngestingSamples
annotations:
description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} isn't ingesting samples.
summary: Prometheus isn't ingesting samples
expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusTargetScrapesDuplicate
annotations:
description: '{{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has many samples rejected due to duplicate timestamps but different values'
summary: Prometheus has many samples rejected
expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,97 @@
suite: test ingress
templates:
- alertmanager/ingress.yaml
tests:
- it: should be empty if alertmanager is not enabled
set:
alertmanager.enabled: false
alertmanager.ingress.enabled: true
asserts:
- hasDocuments:
count: 0
- it: should be empty if ingress is not enabled
set:
alertmanager.enabled: true
alertmanager.ingress.enabled: false
asserts:
- hasDocuments:
count: 0
- it: should have apiVersion extensions/v1beta1 for k8s < 1.16
set:
alertmanager.enabled: true
alertmanager.ingress.enabled: true
capabilities:
majorVersion: 1
minorVersion: 15
asserts:
- hasDocuments:
count: 1
- isKind:
of: Ingress
- isAPIVersion:
of: extensions/v1beta1
- it: should have apiVersion networking.k8s.io/v1beta1 for k8s >= 1.16 < 1.19
set:
alertmanager.enabled: true
alertmanager.ingress.enabled: true
capabilities:
majorVersion: 1
minorVersion: 16
apiVersions:
- networking.k8s.io/v1beta1
asserts:
- hasDocuments:
count: 1
- isKind:
of: Ingress
- isAPIVersion:
of: networking.k8s.io/v1beta1
  - it: should have apiVersion networking.k8s.io/v1beta1 for k8s < 1.19 when networking.k8s.io/v1 is also available
set:
alertmanager.enabled: true
alertmanager.ingress.enabled: true
capabilities:
majorVersion: 1
minorVersion: 10
apiVersions:
- networking.k8s.io/v1
- networking.k8s.io/v1beta1
asserts:
- hasDocuments:
count: 1
- isKind:
of: Ingress
- isAPIVersion:
of: networking.k8s.io/v1beta1
- it: should have apiVersion networking.k8s.io/v1 for k8s >= 1.22
set:
alertmanager.enabled: true
alertmanager.ingress.enabled: true
capabilities:
majorVersion: 1
minorVersion: 22
apiVersions:
- networking.k8s.io/v1
asserts:
- hasDocuments:
count: 1
- isKind:
of: Ingress
- isAPIVersion:
of: networking.k8s.io/v1
- it: should have explicit pathType ImplementationSpecific for networking.k8s.io/v1 by default
set:
alertmanager.enabled: true
alertmanager.ingress.enabled: true
capabilities:
majorVersion: 1
minorVersion: 19
apiVersions:
- networking.k8s.io/v1
asserts:
- hasDocuments:
count: 1
- equal:
path: spec.rules[0].http.paths[0].pathType
value: ImplementationSpecific
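These cases pin the Ingress apiVersion selection against the simulated `capabilities`. A minimal sketch of the kind of helper they exercise, assuming an illustrative helper name and structure rather than the chart's exact implementation:
```
{{/* Illustrative helper only; the chart's real helper name and logic may differ. */}}
{{- define "monitoring.ingress.apiVersion" -}}
{{- if and (.Capabilities.APIVersions.Has "networking.k8s.io/v1") (semverCompare ">=1.19-0" .Capabilities.KubeVersion.Version) -}}
networking.k8s.io/v1
{{- else if .Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" -}}
networking.k8s.io/v1beta1
{{- else -}}
extensions/v1beta1
{{- end -}}
{{- end -}}
```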

View File

@ -13,3 +13,17 @@ The chart installs the following components:
- [Prometheus Adapter](https://github.com/helm/charts/tree/master/stable/prometheus-adapter) - The adapter allows a user to expose custom metrics, resource metrics, and external metrics on the default [Prometheus](https://prometheus.io/) instance to the Kubernetes API Server. - [Prometheus Adapter](https://github.com/helm/charts/tree/master/stable/prometheus-adapter) - The adapter allows a user to expose custom metrics, resource metrics, and external metrics on the default [Prometheus](https://prometheus.io/) instance to the Kubernetes API Server.
For more information, review the Helm README of this chart. For more information, review the Helm README of this chart.
## Upgrading from 100.0.0+up16.6.0 to 100.1.0+up19.0.3
### Notable changes:
Grafana:
- `sidecar.dashboards.searchNamespace`, `sidecar.datasources.searchNamespace`, and `sidecar.notifiers.searchNamespace` now accept a list of namespaces (see the values sketch after this list).
Kube-state-metrics:
- The type of `collectors` changed from a dictionary to a list.
- `kubeStateMetrics.serviceMonitor.namespaceOverride` was replaced by `kube-state-metrics.namespaceOverride`.
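A minimal `values.yaml` sketch of the new list-based settings described above (the keys follow the bullets; the namespace and collector names are illustrative only, not defaults):
```
# Illustrative excerpt only -- namespace and collector names are examples, not defaults.
grafana:
  sidecar:
    dashboards:
      # was a single namespace string; a list of namespaces is now supported
      searchNamespace:
        - cattle-dashboards
        - cattle-monitoring-system
kube-state-metrics:
  # was a dictionary keyed by collector name; now a plain list of collectors to enable
  collectors:
    - deployments
    - daemonsets
    - statefulsets
  # replaces the old kubeStateMetrics.serviceMonitor.namespaceOverride value
  namespaceOverride: cattle-monitoring-system
```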
### Known issues:
- Occasionally, the upgrade fails with errors related to the webhook `prometheusrulemutate.monitoring.coreos.com`. This is a known upstream issue; the workaround is to trigger the upgrade one more time. See [rancher/rancher#32416](https://github.com/rancher/rancher/issues/32416#issuecomment-828881726).

View File

@ -43,7 +43,6 @@ metadata:
rbac.authorization.k8s.io/aggregate-to-edit: "true" rbac.authorization.k8s.io/aggregate-to-edit: "true"
{{- end }} {{- end }}
rules: rules:
rules:
- apiGroups: - apiGroups:
- monitoring.coreos.com - monitoring.coreos.com
resources: resources:

View File

@ -1,6 +1,6 @@
--- charts-original/Chart.yaml --- charts-original/Chart.yaml
+++ charts/Chart.yaml +++ charts/Chart.yaml
@@ -1,3 +1,35 @@ @@ -1,3 +1,38 @@
+apiVersion: v2 +apiVersion: v2
+description: Collects several related Helm charts, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator. +description: Collects several related Helm charts, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator.
+icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png +icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png
@ -20,12 +20,15 @@
+ - name: Arvind + - name: Arvind
+ email: arvind.iyengar@suse.com + email: arvind.iyengar@suse.com
+ url: "" + url: ""
+ - name: Jack
+ email: jiaqi.luo@suse.com
+ url: "https://github.com/jiaqiluo"
+name: rancher-monitoring +name: rancher-monitoring
+sources: +sources:
+ - https://github.com/prometheus-community/helm-charts + - https://github.com/prometheus-community/helm-charts
+ - https://github.com/prometheus-operator/kube-prometheus + - https://github.com/prometheus-operator/kube-prometheus
+version: 16.6.0 +version: 19.0.3
+appVersion: 0.48.0 +appVersion: 0.50.0
+kubeVersion: ">=1.16.0-0" +kubeVersion: ">=1.16.0-0"
+home: https://github.com/prometheus-operator/kube-prometheus +home: https://github.com/prometheus-operator/kube-prometheus
+keywords: +keywords:
@ -36,12 +39,12 @@
annotations: annotations:
artifacthub.io/links: | artifacthub.io/links: |
- name: Chart Source - name: Chart Source
@@ -5,8 +37,16 @@ @@ -5,8 +40,19 @@
- name: Upstream Project - name: Upstream Project
url: https://github.com/prometheus-operator/kube-prometheus url: https://github.com/prometheus-operator/kube-prometheus
artifacthub.io/operator: "true" artifacthub.io/operator: "true"
-apiVersion: v2 -apiVersion: v2
-appVersion: 0.48.0 -appVersion: 0.50.0
+ catalog.cattle.io/certified: rancher + catalog.cattle.io/certified: rancher
+ catalog.cattle.io/namespace: cattle-monitoring-system + catalog.cattle.io/namespace: cattle-monitoring-system
+ catalog.cattle.io/release-name: rancher-monitoring + catalog.cattle.io/release-name: rancher-monitoring
@ -52,10 +55,13 @@
+ catalog.cattle.io/auto-install: rancher-monitoring-crd=match + catalog.cattle.io/auto-install: rancher-monitoring-crd=match
+ catalog.cattle.io/requests-cpu: "4500m" + catalog.cattle.io/requests-cpu: "4500m"
+ catalog.cattle.io/requests-memory: "4000Mi" + catalog.cattle.io/requests-memory: "4000Mi"
+ catalog.cattle.io/rancher-version: ">= 2.6.0-0 <=2.6.99-0"
+ catalog.cattle.io/kube-version: ">=1.16.0-0"
+ catalog.cattle.io/upstream-version: "19.0.3"
dependencies: dependencies:
- condition: grafana.enabled - condition: grafana.enabled
name: grafana name: grafana
@@ -71,34 +111,6 @@ @@ -71,34 +117,6 @@
- condition: rkeScheduler.enabled - condition: rkeScheduler.enabled
name: rkeScheduler name: rkeScheduler
repository: file://./charts/rkeScheduler repository: file://./charts/rkeScheduler
@ -90,4 +96,4 @@
-- https://github.com/prometheus-community/helm-charts -- https://github.com/prometheus-community/helm-charts
-- https://github.com/prometheus-operator/kube-prometheus -- https://github.com/prometheus-operator/kube-prometheus
-type: application -type: application
-version: 16.6.0 -version: 19.0.3

View File

@ -1,6 +1,6 @@
--- charts-original/README.md --- charts-original/README.md
+++ charts/README.md +++ charts/README.md
@@ -193,7 +193,39 @@ @@ -230,7 +230,39 @@
helm show values prometheus-community/kube-prometheus-stack helm show values prometheus-community/kube-prometheus-stack
``` ```

View File

@ -1,11 +0,0 @@
--- charts-original/charts/windowsExporter/values.yaml
+++ charts/charts/windowsExporter/values.yaml
@@ -25,7 +25,7 @@
port: 9796
image:
repository: rancher/windows_exporter-package
- tag: v0.0.2
+ tag: v0.0.3
os: "windows"
# Specify the IP addresses of nodes that you want to collect metrics from

View File

@ -25,7 +25,7 @@
{{- end }} {{- end }}
paused: {{ .Values.alertmanager.alertmanagerSpec.paused }} paused: {{ .Values.alertmanager.alertmanagerSpec.paused }}
@@ -104,8 +106,8 @@ @@ -104,8 +106,8 @@
- {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-alertmanager]} - {key: alertmanager, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-alertmanager]}
{{- end }} {{- end }}
{{- end }} {{- end }}
+ tolerations: {{ include "linux-node-tolerations" . | nindent 4 }} + tolerations: {{ include "linux-node-tolerations" . | nindent 4 }}

View File

@ -20,7 +20,7 @@
{{ toYaml .Values.alertmanager.secret.annotations | indent 4 }} {{ toYaml .Values.alertmanager.secret.annotations | indent 4 }}
{{- end }} {{- end }}
labels: labels:
@@ -20,4 +26,4 @@ @@ -24,4 +30,4 @@
{{- range $key, $val := .Values.alertmanager.templateFiles }} {{- range $key, $val := .Values.alertmanager.templateFiles }}
{{ $key }}: {{ $val | b64enc | quote }} {{ $key }}: {{ $val | b64enc | quote }}
{{- end }} {{- end }}

View File

@ -0,0 +1,10 @@
--- charts-original/templates/exporters/kube-state-metrics/serviceMonitor.yaml
+++ charts/templates/exporters/kube-state-metrics/serviceMonitor.yaml
@@ -1,4 +1,7 @@
{{- if .Values.kubeStateMetrics.enabled }}
+{{- if .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }}
+{{- fail "kubeStateMetrics.serviceMonitor.namespaceOverride was removed. Please use kube-state-metrics.namespaceOverride instead." }}
+{{- end }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:

View File

@ -4,7 +4,7 @@
kind: ConfigMap kind: ConfigMap
metadata: metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) $dashboardName | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) $dashboardName | trunc 63 | trimSuffix "-" }}
- namespace: {{ template "kube-prometheus-stack.namespace" $ }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" $ }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
labels: labels:
{{- if $.Values.grafana.sidecar.dashboards.label }} {{- if $.Values.grafana.sidecar.dashboards.label }}

View File

@ -4,7 +4,7 @@
kind: ConfigMap kind: ConfigMap
metadata: metadata:
name: {{ template "kube-prometheus-stack.fullname" . }}-grafana-datasource name: {{ template "kube-prometheus-stack.fullname" . }}-grafana-datasource
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ default .Values.grafana.sidecar.datasources.searchNamespace (include "kube-prometheus-stack.namespace" .) }} + namespace: {{ default .Values.grafana.sidecar.datasources.searchNamespace (include "kube-prometheus-stack.namespace" .) }}
{{- if .Values.grafana.sidecar.datasources.annotations }} {{- if .Values.grafana.sidecar.datasources.annotations }}
annotations: annotations:

View File

@ -0,0 +1,23 @@
--- charts-original/templates/grafana/dashboards-1.14/alertmanager-overview.yaml
+++ charts/templates/grafana/dashboards-1.14/alertmanager-overview.yaml
@@ -5,10 +5,11 @@
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
+{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }}
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "alertmanager-overview" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -607,4 +608,5 @@
"uid": "alertmanager-overview",
"version": 0
}
-{{- end }}
\ No newline at end of file
+{{- end }}
+{{- end }}

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "apiserver" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "apiserver" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,8 +4,17 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "cluster-total" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "cluster-total" | trunc 63 | trimSuffix "-" }}
annotations: annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1831,7 +1831,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,

View File

@ -10,7 +10,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "controller-manager" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "controller-manager" | trunc 63 | trimSuffix "-" }}
annotations: annotations:
@ -32,29 +32,29 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
- "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", - "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
+ "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)", + "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
@@ -282,7 +287,7 @@ @@ -282,7 +287,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
- "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", - "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
+ "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)", + "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
@@ -388,7 +393,7 @@ @@ -388,7 +393,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))", - "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name, le))", + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
@@ -494,28 +499,28 @@ @@ -494,28 +499,28 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -133,12 +133,21 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1100,7 +1105,7 @@
"options": [
],
- "query": "label_values(up{job=\"kube-controller-manager\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -1126,7 +1131,7 @@ @@ -1126,7 +1131,7 @@
"options": [ "options": [
], ],
- "query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)", - "query": "label_values(up{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)",
+ "query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, instance)", + "query": "label_values(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, instance)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,

View File

@ -10,7 +10,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "etcd" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "etcd" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-coredns" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-coredns" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,8 +4,17 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }}
annotations: annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -2973,7 +2973,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-node" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-node" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-pod" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-pod" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -10,13 +10,13 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "kubelet" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "kubelet" | trunc 63 | trimSuffix "-" }}
annotations: annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -107,7 +108,7 @@ @@ -87,7 +88,7 @@
"tableColumn": "", "pluginVersion": "7",
"targets": [ "targets": [
{ {
- "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})", - "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})",
@ -24,8 +24,8 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "", "legendFormat": "",
@@ -191,7 +192,7 @@ @@ -144,7 +145,7 @@
"tableColumn": "", "pluginVersion": "7",
"targets": [ "targets": [
{ {
- "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", - "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
@ -33,8 +33,8 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -275,7 +276,7 @@ @@ -201,7 +202,7 @@
"tableColumn": "", "pluginVersion": "7",
"targets": [ "targets": [
{ {
- "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})", - "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
@ -42,8 +42,8 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -359,7 +360,7 @@ @@ -258,7 +259,7 @@
"tableColumn": "", "pluginVersion": "7",
"targets": [ "targets": [
{ {
- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})", - "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})",
@ -51,8 +51,8 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -443,7 +444,7 @@ @@ -315,7 +316,7 @@
"tableColumn": "", "pluginVersion": "7",
"targets": [ "targets": [
{ {
- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})", - "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})",
@ -60,8 +60,8 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -527,7 +528,7 @@ @@ -372,7 +373,7 @@
"tableColumn": "", "pluginVersion": "7",
"targets": [ "targets": [
{ {
- "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))", - "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))",
@ -69,7 +69,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -609,7 +610,7 @@ @@ -431,7 +432,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -78,7 +78,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
@@ -702,7 +703,7 @@ @@ -526,7 +527,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -87,7 +87,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
@@ -808,7 +809,7 @@ @@ -621,7 +622,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -96,7 +96,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
@@ -914,14 +915,14 @@ @@ -716,14 +717,14 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -113,7 +113,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} worker", "legendFormat": "{{`{{`}}instance{{`}}`}} worker",
@@ -1014,14 +1015,14 @@ @@ -818,14 +819,14 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -130,7 +130,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} worker", "legendFormat": "{{`{{`}}instance{{`}}`}} worker",
@@ -1129,7 +1130,7 @@ @@ -922,7 +923,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -139,7 +139,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}",
@@ -1224,7 +1225,7 @@ @@ -1019,7 +1020,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -148,7 +148,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}",
@@ -1332,7 +1333,7 @@ @@ -1116,7 +1117,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -157,7 +157,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}",
@@ -1438,7 +1439,7 @@ @@ -1211,7 +1212,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -166,7 +166,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}operation_type{{`}}`}}", "legendFormat": "{{`{{`}}operation_type{{`}}`}}",
@@ -1531,7 +1532,7 @@ @@ -1306,7 +1307,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -175,7 +175,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
@@ -1638,7 +1639,7 @@ @@ -1402,7 +1403,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -184,7 +184,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1731,7 +1732,7 @@ @@ -1497,7 +1498,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -193,7 +193,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1837,7 +1838,7 @@ @@ -1592,7 +1593,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -202,7 +202,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1943,28 +1944,28 @@ @@ -1687,28 +1688,28 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -235,7 +235,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "5xx", "legendFormat": "5xx",
@@ -2070,7 +2071,7 @@ @@ -1803,7 +1804,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -244,7 +244,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}",
@@ -2176,7 +2177,7 @@ @@ -1898,7 +1899,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -253,7 +253,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -2269,7 +2270,7 @@ @@ -1993,7 +1994,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -262,7 +262,7 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -2362,7 +2363,7 @@ @@ -2088,7 +2089,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
@ -271,7 +271,16 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -2482,7 +2483,7 @@ @@ -2177,7 +2178,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -2203,7 +2204,7 @@
"options": [ "options": [
], ],
@ -280,7 +289,7 @@
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 1, "sort": 1,
@@ -2530,4 +2531,5 @@ @@ -2251,4 +2252,5 @@
"uid": "3138fa155d5915769fbded898ac09fd9", "uid": "3138fa155d5915769fbded898ac09fd9",
"version": 0 "version": 0
} }

View File

@ -4,8 +4,17 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-pod" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-pod" | trunc 63 | trimSuffix "-" }}
annotations: annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1301,7 +1301,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,

View File

@ -4,8 +4,17 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-workload" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-workload" | trunc 63 | trimSuffix "-" }}
annotations: annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1541,7 +1541,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-rsrc-use" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-rsrc-use" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "persistentvolumesusage" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "persistentvolumesusage" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,8 +4,17 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pod-total" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pod-total" | trunc 63 | trimSuffix "-" }}
annotations: annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1033,7 +1033,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus-remote-write" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus-remote-write" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -10,7 +10,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "proxy" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "proxy" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -10,7 +10,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "scheduler" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "scheduler" | trunc 63 | trimSuffix "-" }}
annotations: annotations:
@ -32,68 +32,68 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
- "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", - "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
+ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} e2e", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e",
"refId": "A" "refId": "A"
}, },
{ {
- "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", - "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
+ "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} binding", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding",
"refId": "B" "refId": "B"
}, },
{ {
- "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", - "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
+ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm",
"refId": "C" "refId": "C"
}, },
{ {
- "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", - "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
+ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", + "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} volume", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume",
@@ -290,28 +295,28 @@ @@ -290,28 +295,28 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", - "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} e2e", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e",
"refId": "A" "refId": "A"
}, },
{ {
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", - "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} binding", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding",
"refId": "B" "refId": "B"
}, },
{ {
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", - "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm",
"refId": "C" "refId": "C"
}, },
{ {
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", - "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", + "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} volume", "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume",
@@ -417,28 +422,28 @@ @@ -417,28 +422,28 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -172,6 +172,15 @@
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}", "legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1023,7 +1028,7 @@
"options": [
],
- "query": "label_values(up{job=\"kube-scheduler\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -1049,7 +1054,7 @@ @@ -1049,7 +1054,7 @@
"options": [ "options": [

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "statefulset" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "statefulset" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }} - namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} + namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "workload-total" | trunc 63 | trimSuffix "-" }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "workload-total" | trunc 63 | trimSuffix "-" }}
annotations: annotations:

View File

@ -22,9 +22,21 @@
- - --prometheus-config-reloader={{ .Values.prometheusOperator.prometheusConfigReloaderImage.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.tag }} - - --prometheus-config-reloader={{ .Values.prometheusOperator.prometheusConfigReloaderImage.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.tag }}
+ - --prometheus-config-reloader={{ template "system_default_registry" . }}{{ .Values.prometheusOperator.prometheusConfigReloaderImage.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.tag }} + - --prometheus-config-reloader={{ template "system_default_registry" . }}{{ .Values.prometheusOperator.prometheusConfigReloaderImage.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.tag }}
{{- end }} {{- end }}
- --config-reloader-cpu={{ .Values.prometheusOperator.configReloaderCpu }} - --config-reloader-cpu-request={{ .Values.prometheusOperator.configReloaderCpu }}
- --config-reloader-memory={{ .Values.prometheusOperator.configReloaderMemory }} - --config-reloader-cpu-limit={{ .Values.prometheusOperator.configReloaderCpu }}
@@ -130,16 +130,16 @@ @@ -81,9 +81,9 @@
- --prometheus-instance-namespaces={{ .Values.prometheusOperator.prometheusInstanceNamespaces | join "," }}
{{- end }}
{{- if .Values.prometheusOperator.thanosImage.sha }}
- - --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }}
+ - --thanos-default-base-image={{ template "system_default_registry" . }}{{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }}
{{- else }}
- - --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}
+ - --thanos-default-base-image={{ template "system_default_registry" . }}{{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}
{{- end }}
{{- if .Values.prometheusOperator.thanosRulerInstanceNamespaces }}
- --thanos-ruler-instance-namespaces={{ .Values.prometheusOperator.thanosRulerInstanceNamespaces | join "," }}
@@ -137,16 +137,16 @@
hostNetwork: true hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet dnsPolicy: ClusterFirstWithHostNet
{{- end }} {{- end }}

View File

@ -1,15 +1,24 @@
--- charts-original/templates/prometheus/prometheus.yaml --- charts-original/templates/prometheus/prometheus.yaml
+++ charts/templates/prometheus/prometheus.yaml +++ charts/templates/prometheus/prometheus.yaml
@@ -32,7 +32,7 @@ @@ -33,13 +33,13 @@
{{ toYaml .Values.prometheus.prometheusSpec.apiserverConfig | indent 4}}
{{- end }} {{- end }}
{{- if .Values.prometheus.prometheusSpec.image }} {{- if .Values.prometheus.prometheusSpec.image }}
- image: {{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }} {{- if and .Values.prometheus.prometheusSpec.image.tag .Values.prometheus.prometheusSpec.image.sha }}
+ image: {{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }} - image: "{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}"
{{- else if .Values.prometheus.prometheusSpec.image.sha }}
- image: "{{ .Values.prometheus.prometheusSpec.image.repository }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}"
{{- else if .Values.prometheus.prometheusSpec.image.tag }}
- image: "{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}"
{{- else }}
- image: "{{ .Values.prometheus.prometheusSpec.image.repository }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}"
{{- end }}
version: {{ .Values.prometheus.prometheusSpec.image.tag }} version: {{ .Values.prometheus.prometheusSpec.image.tag }}
{{- if .Values.prometheus.prometheusSpec.image.sha }} {{- if .Values.prometheus.prometheusSpec.image.sha }}
sha: {{ .Values.prometheus.prometheusSpec.image.sha }} @@ -64,11 +64,13 @@
@@ -56,11 +56,13 @@
externalUrl: "{{ tpl .Values.prometheus.prometheusSpec.externalUrl . }}" externalUrl: "{{ tpl .Values.prometheus.prometheusSpec.externalUrl . }}"
{{- else if and .Values.prometheus.ingress.enabled .Values.prometheus.ingress.hosts }} {{- else if and .Values.prometheus.ingress.enabled .Values.prometheus.ingress.hosts }}
externalUrl: "http://{{ tpl (index .Values.prometheus.ingress.hosts 0) . }}{{ .Values.prometheus.prometheusSpec.routePrefix }}" externalUrl: "http://{{ tpl (index .Values.prometheus.ingress.hosts 0) . }}{{ .Values.prometheus.prometheusSpec.routePrefix }}"
@ -24,7 +33,7 @@
{{ toYaml .Values.prometheus.prometheusSpec.nodeSelector | indent 4 }} {{ toYaml .Values.prometheus.prometheusSpec.nodeSelector | indent 4 }}
{{- end }} {{- end }}
paused: {{ .Values.prometheus.prometheusSpec.paused }} paused: {{ .Values.prometheus.prometheusSpec.paused }}
@@ -232,8 +234,8 @@ @@ -244,8 +246,8 @@
- {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-prometheus]} - {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-prometheus]}
{{- end }} {{- end }}
{{- end }} {{- end }}
@ -34,7 +43,7 @@
{{ toYaml .Values.prometheus.prometheusSpec.tolerations | indent 4 }} {{ toYaml .Values.prometheus.prometheusSpec.tolerations | indent 4 }}
{{- end }} {{- end }}
{{- if .Values.prometheus.prometheusSpec.topologySpreadConstraints }} {{- if .Values.prometheus.prometheusSpec.topologySpreadConstraints }}
@@ -266,7 +268,7 @@ @@ -284,7 +286,7 @@
{{- end }} {{- end }}
{{- if .Values.prometheus.prometheusSpec.containers }} {{- if .Values.prometheus.prometheusSpec.containers }}
containers: containers:
@@ -43,7 +52,7 @@
{{- end }} {{- end }}
{{- if .Values.prometheus.prometheusSpec.initContainers }} {{- if .Values.prometheus.prometheusSpec.initContainers }}
initContainers: initContainers:
@@ -282,6 +284,7 @@ @@ -300,6 +302,7 @@
{{- if .Values.prometheus.prometheusSpec.disableCompaction }} {{- if .Values.prometheus.prometheusSpec.disableCompaction }}
disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }} disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }}
{{- end }} {{- end }}
@@ -51,7 +60,7 @@
portName: {{ .Values.prometheus.prometheusSpec.portName }} portName: {{ .Values.prometheus.prometheusSpec.portName }}
{{- end }} {{- end }}
{{- if .Values.prometheus.prometheusSpec.volumes }} {{- if .Values.prometheus.prometheusSpec.volumes }}
@@ -326,3 +329,4 @@ @@ -356,3 +359,4 @@
{{- if .Values.prometheus.prometheusSpec.allowOverlappingBlocks }} {{- if .Values.prometheus.prometheusSpec.allowOverlappingBlocks }}
allowOverlappingBlocks: {{ .Values.prometheus.prometheusSpec.allowOverlappingBlocks }} allowOverlappingBlocks: {{ .Values.prometheus.prometheusSpec.allowOverlappingBlocks }}
{{- end }} {{- end }}
@@ -0,0 +1,18 @@
--- charts-original/templates/prometheus/rules-1.14/alertmanager.rules.yaml
+++ charts/templates/prometheus/rules-1.14/alertmanager.rules.yaml
@@ -7,6 +7,7 @@
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }}
{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
+{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
@@ -172,4 +173,5 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
-{{- end }}
\ No newline at end of file
+{{- end }}
+{{- end }}
@@ -4,12 +4,12 @@
rules: rules:
- expr: |- - expr: |-
sum by (cluster, namespace, pod, container) ( sum by (cluster, namespace, pod, container) (
- rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) - irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m])
+ rate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}[5m]) + irate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}[5m])
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
) )
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
- expr: |- - expr: |-
- container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
+ container_memory_working_set_bytes{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""} + container_memory_working_set_bytes{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}
@@ -1,18 +1,25 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-storage.yaml --- charts-original/templates/prometheus/rules-1.14/kubernetes-storage.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-storage.yaml +++ charts/templates/prometheus/rules-1.14/kubernetes-storage.yaml
@@ -31,9 +31,9 @@ @@ -31,13 +31,12 @@
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefillingup runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefillingup
summary: PersistentVolume is filling up. summary: PersistentVolume is filling up.
expr: |- expr: |-
- (
- kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} - kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
+ kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} - /
/
- kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} - kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
- ) < 0.03
+ kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
+ /
+ kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} + kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
< 0.03 + < 0.03
and
- kubelet_volume_stats_used_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
+ kubelet_volume_stats_used_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
for: 1m for: 1m
labels: labels:
@@ -48,12 +48,12 @@ severity: critical
@@ -51,14 +50,14 @@
summary: PersistentVolume is filling up. summary: PersistentVolume is filling up.
expr: |- expr: |-
( (
@@ -23,6 +30,9 @@
+ kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} + kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
) < 0.15 ) < 0.15
and and
- kubelet_volume_stats_used_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
+ kubelet_volume_stats_used_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
and
- predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
+ predict_linear(kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 + predict_linear(kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
for: 1h for: 1h
@@ -463,7 +463,7 @@
pspEnabled: true pspEnabled: true
pspAnnotations: {} pspAnnotations: {}
## Specify pod annotations ## Specify pod annotations
@@ -187,25 +625,76 @@ @@ -187,25 +625,77 @@
## ref: https://prometheus.io/docs/alerting/notifications/ ## ref: https://prometheus.io/docs/alerting/notifications/
## https://prometheus.io/docs/alerting/notification_examples/ ## https://prometheus.io/docs/alerting/notification_examples/
## ##
@@ -486,6 +486,7 @@
- # {{ end }} - # {{ end }}
- # {{ end }} - # {{ end }}
- # {{ end }} - # {{ end }}
+
+ templateFiles: + templateFiles:
+ rancher_defaults.tmpl: |- + rancher_defaults.tmpl: |-
+ {{- define "slack.rancher.text" -}} + {{- define "slack.rancher.text" -}}
@@ -559,7 +560,7 @@
ingress: ingress:
enabled: false enabled: false
@@ -395,7 +884,7 @@ @@ -397,7 +887,7 @@
## Image of Alertmanager ## Image of Alertmanager
## ##
image: image:
@@ -568,7 +569,7 @@
tag: v0.22.2 tag: v0.22.2
sha: "" sha: ""
@@ -507,9 +996,13 @@ @@ -509,9 +999,13 @@
## Define resources requests and limits for single Pods. ## Define resources requests and limits for single Pods.
## ref: https://kubernetes.io/docs/user-guide/compute-resources/ ## ref: https://kubernetes.io/docs/user-guide/compute-resources/
## ##
@@ -585,7 +586,7 @@
## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node. ## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node.
## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided. ## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided.
@@ -613,6 +1106,30 @@ @@ -625,6 +1119,30 @@
enabled: true enabled: true
namespaceOverride: "" namespaceOverride: ""
@@ -616,7 +617,7 @@
## ForceDeployDatasources Create datasource configmap even if grafana deployment has been disabled ## ForceDeployDatasources Create datasource configmap even if grafana deployment has been disabled
## ##
forceDeployDatasources: false forceDeployDatasources: false
@@ -625,6 +1142,18 @@ @@ -637,6 +1155,18 @@
## ##
defaultDashboardsEnabled: true defaultDashboardsEnabled: true
@@ -632,10 +633,10 @@
+ # Ignore if useExistingNamespace is true + # Ignore if useExistingNamespace is true
+ cleanupOnUninstall: false + cleanupOnUninstall: false
+ +
adminPassword: prom-operator ## Timezone for the default dashboards
## Other options are: browser or a specific timezone, i.e. Europe/Luxembourg
ingress: ##
@@ -664,6 +1193,7 @@ @@ -681,6 +1211,7 @@
dashboards: dashboards:
enabled: true enabled: true
label: grafana_dashboard label: grafana_dashboard
@@ -643,7 +644,7 @@
## Annotations for Grafana dashboard configmaps ## Annotations for Grafana dashboard configmaps
## ##
@@ -716,7 +1246,60 @@ @@ -739,7 +1270,60 @@
## Passed to grafana subchart and used by servicemonitor below ## Passed to grafana subchart and used by servicemonitor below
## ##
service: service:
@@ -705,11 +706,10 @@
## If true, create a serviceMonitor for grafana ## If true, create a serviceMonitor for grafana
## ##
@@ -746,6 +1329,14 @@ @@ -773,6 +1357,17 @@
# targetLabel: nodename
# replacement: $1 # replacement: $1
# action: replace # action: replace
+
+ resources: + resources:
+ limits: + limits:
+ memory: 200Mi + memory: 200Mi
@@ -717,10 +717,14 @@
+ requests: + requests:
+ memory: 100Mi + memory: 100Mi
+ cpu: 100m + cpu: 100m
+
+ testFramework:
+ enabled: false
+
## Component scraping the kube api server ## Component scraping the kube api server
## ##
@@ -907,7 +1498,7 @@ kubeApiServer:
@@ -952,7 +1547,7 @@
## Component scraping the kube controller manager ## Component scraping the kube controller manager
## ##
kubeControllerManager: kubeControllerManager:
@@ -729,7 +733,7 @@
## If your kube controller manager is not deployed as a pod, specify IPs it can be found on ## If your kube controller manager is not deployed as a pod, specify IPs it can be found on
## ##
@@ -1054,7 +1645,7 @@ @@ -1110,7 +1705,7 @@
## Component scraping etcd ## Component scraping etcd
## ##
kubeEtcd: kubeEtcd:
@@ -738,7 +742,7 @@
## If your etcd is not deployed as a pod, specify IPs it can be found on ## If your etcd is not deployed as a pod, specify IPs it can be found on
## ##
@@ -1119,7 +1710,7 @@ @@ -1177,7 +1772,7 @@
## Component scraping kube scheduler ## Component scraping kube scheduler
## ##
kubeScheduler: kubeScheduler:
@@ -747,7 +751,7 @@
## If your kube scheduler is not deployed as a pod, specify IPs it can be found on ## If your kube scheduler is not deployed as a pod, specify IPs it can be found on
## ##
@@ -1177,7 +1768,7 @@ @@ -1237,7 +1832,7 @@
## Component scraping kube proxy ## Component scraping kube proxy
## ##
kubeProxy: kubeProxy:
@@ -756,7 +760,7 @@
## If your kube proxy is not deployed as a pod, specify IPs it can be found on ## If your kube proxy is not deployed as a pod, specify IPs it can be found on
## ##
@@ -1266,6 +1857,13 @@ @@ -1337,6 +1932,13 @@
create: true create: true
podSecurityPolicy: podSecurityPolicy:
enabled: true enabled: true
@@ -770,7 +774,7 @@
## Deploy node exporter as a daemonset to all nodes ## Deploy node exporter as a daemonset to all nodes
## ##
@@ -1319,6 +1917,16 @@ @@ -1392,6 +1994,16 @@
extraArgs: extraArgs:
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$ - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
@@ -787,7 +791,7 @@
## Manages Prometheus and Alertmanager components ## Manages Prometheus and Alertmanager components
## ##
@@ -1331,8 +1939,8 @@ @@ -1404,8 +2016,8 @@
enabled: true enabled: true
# Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants # Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants
tlsMinVersion: VersionTLS13 tlsMinVersion: VersionTLS13
@@ -798,16 +802,19 @@
## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted ## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted
## rules from making their way into prometheus and potentially preventing the container from starting ## rules from making their way into prometheus and potentially preventing the container from starting
@@ -1349,7 +1957,7 @@ @@ -1422,9 +2034,9 @@
patch: patch:
enabled: true enabled: true
image: image:
- repository: jettech/kube-webhook-certgen - repository: k8s.gcr.io/ingress-nginx/kube-webhook-certgen
+ repository: rancher/mirrored-jettech-kube-webhook-certgen + repository: rancher/mirrored-ingress-nginx-kube-webhook-certgen
tag: v1.5.2 tag: v1.0
sha: "" - sha: "f3b6b39a6062328c095337b4cadcefd1612348fdd5190b1dcbcb9b9e90bd8068"
+ sha: ""
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
@@ -1498,13 +2106,13 @@ resources: {}
## Provide a priority class name to the webhook patching job
@@ -1571,13 +2183,13 @@
## Resource limits & requests ## Resource limits & requests
## ##
@@ -828,25 +835,34 @@
# Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico), # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico),
# because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working
@@ -1557,7 +2165,7 @@ @@ -1630,7 +2242,7 @@
## Prometheus-operator image ## Prometheus-operator image
## ##
image: image:
- repository: quay.io/prometheus-operator/prometheus-operator - repository: quay.io/prometheus-operator/prometheus-operator
+ repository: rancher/mirrored-prometheus-operator-prometheus-operator + repository: rancher/mirrored-prometheus-operator-prometheus-operator
tag: v0.48.0 tag: v0.50.0
sha: "" sha: ""
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
@@ -1573,7 +2181,7 @@ @@ -1646,7 +2258,7 @@
## Prometheus-config-reloader image to use for config and rule reloading ## Prometheus-config-reloader image to use for config and rule reloading
## ##
prometheusConfigReloaderImage: prometheusConfigReloaderImage:
- repository: quay.io/prometheus-operator/prometheus-config-reloader - repository: quay.io/prometheus-operator/prometheus-config-reloader
+ repository: rancher/mirrored-prometheus-operator-prometheus-config-reloader + repository: rancher/mirrored-prometheus-operator-prometheus-config-reloader
tag: v0.48.0 tag: v0.50.0
sha: "" sha: ""
@@ -1659,7 +2267,7 @@ @@ -1661,7 +2273,7 @@
## Thanos side-car image when configured
##
thanosImage:
- repository: quay.io/thanos/thanos
+ repository: rancher/mirrored-thanos-thanos
tag: v0.17.2
sha: ""
@@ -1781,7 +2393,7 @@
port: 9090 port: 9090
## To be used with a proxy extraContainer port ## To be used with a proxy extraContainer port
@@ -855,28 +871,16 @@
## List of IP addresses at which the Prometheus server service is available ## List of IP addresses at which the Prometheus server service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
@@ -1916,7 +2524,7 @@ @@ -2054,7 +2666,7 @@
## Image of Prometheus. ## Image of Prometheus.
## ##
image: image:
- repository: quay.io/prometheus/prometheus - repository: quay.io/prometheus/prometheus
+ repository: rancher/mirrored-prometheus-prometheus + repository: rancher/mirrored-prometheus-prometheus
tag: v2.27.1 tag: v2.28.1
sha: "" sha: ""
@@ -1979,6 +2587,11 @@ @@ -2149,7 +2761,7 @@
##
externalUrl: ""
+ ## Ignore NamespaceSelector settings from the PodMonitor and ServiceMonitor configs
+ ## If true, PodMonitors and ServiceMonitors can only discover Pods and Services within the namespace they are deployed into
+ ##
+ ignoreNamespaceSelectors: false
+
## Define which Nodes the Pods are scheduled on.
## ref: https://kubernetes.io/docs/user-guide/node-selection/
##
@@ -2011,7 +2624,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment, ## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the PrometheusRule resources created ## which will also match the PrometheusRule resources created
## ##
@@ -885,7 +889,7 @@
## PrometheusRules to be selected for target discovery. ## PrometheusRules to be selected for target discovery.
## If {}, select all PrometheusRules ## If {}, select all PrometheusRules
@@ -2036,7 +2649,7 @@ @@ -2174,7 +2786,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment, ## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the servicemonitors created ## which will also match the servicemonitors created
## ##
@@ -894,7 +898,7 @@
## ServiceMonitors to be selected for target discovery. ## ServiceMonitors to be selected for target discovery.
## If {}, select all ServiceMonitors ## If {}, select all ServiceMonitors
@@ -2059,7 +2672,7 @@ @@ -2197,7 +2809,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment, ## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the podmonitors created ## which will also match the podmonitors created
## ##
@@ -903,7 +907,7 @@
## PodMonitors to be selected for target discovery. ## PodMonitors to be selected for target discovery.
## If {}, select all PodMonitors ## If {}, select all PodMonitors
@@ -2190,9 +2803,13 @@ @@ -2328,9 +2940,13 @@
## Resource limits & requests ## Resource limits & requests
## ##
@@ -912,7 +916,7 @@
- # memory: 400Mi - # memory: 400Mi
+ resources: + resources:
+ limits: + limits:
+ memory: 1500Mi + memory: 3000Mi
+ cpu: 1000m + cpu: 1000m
+ requests: + requests:
+ memory: 750Mi + memory: 750Mi
@@ -920,7 +924,7 @@
## Prometheus StorageSpec for persistent data ## Prometheus StorageSpec for persistent data
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/storage.md ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/storage.md
@@ -2215,7 +2832,13 @@ @@ -2353,7 +2969,13 @@
# medium: Memory # medium: Memory
# Additional volumes on the output StatefulSet definition. # Additional volumes on the output StatefulSet definition.
@@ -935,9 +939,9 @@
# Additional VolumeMounts on the output StatefulSet definition. # Additional VolumeMounts on the output StatefulSet definition.
volumeMounts: [] volumeMounts: []
@@ -2322,9 +2945,34 @@ @@ -2475,9 +3097,34 @@
## # fileName: "objstore.yaml"
thanos: {} # objectStorageConfigFile: /var/secrets/object-store.yaml
+ proxy: + proxy:
+ image: + image:
@@ -971,7 +975,7 @@
## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes ## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes
## (permissions, dir tree) on mounted volumes before starting prometheus ## (permissions, dir tree) on mounted volumes before starting prometheus
@@ -2332,7 +2980,7 @@ @@ -2485,7 +3132,7 @@
## PortName to use for Prometheus. ## PortName to use for Prometheus.
## ##
@@ -1,10 +1,11 @@
url: https://github.com/prometheus-community/helm-charts.git url: https://github.com/prometheus-community/helm-charts.git
subdirectory: charts/kube-prometheus-stack subdirectory: charts/kube-prometheus-stack
commit: ba91bdb2d79ca4419cf72078f5f4bfcc426d4599 commit: c6208979d494156a3869d2e5faab669ce4301c68
version: 100.0.1 version: 100.1.0
additionalCharts: additionalCharts:
- workingDir: charts-crd - workingDir: charts-crd
crdOptions: crdOptions:
templateDirectory: crd-template templateDirectory: crd-template
crdDirectory: crd-manifest crdDirectory: crd-manifest
addCRDValidationToMainChart: true addCRDValidationToMainChart: true
useTarArchive: true
@@ -49,7 +49,9 @@ spec:
- -c - -c
- > - >
echo "Applying CRDs..."; echo "Applying CRDs...";
kubectl apply -f /etc/config/crd-manifest.yaml; mkdir -p /etc/crd;
base64 -d /etc/config/crd-manifest.tgz.b64 | tar -xzv -C /etc/crd;
kubectl apply -Rf /etc/crd;
echo "Waiting for CRDs to be recognized before finishing installation..."; echo "Waiting for CRDs to be recognized before finishing installation...";
@@ -68,7 +70,13 @@ spec:
mountPath: /etc/config mountPath: /etc/config
restartPolicy: OnFailure restartPolicy: OnFailure
nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} nodeSelector: {{ include "linux-node-selector" . | nindent 8 }}
{{- if .Values.nodeSelector }}
{{- toYaml .Values.nodeSelector | nindent 8 }}
{{- end }}
tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} tolerations: {{ include "linux-node-tolerations" . | nindent 8 }}
{{- if .Values.tolerations }}
{{- toYaml .Values.tolerations | nindent 8 }}
{{- end }}
volumes: volumes:
- name: crd-manifest - name: crd-manifest
configMap: configMap:
@@ -121,14 +129,23 @@ spec:
- /bin/sh - /bin/sh
- -c - -c
- > - >
kubectl delete -f /etc/config/crd-manifest.yaml echo "Deleting CRDs...";
mkdir -p /etc/crd;
base64 -d /etc/config/crd-manifest.tgz.b64 | tar -xzv -C /etc/crd;
kubectl delete -Rf /etc/crd;
volumeMounts: volumeMounts:
- name: crd-manifest - name: crd-manifest
readOnly: true readOnly: true
mountPath: /etc/config mountPath: /etc/config
restartPolicy: OnFailure restartPolicy: OnFailure
nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} nodeSelector: {{ include "linux-node-selector" . | nindent 8 }}
{{- if .Values.nodeSelector }}
{{- toYaml .Values.nodeSelector | nindent 8 }}
{{- end }}
tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} tolerations: {{ include "linux-node-tolerations" . | nindent 8 }}
{{- if .Values.tolerations }}
{{- toYaml .Values.tolerations | nindent 8 }}
{{- end }}
volumes: volumes:
- name: crd-manifest - name: crd-manifest
configMap: configMap:
@@ -4,11 +4,5 @@ metadata:
name: {{ .Chart.Name }}-manifest name: {{ .Chart.Name }}-manifest
namespace: {{ .Release.Namespace }} namespace: {{ .Release.Namespace }}
data: data:
crd-manifest.yaml: | crd-manifest.tgz.b64:
{{- $currentScope := . -}} {{- .Files.Get "files/crd-manifest.tgz" | b64enc | indent 4 }}
{{- $crds := (.Files.Glob "crd-manifest/**.yaml") -}}
{{- range $path, $_ := $crds -}}
{{- with $currentScope -}}
{{ .Files.Get $path | nindent 4 }}
---
{{- end -}}{{- end -}}
@@ -9,3 +9,7 @@ global:
image: image:
repository: rancher/shell repository: rancher/shell
tag: v0.1.8 tag: v0.1.8
nodeSelector: {}
tolerations: []
@@ -0,0 +1,3 @@
service:
targetPort: 9102
port: 9102
@@ -7,11 +7,16 @@
+ catalog.rancher.io/release-name: rancher-node-exporter + catalog.rancher.io/release-name: rancher-node-exporter
+ catalog.cattle.io/hidden: "true" + catalog.cattle.io/hidden: "true"
+ catalog.cattle.io/os: linux + catalog.cattle.io/os: linux
apiVersion: v1 apiVersion: v2
appVersion: 1.1.2 appVersion: 1.2.2
description: A Helm chart for prometheus node-exporter description: A Helm chart for prometheus node-exporter
-name: prometheus-node-exporter -name: prometheus-node-exporter
+name: rancher-node-exporter +name: rancher-node-exporter
version: 1.18.1 version: 2.2.0
type: application
home: https://github.com/prometheus/node_exporter/ home: https://github.com/prometheus/node_exporter/
sources: @@ -16,3 +22,4 @@
name: gianrubio
- name: vsliouniaev
- name: bismarck
+
@@ -1,6 +1,6 @@
--- charts-original/templates/daemonset.yaml --- charts-original/templates/daemonset.yaml
+++ charts/templates/daemonset.yaml +++ charts/templates/daemonset.yaml
@@ -35,7 +35,7 @@ @@ -36,7 +36,7 @@
{{- end }} {{- end }}
containers: containers:
- name: node-exporter - name: node-exporter
@@ -9,7 +9,7 @@
imagePullPolicy: {{ .Values.image.pullPolicy }} imagePullPolicy: {{ .Values.image.pullPolicy }}
args: args:
- --path.procfs=/host/proc - --path.procfs=/host/proc
@@ -133,18 +133,18 @@ @@ -134,18 +134,18 @@
affinity: affinity:
{{ toYaml .Values.affinity | indent 8 }} {{ toYaml .Values.affinity | indent 8 }}
{{- end }} {{- end }}
@@ -12,10 +12,10 @@
image: image:
- repository: quay.io/prometheus/node-exporter - repository: quay.io/prometheus/node-exporter
+ repository: rancher/mirrored-prometheus-node-exporter + repository: rancher/mirrored-prometheus-node-exporter
tag: v1.1.2 tag: v1.2.2
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
@@ -126,6 +131,8 @@ @@ -135,6 +140,8 @@
tolerations: tolerations:
- effect: NoSchedule - effect: NoSchedule
operator: Exists operator: Exists
@@ -1,4 +1,5 @@
url: https://github.com/prometheus-community/helm-charts.git url: https://github.com/prometheus-community/helm-charts.git
subdirectory: charts/prometheus-node-exporter subdirectory: charts/prometheus-node-exporter
commit: a05f7b8888d6174827b815aa097d64b94f00af3e commit: cd13facd9c6042f7f15978024572cf297fbac6c0
version: 100.0.0 version: 100.0.0
doNotRelease: true
@@ -0,0 +1,9 @@
rules:
external:
- seriesQuery: '{__name__=~"^some_metric_count$"}'
resources:
template: <<.Resource>>
name:
matches: ""
as: "my_custom_metric"
metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
@@ -10,6 +10,6 @@
apiVersion: v1 apiVersion: v1
-name: prometheus-adapter -name: prometheus-adapter
+name: rancher-prometheus-adapter +name: rancher-prometheus-adapter
version: 2.14.0 version: 2.17.0
appVersion: v0.8.4 appVersion: v0.9.0
description: A Helm chart for k8s prometheus adapter description: A Helm chart for k8s prometheus adapter
@@ -1,6 +1,6 @@
--- charts-original/templates/deployment.yaml --- charts-original/templates/deployment.yaml
+++ charts/templates/deployment.yaml +++ charts/templates/deployment.yaml
@@ -40,7 +40,7 @@ @@ -41,7 +41,7 @@
{{- end}} {{- end}}
containers: containers:
- name: {{ .Chart.Name }} - name: {{ .Chart.Name }}
@@ -9,7 +9,7 @@
imagePullPolicy: {{ .Values.image.pullPolicy }} imagePullPolicy: {{ .Values.image.pullPolicy }}
args: args:
- /adapter - /adapter
@@ -102,13 +102,17 @@ @@ -105,8 +105,10 @@
name: volume-serving-cert name: volume-serving-cert
readOnly: true readOnly: true
{{- end }} {{- end }}
@@ -22,6 +22,10 @@
affinity: affinity:
{{- toYaml .Values.affinity | nindent 8 }} {{- toYaml .Values.affinity | nindent 8 }}
priorityClassName: {{ .Values.priorityClassName }} priorityClassName: {{ .Values.priorityClassName }}
@@ -114,8 +116,10 @@
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
{{- end }}
- tolerations: - tolerations:
- {{- toYaml .Values.tolerations | nindent 8 }} - {{- toYaml .Values.tolerations | nindent 8 }}
+ tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }}
@@ -9,8 +9,8 @@
affinity: {} affinity: {}
image: image:
- repository: directxman12/k8s-prometheus-adapter-amd64 - repository: k8s.gcr.io/prometheus-adapter/prometheus-adapter
+ repository: rancher/mirrored-directxman12-k8s-prometheus-adapter + repository: rancher/mirrored-prometheus-adapter-prometheus-adapter
tag: v0.8.4 tag: v0.9.0
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
@@ -1,4 +1,5 @@
url: https://github.com/prometheus-community/helm-charts.git url: https://github.com/prometheus-community/helm-charts.git
subdirectory: charts/prometheus-adapter subdirectory: charts/prometheus-adapter
commit: 50b719af447594abbae7beeb4d6458a19f8e9689 commit: 843d2f3d302ca24d745abf539a86ec680ae4f8c0
version: 100.0.0 version: 100.0.0
doNotRelease: true
@@ -1,5 +1,5 @@
apiVersion: v1 apiVersion: v1
version: 0.1.4 version: 0.1.5
appVersion: 0.1.0 appVersion: 0.1.0
annotations: annotations:
catalog.rancher.io/certified: rancher catalog.rancher.io/certified: rancher
@@ -82,7 +82,6 @@ k8s-app: {{ template "pushProxy.proxy.name" . }}
{{- define "pushProxy.serviceMonitor.labels" -}} {{- define "pushProxy.serviceMonitor.labels" -}}
app: {{ template "pushprox.serviceMonitor.name" . }} app: {{ template "pushprox.serviceMonitor.name" . }}
release: {{ .Release.Name | quote }}
{{ template "pushProxy.commonLabels" . }} {{ template "pushProxy.commonLabels" . }}
{{- end -}} {{- end -}}
@@ -1,2 +1,2 @@
url: local url: local
version: 100.0.0 version: 100.0.1
@@ -25,7 +25,7 @@ clients:
port: 9796 port: 9796
image: image:
repository: rancher/windows_exporter-package repository: rancher/windows_exporter-package
tag: v0.0.2 tag: v0.0.3
os: "windows" os: "windows"
# Specify the IP addresses of nodes that you want to collect metrics from # Specify the IP addresses of nodes that you want to collect metrics from