(dev-v2.6-archive) [rancher-monitoring-crd] code refactor

(partially cherry picked from commit cfc2433953)
pull/1680/head
Jiaqi Luo 2021-11-19 12:12:52 -07:00 committed by Arvind Iyengar
parent 7f2f418b2c
commit 5f1dde5a21
No known key found for this signature in database
GPG Key ID: A8DD9BFD6C811498
101 changed files with 936 additions and 2335 deletions

View File

@ -1,2 +1,2 @@
url: https://github.com/rancher/backup-restore-operator/releases/download/v2.1.0-rc1/rancher-backup-crd-2.1.0-rc1.tgz
version: 2.1.0
url: https://github.com/rancher/backup-restore-operator/releases/download/v2.0.1/rancher-backup-crd-2.0.1.tgz
version: 2.0.1

View File

@ -1,2 +1,2 @@
url: https://github.com/rancher/backup-restore-operator/releases/download/v2.1.0-rc1/rancher-backup-2.1.0-rc1.tgz
version: 2.1.0
url: https://github.com/rancher/backup-restore-operator/releases/download/v2.0.1/rancher-backup-2.0.1.tgz
version: 2.0.1

View File

@ -8,7 +8,7 @@ image:
tag: v1.0.6
securityScan:
repository: rancher/security-scan
tag: v0.2.5-rc2
tag: v0.2.5-rc1
sonobuoy:
repository: rancher/mirrored-sonobuoy-sonobuoy
tag: v0.53.2

View File

@ -1,7 +1,7 @@
--- charts-original/Chart.yaml
+++ charts/Chart.yaml
@@ -4,12 +4,12 @@
catalog.cattle.io/kube-version: < 1.22.0
@@ -3,12 +3,12 @@
catalog.cattle.io/display-name: External IP Webhook
catalog.cattle.io/namespace: cattle-externalip-system
catalog.cattle.io/os: linux
- catalog.cattle.io/release-name: rancher-externalip-webhook
@ -9,14 +9,14 @@
+ catalog.cattle.io/release-name: rancher-external-ip-webhook
+ catalog.cattle.io/ui-component: rancher-external-ip-webhook
apiVersion: v1
appVersion: v1.0.1-rc2
appVersion: v1.0.1-rc1
description: |
- Deploy the externalip-webhook to mitigate k8s CVE-2020-8554
+ Deploy the external-ip-webhook to mitigate k8s CVE-2020-8554
home: https://github.com/rancher/externalip-webhook
keywords:
- cve
@@ -20,7 +20,7 @@
@@ -19,7 +19,7 @@
maintainers:
- email: raul@rancher.com
name: rawmind0
@ -24,4 +24,4 @@
+name: rancher-external-ip-webhook
sources:
- https://github.com/rancher/externalip-webhook
version: 1.0.1-rc2
version: 1.0.1-rc1

View File

@ -1,2 +1,2 @@
url: https://github.com/rancher/externalip-webhook/releases/download/v1.0.1-rc2/rancher-externalip-webhook-1.0.1-rc2.tgz
url: https://github.com/rancher/externalip-webhook/releases/download/v1.0.1-rc1/rancher-externalip-webhook-1.0.1-rc1.tgz
version: 100.0.1

View File

@ -10,9 +10,9 @@
apiVersion: v2
-name: grafana
+name: rancher-grafana
version: 6.11.0
-appVersion: 7.5.5
+appVersion: 7.5.8
version: 6.16.14
-appVersion: 8.2.1
+appVersion: 7.5.11
kubeVersion: '^1.8.0-0'
description: The leading tool for querying and visualizing time series and metrics.
home: https://grafana.net

View File

@ -0,0 +1,24 @@
--- charts-original/README.md
+++ charts/README.md
@@ -59,8 +59,8 @@
| `securityContext` | Deployment securityContext | `{"runAsUser": 472, "runAsGroup": 472, "fsGroup": 472}` |
| `priorityClassName` | Name of Priority Class to assign pods | `nil` |
| `image.repository` | Image repository | `grafana/grafana` |
-| `image.tag` | Image tag (`Must be >= 5.0.0`) | `8.0.3` |
-| `image.sha` | Image sha (optional) | `80c6d6ac633ba5ab3f722976fb1d9a138f87ca6a9934fcd26a5fc28cbde7dbfa` |
+| `image.tag` | Image tag (`Must be >= 5.0.0`) | `7.5.11` |
+| `image.sha` | Image sha (optional) | `` |
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
| `image.pullSecrets` | Image pull secrets | `{}` |
| `service.enabled` | Enable grafana service | `true` |
@@ -188,8 +188,8 @@
| `rbac.extraRoleRules` | Additional rules to add to the Role | [] |
| `rbac.extraClusterRoleRules` | Additional rules to add to the ClusterRole | [] |
| `command` | Define command to be executed by grafana container at startup | `nil` |
-| `testFramework.enabled` | Whether to create test-related resources | `true` |
-| `testFramework.image` | `test-framework` image repository. | `bats/bats` |
+| `testFramework.enabled` | Whether to create test-related resources | `false` |
+| `testFramework.image` | `test-framework` image repository. | `rancher/mirrored-bats-bats` |
| `testFramework.tag` | `test-framework` image tag. | `v1.1.0` |
| `testFramework.imagePullPolicy` | `test-framework` image pull policy. | `IfNotPresent` |
| `testFramework.securityContext` | `test-framework` securityContext | `{}` |

View File

@ -5,7 +5,7 @@
{{- define "grafana.pod" -}}
{{- if .Values.schedulerName }}
schedulerName: "{{ .Values.schedulerName }}"
@@ -21,9 +20,9 @@
@@ -22,9 +21,9 @@
{{- if ( and .Values.persistence.enabled .Values.initChownData.enabled ) }}
- name: init-chown-data
{{- if .Values.initChownData.image.sha }}
@ -17,7 +17,7 @@
{{- end }}
imagePullPolicy: {{ .Values.initChownData.image.pullPolicy }}
securityContext:
@@ -42,9 +41,9 @@
@@ -43,9 +42,9 @@
{{- if .Values.dashboards }}
- name: download-dashboards
{{- if .Values.downloadDashboardsImage.sha }}
@ -29,7 +29,7 @@
{{- end }}
imagePullPolicy: {{ .Values.downloadDashboardsImage.pullPolicy }}
command: ["/bin/sh"]
@@ -79,11 +78,16 @@
@@ -80,11 +79,16 @@
{{- if .Values.sidecar.datasources.enabled }}
- name: {{ template "grafana.name" . }}-sc-datasources
{{- if .Values.sidecar.image.sha }}
@ -48,7 +48,7 @@
env:
- name: METHOD
value: LIST
@@ -118,9 +122,9 @@
@@ -119,9 +123,9 @@
{{- if .Values.sidecar.notifiers.enabled }}
- name: {{ template "grafana.name" . }}-sc-notifiers
{{- if .Values.sidecar.image.sha }}
@ -60,7 +60,7 @@
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
@@ -163,9 +167,9 @@
@@ -165,9 +169,9 @@
{{- if .Values.sidecar.dashboards.enabled }}
- name: {{ template "grafana.name" . }}-sc-dashboard
{{- if .Values.sidecar.image.sha }}
@ -72,7 +72,7 @@
{{- end }}
imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }}
env:
@@ -205,9 +209,9 @@
@@ -207,9 +211,9 @@
{{- end}}
- name: {{ .Chart.Name }}
{{- if .Values.image.sha }}
@ -84,7 +84,7 @@
{{- end }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
{{- if .Values.command }}
@@ -307,7 +311,7 @@
@@ -315,7 +319,7 @@
{{- end }}
ports:
- name: {{ .Values.service.portName }}
@ -93,7 +93,7 @@
protocol: TCP
- name: {{ .Values.podPortName }}
containerPort: 3000
@@ -388,17 +392,17 @@
@@ -402,17 +406,17 @@
{{- with .Values.extraContainers }}
{{ tpl . $ | indent 2 }}
{{- end }}

View File

@ -21,7 +21,7 @@
{{- end }}
imagePullPolicy: {{ .Values.imageRenderer.image.pullPolicy }}
{{- if .Values.imageRenderer.command }}
@@ -97,16 +99,16 @@
@@ -101,16 +103,16 @@
resources:
{{ toYaml . | indent 12 }}
{{- end }}

View File

@ -1,7 +1,7 @@
--- charts-original/templates/podsecuritypolicy.yaml
+++ charts/templates/podsecuritypolicy.yaml
@@ -6,13 +6,9 @@
namespace: {{ template "grafana.namespace" . }}
@@ -5,13 +5,9 @@
name: {{ template "grafana.fullname" . }}
labels:
{{- include "grafana.labels" . | nindent 4 }}
- annotations:

View File

@ -0,0 +1,11 @@
--- charts-original/templates/tests/test.yaml
+++ charts/templates/tests/test.yaml
@@ -33,7 +33,7 @@
{{- end }}
containers:
- name: {{ .Release.Name }}-test
- image: "{{ .Values.testFramework.image}}:{{ .Values.testFramework.tag }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.testFramework.image}}:{{ .Values.testFramework.tag }}"
imagePullPolicy: "{{ .Values.testFramework.imagePullPolicy}}"
command: ["/opt/bats/bin/bats", "-t", "/tests/run.sh"]
volumeMounts:

View File

@ -1,12 +1,10 @@
--- charts-original/values.yaml
+++ charts/values.yaml
@@ -1,9 +1,25 @@
@@ -1,9 +1,23 @@
+global:
+ cattle:
+ systemDefaultRegistry: ""
+
+autoscaling:
+ enabled: false
rbac:
create: true
## Use an existing ClusterRole/Role (depending on rbac.namespaced false/true)
@ -27,22 +25,24 @@
namespaced: false
extraRoleRules: []
# - apiGroups: []
@@ -68,8 +84,8 @@
@@ -69,8 +83,8 @@
# schedulerName: "default-scheduler"
image:
- repository: grafana/grafana
- tag: 7.5.5
- tag: 8.2.1
+ repository: rancher/mirrored-grafana-grafana
+ tag: 7.5.8
+ tag: 7.5.11
sha: ""
pullPolicy: IfNotPresent
@@ -82,12 +98,15 @@
@@ -82,13 +96,16 @@
# - myRegistrKeySecretName
testFramework:
enabled: true
- enabled: true
- image: "bats/bats"
+ enabled: false
+ image: "rancher/mirrored-bats-bats"
tag: "v1.1.0"
imagePullPolicy: IfNotPresent
@ -56,7 +56,7 @@
runAsUser: 472
runAsGroup: 472
fsGroup: 472
@@ -115,8 +134,8 @@
@@ -116,8 +133,8 @@
# priorityClassName:
downloadDashboardsImage:
@ -67,7 +67,16 @@
sha: ""
pullPolicy: IfNotPresent
@@ -301,7 +320,7 @@
@@ -189,7 +206,7 @@
labels: {}
path: /
- # pathType is only for k8s >= 1.1=
+ # pathType is only for k8s >= 1.18
pathType: Prefix
hosts:
@@ -303,7 +320,7 @@
## initChownData container image
##
image:
@ -76,18 +85,16 @@
tag: "1.31.1"
sha: ""
pullPolicy: IfNotPresent
@@ -602,8 +621,8 @@
@@ -614,7 +631,7 @@
## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards
sidecar:
image:
- repository: quay.io/kiwigrid/k8s-sidecar
- tag: 1.10.7
+ repository: rancher/mirrored-kiwigrid-k8s-sidecar
+ tag: 1.12.2
tag: 1.12.3
sha: ""
imagePullPolicy: IfNotPresent
resources: {}
@@ -690,9 +709,9 @@
@@ -702,9 +719,9 @@
replicas: 1
image:
# image-renderer Image repository

View File

@ -1,4 +1,5 @@
url: https://github.com/grafana/helm-charts.git
subdirectory: charts/grafana
commit: bf7e1110a5ee9258190d0377fea319bb8e764e62
commit: ad033c2a2b1f81284a924fb8627ca00700952fc5
version: 100.0.0
doNotRelease: true

View File

@ -8,27 +8,27 @@
{{- end }}
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
@@ -173,7 +174,7 @@
@@ -107,7 +108,7 @@
readOnly: true
{{- end }}
{{- end }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
- image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.image.repository }}:{{ .Values.image.tag }}"
ports:
- containerPort: 8080
{{- if .Values.selfMonitor.enabled }}
@@ -207,12 +208,12 @@
- containerPort: {{ .Values.service.port | default 8080}}
{{- if .Values.selfMonitor.enabled }}
@@ -141,12 +142,12 @@
affinity:
{{ toYaml .Values.affinity | indent 8 }}
{{- end }}
{{- end }}
+ nodeSelector: {{ include "linux-node-selector" . | nindent 8 }}
{{- if .Values.nodeSelector }}
{{- if .Values.nodeSelector }}
- nodeSelector:
{{ toYaml .Values.nodeSelector | indent 8 }}
{{- end }}
{{- end }}
+ tolerations: {{ include "linux-node-tolerations" . | nindent 8 }}
{{- if .Values.tolerations }}
{{- if .Values.tolerations }}
- tolerations:
{{ toYaml .Values.tolerations | indent 8 }}
{{- end }}
{{- if .Values.kubeconfig.enabled}}
{{- end }}
{{- if .Values.kubeconfig.enabled}}

View File

@ -0,0 +1,9 @@
--- charts-original/templates/role.yaml
+++ charts/templates/role.yaml
@@ -1,3 +1,6 @@
+{{- if not (kindIs "slice" .Values.collectors) }}
+{{- fail "Collectors need to be a List since kube-state-metrics chart 3.2.2. Please check README for more information."}}
+{{- end }}
{{- if and (eq .Values.rbac.create true) (not .Values.rbac.useExistingRole) -}}
{{- range (split "," .Values.namespaces) }}
---

View File

@ -10,10 +10,10 @@
image:
- repository: k8s.gcr.io/kube-state-metrics/kube-state-metrics
+ repository: rancher/mirrored-kube-state-metrics-kube-state-metrics
tag: v2.0.0
tag: v2.2.0
pullPolicy: IfNotPresent
@@ -84,6 +88,7 @@
@@ -86,6 +90,7 @@
securityContext:
enabled: true

View File

@ -1,4 +1,5 @@
url: https://github.com/prometheus-community/helm-charts.git
subdirectory: charts/kube-state-metrics
commit: 086f1f7f0870e110abf30aa6bfe7c141e83cc950
commit: 3f371027f2c384cb2e58b46b2249b6bfa200b1e7
version: 100.0.0
doNotRelease: true

View File

@ -1,12 +1,12 @@
dependencies:
- name: kube-state-metrics
repository: https://prometheus-community.github.io/helm-charts
version: 3.1.1
version: 3.5.2
- name: prometheus-node-exporter
repository: https://prometheus-community.github.io/helm-charts
version: 1.18.1
version: 2.0.4
- name: grafana
repository: https://grafana.github.io/helm-charts
version: 6.12.0
digest: sha256:11886645ff1ade77d0fefdca90afba4a92f2b535997280074a59828e8d1dab4e
generated: "2021-06-09T16:56:40.364303181+02:00"
version: 6.16.10
digest: sha256:94dad976ca1630e9e3cd006fadb255783387b53bd9d0d19e105bd39d8e8e34be
generated: "2021-09-28T10:26:46.319411+07:00"

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""Fetch dashboards from provided urls into this chart."""
import json
import re
import textwrap
from os import makedirs, path
@ -26,16 +27,18 @@ def change_style(style, representer):
# Source files list
charts = [
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml',
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml',
'destination': '../templates/grafana/dashboards-1.14',
'type': 'yaml',
'min_kubernetes': '1.14.0-0'
'min_kubernetes': '1.14.0-0',
'multicluster_key': '.Values.grafana.sidecar.dashboards.multicluster.global.enabled',
},
{
'source': 'https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/grafana.json',
'destination': '../templates/grafana/dashboards-1.14',
'type': 'json',
'min_kubernetes': '1.14.0-0'
'min_kubernetes': '1.14.0-0',
'multicluster_key': '(or .Values.grafana.sidecar.dashboards.multicluster.global.enabled .Values.grafana.sidecar.dashboards.multicluster.etcd.enabled)'
},
]
@ -64,7 +67,7 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-promet
apiVersion: v1
kind: ConfigMap
metadata:
namespace: {{ template "kube-prometheus-stack.namespace" . }}
namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
name: {{ printf "%%s-%%s" (include "kube-prometheus-stack.fullname" $) "%(name)s" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@ -103,20 +106,29 @@ def yaml_str_repr(struct, indent=2):
text = textwrap.indent(text, ' ' * indent)
return text
def patch_json_for_multicluster_configuration(content):
def patch_dashboards_json(content, multicluster_key):
try:
content_struct = json.loads(content)
# multicluster
overwrite_list = []
for variable in content_struct['templating']['list']:
if variable['name'] == 'cluster':
variable['hide'] = ':multicluster:'
overwrite_list.append(variable)
content_struct['templating']['list'] = overwrite_list
# fix drilldown links. See https://github.com/kubernetes-monitoring/kubernetes-mixin/issues/659
for row in content_struct['rows']:
for panel in row['panels']:
for style in panel.get('styles', []):
if 'linkUrl' in style and style['linkUrl'].startswith('./d'):
style['linkUrl'] = style['linkUrl'].replace('./d', '/d')
content_array = []
original_content_lines = content.split('\n')
for i, line in enumerate(json.dumps(content_struct, indent=4).split('\n')):
if ('[]' not in line and '{}' not in line) or line == original_content_lines[i]:
if (' []' not in line and ' {}' not in line) or line == original_content_lines[i]:
content_array.append(line)
continue
@ -136,7 +148,7 @@ def patch_json_for_multicluster_configuration(content):
if multicluster != -1:
content = ''.join((
content[:multicluster-1],
'\{\{ if .Values.grafana.sidecar.dashboards.multicluster \}\}0\{\{ else \}\}2\{\{ end \}\}',
'\{\{ if %s \}\}0\{\{ else \}\}2\{\{ end \}\}' % multicluster_key,
content[multicluster + 15:]
))
except (ValueError, KeyError):
@ -145,7 +157,12 @@ def patch_json_for_multicluster_configuration(content):
return content
def write_group_to_file(resource_name, content, url, destination, min_kubernetes, max_kubernetes):
def patch_json_set_timezone_as_variable(content):
# content is no more in json format, so we have to replace using regex
return re.sub(r'"timezone"\s*:\s*"(?:\\.|[^\"])*"', '"timezone": "\{\{ .Values.grafana.defaultDashboardsTimezone \}\}"', content, flags=re.IGNORECASE)
def write_group_to_file(resource_name, content, url, destination, min_kubernetes, max_kubernetes, multicluster_key):
# initialize header
lines = header % {
'name': resource_name,
@ -155,7 +172,8 @@ def write_group_to_file(resource_name, content, url, destination, min_kubernetes
'max_kubernetes': max_kubernetes
}
content = patch_json_for_multicluster_configuration(content)
content = patch_dashboards_json(content, multicluster_key)
content = patch_json_set_timezone_as_variable(content)
filename_struct = {resource_name + '.json': (LiteralStr(content))}
# rules themselves
@ -196,17 +214,17 @@ def main():
groups = yaml_text['items']
for group in groups:
for resource, content in group['data'].items():
write_group_to_file(resource.replace('.json', ''), content, chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'])
write_group_to_file(resource.replace('.json', ''), content, chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'], chart['multicluster_key'])
elif chart['type'] == 'json':
json_text = json.loads(raw_text)
# is it already a dashboard structure or is it nested (etcd case)?
flat_structure = bool(json_text.get('annotations'))
if flat_structure:
resource = path.basename(chart['source']).replace('.json', '')
write_group_to_file(resource, json.dumps(json_text, indent=4), chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'])
write_group_to_file(resource, json.dumps(json_text, indent=4), chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'], chart['multicluster_key'])
else:
for resource, content in json_text.items():
write_group_to_file(resource.replace('.json', ''), json.dumps(content, indent=4), chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'])
write_group_to_file(resource.replace('.json', ''), json.dumps(content, indent=4), chart['source'], chart['destination'], chart['min_kubernetes'], chart['max_kubernetes'], chart['multicluster_key'])
print("Finished")

View File

@ -25,7 +25,37 @@ def change_style(style, representer):
# Source files list
charts = [
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml',
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/alertmanager-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-state-metrics-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/node-exporter-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-operator-prometheusRule.yaml',
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
@ -34,18 +64,6 @@ charts = [
'destination': '../templates/prometheus/rules-1.14',
'min_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml',
'destination': '../templates/prometheus/rules',
'min_kubernetes': '1.10.0-0',
'max_kubernetes': '1.14.0-0'
},
{
'source': 'https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/etcd3_alert.rules.yml',
'destination': '../templates/prometheus/rules',
'min_kubernetes': '1.10.0-0',
'max_kubernetes': '1.14.0-0'
},
]
# Additional conditions map
@ -93,6 +111,7 @@ alert_condition_map = {
'NodeExporterDown': '.Values.nodeExporter.enabled',
'CoreDNSDown': '.Values.kubeDns.enabled',
'AlertmanagerDown': '.Values.alertmanager.enabled',
'AggregatedAPIDown': 'semverCompare ">=1.18.0-0" $kubeTargetVersion',
}
replacement_map = {

View File

@ -1,63 +0,0 @@
{{- /*
Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }}
{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }}
{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: alertmanager.rules
rules:
- alert: AlertmanagerConfigInconsistent
annotations:
message: The configuration of the instances of the Alertmanager cluster `{{`{{`}}$labels.service{{`}}`}}` are out of sync.
expr: count_values("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="{{ $operatorJob }}",namespace="{{ $namespace }}",controller="alertmanager"}) by (name, job, namespace, controller), "service", "$1", "name", "(.*)") != 1
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: AlertmanagerFailedReload
annotations:
message: Reloading Alertmanager's configuration has failed for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}.
expr: alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: AlertmanagerMembersInconsistent
annotations:
message: Alertmanager has not found all other members of the cluster.
expr: |-
alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
!= on (service) GROUP_LEFT()
count by (service) (alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,179 +0,0 @@
{{- /*
Generated from 'etcd' group from https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/etcd3_alert.rules.yml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeEtcd.enabled .Values.defaultRules.rules.etcd }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "etcd" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: etcd
rules:
- alert: etcdInsufficientMembers
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).'
expr: sum(up{job=~".*etcd.*"} == bool 1) by (job) < ((count(up{job=~".*etcd.*"}) by (job) + 1) / 2)
for: 3m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdNoLeader
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member {{`{{`}} $labels.instance {{`}}`}} has no leader.'
expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
for: 1m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfLeaderChanges
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": instance {{`{{`}} $labels.instance {{`}}`}} has seen {{`{{`}} $value {{`}}`}} leader changes within the last hour.'
expr: rate(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}[15m]) > 3
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
/
sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
> 1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
/
sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
> 5
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdGRPCRequestsSlow
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": gRPC requests to {{`{{`}} $labels.grpc_method {{`}}`}} are taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le))
> 0.15
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdMemberCommunicationSlow
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member communication with {{`{{`}} $labels.To {{`}}`}} is taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.15
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedProposals
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last hour on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighFsyncDurations
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.5
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighCommitDurations
annotations:
message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile commit durations {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.25
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedHTTPRequests
annotations:
message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}'
expr: |-
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
BY (method) > 0.01
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHighNumberOfFailedHTTPRequests
annotations:
message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
expr: |-
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
BY (method) > 0.05
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: etcdHTTPRequestsSlow
annotations:
message: etcd instance {{`{{`}} $labels.instance {{`}}`}} HTTP requests to {{`{{`}} $labels.method {{`}}`}} are slow.
expr: |-
histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
> 0.15
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,56 +0,0 @@
{{- /*
Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.general }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "general.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: general.rules
rules:
- alert: TargetDown
annotations:
message: '{{`{{`}} $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}} targets are down.'
expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: Watchdog
annotations:
message: 'This is an alert meant to ensure that the entire alerting pipeline is functional.
This alert is always firing, therefore it should always be firing in Alertmanager
and always fire against a receiver. There are integrations with various notification
mechanisms that send a notification when this alert is not firing. For example the
"DeadMansSnitch" integration in PagerDuty.
'
expr: vector(1)
labels:
severity: none
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,83 +0,0 @@
{{- /*
Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8s }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: k8s.rules
rules:
- expr: sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace)
record: namespace:container_cpu_usage_seconds_total:sum_rate
- expr: sum(container_memory_usage_bytes{job="kubelet", image!="", container_name!=""}) by (namespace)
record: namespace:container_memory_usage_bytes:sum
- expr: |-
sum by (namespace, pod_name, container_name) (
rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])
)
record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate
- expr: |-
sum by(namespace) (
kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}
* on (endpoint, instance, job, namespace, pod, service)
group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)
)
record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum
- expr: |-
sum by (namespace) (
kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"}
* on (endpoint, instance, job, namespace, pod, service)
group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)
)
record: namespace_name:kube_pod_container_resource_requests_cpu_cores:sum
- expr: |-
sum(
label_replace(
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
"replicaset", "$1", "owner_name", "(.*)"
) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job="kube-state-metrics"},
"workload", "$1", "owner_name", "(.*)"
)
) by (namespace, workload, pod)
labels:
workload_type: deployment
record: mixin_pod_workload
- expr: |-
sum(
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
"workload", "$1", "owner_name", "(.*)"
)
) by (namespace, workload, pod)
labels:
workload_type: daemonset
record: mixin_pod_workload
- expr: |-
sum(
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
"workload", "$1", "owner_name", "(.*)"
)
) by (namespace, workload, pod)
labels:
workload_type: statefulset
record: mixin_pod_workload
{{- end }}

View File

@ -1,39 +0,0 @@
{{- /*
Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserver }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kube-apiserver.rules
rules:
- expr: histogram_quantile(0.99, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.99'
record: cluster_quantile:apiserver_request_latencies:histogram_quantile
- expr: histogram_quantile(0.9, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.9'
record: cluster_quantile:apiserver_request_latencies:histogram_quantile
- expr: histogram_quantile(0.5, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.5'
record: cluster_quantile:apiserver_request_latencies:histogram_quantile
{{- end }}

View File

@ -1,47 +0,0 @@
{{- /*
Generated from 'kube-prometheus-node-alerting.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeAlerting }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-node-alerting.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kube-prometheus-node-alerting.rules
rules:
- alert: NodeDiskRunningFull
annotations:
message: Device {{`{{`}} $labels.device {{`}}`}} of node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} will be full within the next 24 hours.
expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)'
for: 30m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: NodeDiskRunningFull
annotations:
message: Device {{`{{`}} $labels.device {{`}}`}} of node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} will be full within the next 2 hours.
expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)'
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,41 +0,0 @@
{{- /*
Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeRecording }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-node-recording.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kube-prometheus-node-recording.rules
rules:
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)
record: instance:node_cpu:rate:sum
- expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance)
record: instance:node_filesystem_usage:sum
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
record: instance:node_network_receive_bytes:rate:sum
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
record: instance:node_network_transmit_bytes:rate:sum
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
record: instance:node_cpu:ratio
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
record: cluster:node_cpu:sum_rate5m
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
record: cluster:node_cpu:ratio
{{- end }}

View File

@ -1,63 +0,0 @@
{{- /*
Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeScheduler }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-scheduler.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kube-scheduler.rules
rules:
- expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.99'
record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
- expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.99'
record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
- expr: histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.99'
record: cluster_quantile:scheduler_binding_latency:histogram_quantile
- expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.9'
record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
- expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.9'
record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
- expr: histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.9'
record: cluster_quantile:scheduler_binding_latency:histogram_quantile
- expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.5'
record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
- expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.5'
record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
- expr: histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
labels:
quantile: '0.5'
record: cluster_quantile:scheduler_binding_latency:histogram_quantile
{{- end }}

View File

@ -1,159 +0,0 @@
{{- /*
Generated from 'kubernetes-absent' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesAbsent }}
{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }}
{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }}
{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-absent" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kubernetes-absent
rules:
{{- if .Values.alertmanager.enabled }}
- alert: AlertmanagerDown
annotations:
message: Alertmanager has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerdown
expr: absent(up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeDns.enabled }}
- alert: CoreDNSDown
annotations:
message: CoreDNS has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-corednsdown
expr: absent(up{job="kube-dns"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeApiServer.enabled }}
- alert: KubeAPIDown
annotations:
message: KubeAPI has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapidown
expr: absent(up{job="apiserver"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeControllerManager.enabled }}
- alert: KubeControllerManagerDown
annotations:
message: KubeControllerManager has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecontrollermanagerdown
expr: absent(up{job="kube-controller-manager"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeScheduler.enabled }}
- alert: KubeSchedulerDown
annotations:
message: KubeScheduler has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeschedulerdown
expr: absent(up{job="kube-scheduler"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeStateMetrics.enabled }}
- alert: KubeStateMetricsDown
annotations:
message: KubeStateMetrics has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricsdown
expr: absent(up{job="kube-state-metrics"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.prometheusOperator.kubeletService.enabled }}
- alert: KubeletDown
annotations:
message: Kubelet has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletdown
expr: absent(up{job="kubelet"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.nodeExporter.enabled }}
- alert: NodeExporterDown
annotations:
message: NodeExporter has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeexporterdown
expr: absent(up{job="node-exporter"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
- alert: PrometheusDown
annotations:
message: Prometheus has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusdown
expr: absent(up{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- if .Values.prometheusOperator.enabled }}
- alert: PrometheusOperatorDown
annotations:
message: PrometheusOperator has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusoperatordown
expr: absent(up{job="{{ $operatorJob }}",namespace="{{ $namespace }}"} == 1)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -1,200 +0,0 @@
{{- /*
Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesApps }}
{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kubernetes-apps
rules:
- alert: KubePodCrashLooping
annotations:
message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 5 minutes.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping
expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) * 60 * 5 > 0
for: 1h
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubePodNotReady
annotations:
message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than an hour.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodnotready
expr: sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown"}) > 0
for: 1h
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeDeploymentGenerationMismatch
annotations:
message: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentgenerationmismatch
expr: |-
kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeDeploymentReplicasMismatch
annotations:
message: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than an hour.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentreplicasmismatch
expr: |-
kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for: 1h
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeStatefulSetReplicasMismatch
annotations:
message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetreplicasmismatch
expr: |-
kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeStatefulSetGenerationMismatch
annotations:
message: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetgenerationmismatch
expr: |-
kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeStatefulSetUpdateNotRolledOut
annotations:
message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetupdatenotrolledout
expr: |-
max without (revision) (
kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
unless
kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
)
*
(
kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
)
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeDaemonSetRolloutStuck
annotations:
message: Only {{`{{`}} $value {{`}}`}}% of the desired Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are scheduled and ready.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetrolloutstuck
expr: |-
kube_daemonset_status_number_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
/
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} * 100 < 100
for: 15m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeDaemonSetNotScheduled
annotations:
message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetnotscheduled
expr: |-
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
-
kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeDaemonSetMisScheduled
annotations:
message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetmisscheduled
expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeCronJobRunning
annotations:
message: CronJob {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.cronjob {{`}}`}} is taking more than 1h to complete.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecronjobrunning
expr: time() - kube_cronjob_next_schedule_time{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 3600
for: 1h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeJobCompletion
annotations:
message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than one hour to complete.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobcompletion
expr: kube_job_spec_completions{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - kube_job_status_succeeded{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
for: 1h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeJobFailed
annotations:
message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobfailed
expr: kube_job_status_failed{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
for: 1h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,121 +0,0 @@
{{- /*
Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesResources }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-resources" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kubernetes-resources
rules:
- alert: KubeCPUOvercommit
annotations:
message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit
expr: |-
sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum)
/
sum(node:node_num_cpu:sum)
>
(count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum)
for: 5m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeMemOvercommit
annotations:
message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit
expr: |-
sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum)
/
sum(node_memory_MemTotal_bytes)
>
(count(node:node_num_cpu:sum)-1)
/
count(node:node_num_cpu:sum)
for: 5m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeCPUOvercommit
annotations:
message: Cluster has overcommitted CPU resource requests for Namespaces.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit
expr: |-
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
/
sum(node:node_num_cpu:sum)
> 1.5
for: 5m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeMemOvercommit
annotations:
message: Cluster has overcommitted memory resource requests for Namespaces.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit
expr: |-
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
/
sum(node_memory_MemTotal_bytes{job="node-exporter"})
> 1.5
for: 5m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeQuotaExceeded
annotations:
message: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} printf "%0.0f" $value {{`}}`}}% of its {{`{{`}} $labels.resource {{`}}`}} quota.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubequotaexceeded
expr: |-
100 * kube_resourcequota{job="kube-state-metrics", type="used"}
/ ignoring(instance, job, type)
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
> 90
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: CPUThrottlingHigh
annotations:
message: '{{`{{`}} printf "%0.0f" $value {{`}}`}}% throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container_name {{`}}`}} in pod {{`{{`}} $labels.pod_name {{`}}`}}.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-cputhrottlinghigh
expr: |-
100 * sum(increase(container_cpu_cfs_throttled_periods_total{container_name!="", }[5m])) by (container_name, pod_name, namespace)
/
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container_name, pod_name, namespace)
> 25
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,72 +0,0 @@
{{- /*
Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesStorage }}
{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-storage" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kubernetes-storage
rules:
- alert: KubePersistentVolumeUsageCritical
annotations:
message: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} printf "%0.2f" $value {{`}}`}}% free.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeusagecritical
expr: |-
100 * kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
/
kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
< 3
for: 1m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubePersistentVolumeFullInFourDays
annotations:
message: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} printf "%0.2f" $value {{`}}`}}% is available.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefullinfourdays
expr: |-
100 * (
kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
/
kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
) < 15
and
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}[6h], 4 * 24 * 3600) < 0
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubePersistentVolumeErrors
annotations:
message: The persistent volume {{`{{`}} $labels.persistentvolume {{`}}`}} has status {{`{{`}} $labels.phase {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeerrors
expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,184 +0,0 @@
{{- /*
Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kubernetes-system
rules:
- alert: KubeNodeNotReady
annotations:
message: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than an hour.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubenodenotready
expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
for: 1h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeVersionMismatch
annotations:
message: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeversionmismatch
expr: count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
for: 1h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeClientErrors
annotations:
message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} printf "%0.0f" $value {{`}}`}}% errors.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors
expr: |-
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
/
sum(rate(rest_client_requests_total[5m])) by (instance, job))
* 100 > 1
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeClientErrors
annotations:
message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} printf "%0.0f" $value {{`}}`}} errors / second.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors
expr: sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeletTooManyPods
annotations:
message: Kubelet {{`{{`}} $labels.instance {{`}}`}} is running {{`{{`}} $value {{`}}`}} Pods, close to the limit of 110.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubelettoomanypods
expr: kubelet_running_pod_count{job="kubelet"} > 110 * 0.9
for: 15m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeAPILatencyHigh
annotations:
message: The API server has a 99th percentile latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh
expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeAPILatencyHigh
annotations:
message: The API server has a 99th percentile latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh
expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
expr: |-
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
expr: |-
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
expr: |-
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
expr: |-
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeClientCertificateExpiration
annotations:
message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration
expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: KubeClientCertificateExpiration
annotations:
message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration
expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,57 +0,0 @@
{{- /*
Generated from 'node-network' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.network }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-network" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: node-network
rules:
- alert: NetworkReceiveErrors
annotations:
message: Network interface "{{`{{`}} $labels.device {{`}}`}}" showing receive errors on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}"
expr: rate(node_network_receive_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0
for: 2m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: NetworkTransmitErrors
annotations:
message: Network interface "{{`{{`}} $labels.device {{`}}`}}" showing transmit errors on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}"
expr: rate(node_network_transmit_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0
for: 2m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: NodeNetworkInterfaceFlapping
annotations:
message: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing it's up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}"
expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
for: 2m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,37 +0,0 @@
{{- /*
Generated from 'node-time' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.time }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-time" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: node-time
rules:
- alert: ClockSkewDetected
annotations:
message: Clock skew detected on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}. Ensure NTP is configured correctly on this host.
expr: abs(node_timex_offset_seconds{job="node-exporter"}) > 0.03
for: 2m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,202 +0,0 @@
{{- /*
Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.node }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: node.rules
rules:
- expr: sum(min(kube_pod_info) by (node))
record: ':kube_pod_info_node_count:'
- expr: max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod)
record: 'node_namespace_pod:kube_pod_info:'
- expr: |-
count by (node) (sum by (node, cpu) (
node_cpu_seconds_total{job="node-exporter"}
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
))
record: node:node_num_cpu:sum
- expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]))
record: :node_cpu_utilisation:avg1m
- expr: |-
1 - avg by (node) (
rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:)
record: node:node_cpu_utilisation:avg1m
- expr: |-
node:node_cpu_utilisation:avg1m
*
node:node_num_cpu:sum
/
scalar(sum(node:node_num_cpu:sum))
record: node:cluster_cpu_utilisation:ratio
- expr: |-
sum(node_load1{job="node-exporter"})
/
sum(node:node_num_cpu:sum)
record: ':node_cpu_saturation_load1:'
- expr: |-
sum by (node) (
node_load1{job="node-exporter"}
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
/
node:node_num_cpu:sum
record: 'node:node_cpu_saturation_load1:'
- expr: |-
1 -
sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
/
sum(node_memory_MemTotal_bytes{job="node-exporter"})
record: ':node_memory_utilisation:'
- expr: sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
record: :node_memory_MemFreeCachedBuffers_bytes:sum
- expr: sum(node_memory_MemTotal_bytes{job="node-exporter"})
record: :node_memory_MemTotal_bytes:sum
- expr: |-
sum by (node) (
(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_memory_bytes_available:sum
- expr: |-
sum by (node) (
node_memory_MemTotal_bytes{job="node-exporter"}
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_memory_bytes_total:sum
- expr: |-
(node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
/
node:node_memory_bytes_total:sum
record: node:node_memory_utilisation:ratio
- expr: |-
(node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
/
scalar(sum(node:node_memory_bytes_total:sum))
record: node:cluster_memory_utilisation:ratio
- expr: |-
1e3 * sum(
(rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
+ rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
)
record: :node_memory_swap_io_bytes:sum_rate
- expr: |-
1 -
sum by (node) (
(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
/
sum by (node) (
node_memory_MemTotal_bytes{job="node-exporter"}
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: 'node:node_memory_utilisation:'
- expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum)
record: 'node:node_memory_utilisation_2:'
- expr: |-
1e3 * sum by (node) (
(rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
+ rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_memory_swap_io_bytes:sum_rate
- expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
record: :node_disk_utilisation:avg_irate
- expr: |-
avg by (node) (
irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_disk_utilisation:avg_irate
- expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
record: :node_disk_saturation:avg_irate
- expr: |-
avg by (node) (
irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_disk_saturation:avg_irate
- expr: |-
max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
- node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
/ node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
record: 'node:node_filesystem_usage:'
- expr: max by (instance, namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
record: 'node:node_filesystem_avail:'
- expr: |-
sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) +
sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
record: :node_net_utilisation:sum_irate
- expr: |-
sum by (node) (
(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) +
irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_net_utilisation:sum_irate
- expr: |-
sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) +
sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
record: :node_net_saturation:sum_irate
- expr: |-
sum by (node) (
(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) +
irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_net_saturation:sum_irate
- expr: |-
max(
max(
kube_pod_info{job="kube-state-metrics", host_ip!=""}
) by (node, host_ip)
* on (host_ip) group_right (node)
label_replace(
(max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
)
) by (node)
record: 'node:node_inodes_total:'
- expr: |-
max(
max(
kube_pod_info{job="kube-state-metrics", host_ip!=""}
) by (node, host_ip)
* on (host_ip) group_right (node)
label_replace(
(max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
)
) by (node)
record: 'node:node_inodes_free:'
{{- end }}

View File

@ -1,49 +0,0 @@
{{- /*
Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheusOperator }}
{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus-operator" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: prometheus-operator
rules:
- alert: PrometheusOperatorReconcileErrors
annotations:
message: Errors while reconciling {{`{{`}} $labels.controller {{`}}`}} in {{`{{`}} $labels.namespace {{`}}`}} Namespace.
expr: rate(prometheus_operator_reconcile_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusOperatorNodeLookupErrors
annotations:
message: Errors while reconciling Prometheus in {{`{{`}} $labels.namespace {{`}}`}} Namespace.
expr: rate(prometheus_operator_node_address_lookup_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -1,139 +0,0 @@
{{- /*
Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheus }}
{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: prometheus.rules
rules:
- alert: PrometheusConfigReloadFailed
annotations:
description: Reloading Prometheus' configuration has failed for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}}
summary: Reloading Prometheus' configuration failed
expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusNotificationQueueRunningFull
annotations:
description: Prometheus' alert notification queue is running full for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}
summary: Prometheus' alert notification queue is running full
expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusErrorSendingAlerts
annotations:
description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}}
summary: Errors while sending alert from Prometheus
expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.01
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusErrorSendingAlerts
annotations:
description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}}
summary: Errors while sending alerts from Prometheus
expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.03
for: 10m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusNotConnectedToAlertmanagers
annotations:
description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} is not connected to any Alertmanagers
summary: Prometheus is not connected to any Alertmanagers
expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} < 1
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusTSDBReloadsFailing
annotations:
description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} reload failures over the last four hours.'
summary: Prometheus has issues reloading data blocks from disk
expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
for: 12h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusTSDBCompactionsFailing
annotations:
description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last four hours.'
summary: Prometheus has issues compacting sample blocks
expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
for: 12h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusTSDBWALCorruptions
annotations:
description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} has a corrupted write-ahead log (WAL).'
summary: Prometheus write-ahead log is corrupted
expr: prometheus_tsdb_wal_corruptions_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} > 0
for: 4h
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusNotIngestingSamples
annotations:
description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} isn't ingesting samples.
summary: Prometheus isn't ingesting samples
expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- alert: PrometheusTargetScrapesDuplicate
annotations:
description: '{{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has many samples rejected due to duplicate timestamps but different values'
summary: Prometheus has many samples rejected
expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 10m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,97 @@
suite: test ingress
templates:
- alertmanager/ingress.yaml
tests:
- it: should be empty if alertmanager is not enabled
set:
alertmanager.enabled: false
alertmanager.ingress.enabled: true
asserts:
- hasDocuments:
count: 0
- it: should be empty if ingress is not enabled
set:
alertmanager.enabled: true
alertmanager.ingress.enabled: false
asserts:
- hasDocuments:
count: 0
- it: should have apiVersion extensions/v1beta1 for k8s < 1.16
set:
alertmanager.enabled: true
alertmanager.ingress.enabled: true
capabilities:
majorVersion: 1
minorVersion: 15
asserts:
- hasDocuments:
count: 1
- isKind:
of: Ingress
- isAPIVersion:
of: extensions/v1beta1
- it: should have apiVersion networking.k8s.io/v1beta1 for k8s >= 1.16 < 1.19
set:
alertmanager.enabled: true
alertmanager.ingress.enabled: true
capabilities:
majorVersion: 1
minorVersion: 16
apiVersions:
- networking.k8s.io/v1beta1
asserts:
- hasDocuments:
count: 1
- isKind:
of: Ingress
- isAPIVersion:
of: networking.k8s.io/v1beta1
- it: should have apiVersion networking.k8s.io/v1 for k8s >= 1.19 < 1.22
set:
alertmanager.enabled: true
alertmanager.ingress.enabled: true
capabilities:
majorVersion: 1
minorVersion: 10
apiVersions:
- networking.k8s.io/v1
- networking.k8s.io/v1beta1
asserts:
- hasDocuments:
count: 1
- isKind:
of: Ingress
- isAPIVersion:
of: networking.k8s.io/v1beta1
- it: should have apiVersion networking.k8s.io/v1 for k8s >= 1.22
set:
alertmanager.enabled: true
alertmanager.ingress.enabled: true
capabilities:
majorVersion: 1
minorVersion: 22
apiVersions:
- networking.k8s.io/v1
asserts:
- hasDocuments:
count: 1
- isKind:
of: Ingress
- isAPIVersion:
of: networking.k8s.io/v1
- it: should have explicit pathType ImplementationSpecific for networking.k8s.io/v1 by default
set:
alertmanager.enabled: true
alertmanager.ingress.enabled: true
capabilities:
majorVersion: 1
minorVersion: 19
apiVersions:
- networking.k8s.io/v1
asserts:
- hasDocuments:
count: 1
- equal:
path: spec.rules[0].http.paths[0].pathType
value: ImplementationSpecific

View File

@ -13,3 +13,17 @@ The chart installs the following components:
- [Prometheus Adapter](https://github.com/helm/charts/tree/master/stable/prometheus-adapter) - The adapter allows a user to expose custom metrics, resource metrics, and external metrics on the default [Prometheus](https://prometheus.io/) instance to the Kubernetes API Server.
For more information, review the Helm README of this chart.
## Upgrading from 100.0.0+up16.6.0 to 100.1.0+up19.0.3
### Noticeable changes:
Grafana:
- `sidecar.dashboards.searchNamespace`, `sidecar.datasources.searchNamespace` and `sidecar.notifiers.searchNamespace` support a list of namespaces now.
Kube-state-metrics
- the type of `collectors` is changed from Dictionary to List.
- `kubeStateMetrics.serviceMonitor.namespaceOverride` was replaced by `kube-state-metrics.namespaceOverride`.
### Known issues:
- Occasionally, the upgrade fails with errors related to the webhook `prometheusrulemutate.monitoring.coreos.com`. This is a known issue in the upstream, and the workaround is to trigger the upgrade one more time. [32416](https://github.com/rancher/rancher/issues/32416#issuecomment-828881726)

View File

@ -43,7 +43,6 @@ metadata:
rbac.authorization.k8s.io/aggregate-to-edit: "true"
{{- end }}
rules:
rules:
- apiGroups:
- monitoring.coreos.com
resources:

View File

@ -1,6 +1,6 @@
--- charts-original/Chart.yaml
+++ charts/Chart.yaml
@@ -1,3 +1,35 @@
@@ -1,3 +1,38 @@
+apiVersion: v2
+description: Collects several related Helm charts, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator.
+icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png
@ -20,12 +20,15 @@
+ - name: Arvind
+ email: arvind.iyengar@suse.com
+ url: ""
+ - name: Jack
+ email: jiaqi.luo@suse.com
+ url: "https://github.com/jiaqiluo"
+name: rancher-monitoring
+sources:
+ - https://github.com/prometheus-community/helm-charts
+ - https://github.com/prometheus-operator/kube-prometheus
+version: 16.6.0
+appVersion: 0.48.0
+version: 19.0.3
+appVersion: 0.50.0
+kubeVersion: ">=1.16.0-0"
+home: https://github.com/prometheus-operator/kube-prometheus
+keywords:
@ -36,12 +39,12 @@
annotations:
artifacthub.io/links: |
- name: Chart Source
@@ -5,8 +37,16 @@
@@ -5,8 +40,19 @@
- name: Upstream Project
url: https://github.com/prometheus-operator/kube-prometheus
artifacthub.io/operator: "true"
-apiVersion: v2
-appVersion: 0.48.0
-appVersion: 0.50.0
+ catalog.cattle.io/certified: rancher
+ catalog.cattle.io/namespace: cattle-monitoring-system
+ catalog.cattle.io/release-name: rancher-monitoring
@ -52,10 +55,13 @@
+ catalog.cattle.io/auto-install: rancher-monitoring-crd=match
+ catalog.cattle.io/requests-cpu: "4500m"
+ catalog.cattle.io/requests-memory: "4000Mi"
+ catalog.cattle.io/rancher-version: ">= 2.6.0-0 <=2.6.99-0"
+ catalog.cattle.io/kube-version: ">=1.16.0-0"
+ catalog.cattle.io/upstream-version: "19.0.3"
dependencies:
- condition: grafana.enabled
name: grafana
@@ -71,34 +111,6 @@
@@ -71,34 +117,6 @@
- condition: rkeScheduler.enabled
name: rkeScheduler
repository: file://./charts/rkeScheduler
@ -90,4 +96,4 @@
-- https://github.com/prometheus-community/helm-charts
-- https://github.com/prometheus-operator/kube-prometheus
-type: application
-version: 16.6.0
-version: 19.0.3

View File

@ -1,6 +1,6 @@
--- charts-original/README.md
+++ charts/README.md
@@ -193,7 +193,39 @@
@@ -230,7 +230,39 @@
helm show values prometheus-community/kube-prometheus-stack
```

View File

@ -1,11 +0,0 @@
--- charts-original/charts/windowsExporter/values.yaml
+++ charts/charts/windowsExporter/values.yaml
@@ -25,7 +25,7 @@
port: 9796
image:
repository: rancher/windows_exporter-package
- tag: v0.0.2
+ tag: v0.0.3
os: "windows"
# Specify the IP addresses of nodes that you want to collect metrics from

View File

@ -25,7 +25,7 @@
{{- end }}
paused: {{ .Values.alertmanager.alertmanagerSpec.paused }}
@@ -104,8 +106,8 @@
- {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-alertmanager]}
- {key: alertmanager, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-alertmanager]}
{{- end }}
{{- end }}
+ tolerations: {{ include "linux-node-tolerations" . | nindent 4 }}

View File

@ -20,7 +20,7 @@
{{ toYaml .Values.alertmanager.secret.annotations | indent 4 }}
{{- end }}
labels:
@@ -20,4 +26,4 @@
@@ -24,4 +30,4 @@
{{- range $key, $val := .Values.alertmanager.templateFiles }}
{{ $key }}: {{ $val | b64enc | quote }}
{{- end }}

View File

@ -0,0 +1,10 @@
--- charts-original/templates/exporters/kube-state-metrics/serviceMonitor.yaml
+++ charts/templates/exporters/kube-state-metrics/serviceMonitor.yaml
@@ -1,4 +1,7 @@
{{- if .Values.kubeStateMetrics.enabled }}
+{{- if .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }}
+{{- fail "kubeStateMetrics.serviceMonitor.namespaceOverride was removed. Please use kube-state-metrics.namespaceOverride instead." }}
+{{- end }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:

View File

@ -4,7 +4,7 @@
kind: ConfigMap
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) $dashboardName | trunc 63 | trimSuffix "-" }}
- namespace: {{ template "kube-prometheus-stack.namespace" $ }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" $ }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
labels:
{{- if $.Values.grafana.sidecar.dashboards.label }}

View File

@ -4,7 +4,7 @@
kind: ConfigMap
metadata:
name: {{ template "kube-prometheus-stack.fullname" . }}-grafana-datasource
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ default .Values.grafana.sidecar.datasources.searchNamespace (include "kube-prometheus-stack.namespace" .) }}
{{- if .Values.grafana.sidecar.datasources.annotations }}
annotations:

View File

@ -0,0 +1,23 @@
--- charts-original/templates/grafana/dashboards-1.14/alertmanager-overview.yaml
+++ charts/templates/grafana/dashboards-1.14/alertmanager-overview.yaml
@@ -5,10 +5,11 @@
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
+{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }}
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "alertmanager-overview" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -607,4 +608,5 @@
"uid": "alertmanager-overview",
"version": 0
}
-{{- end }}
\ No newline at end of file
+{{- end }}
+{{- end }}

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "apiserver" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,8 +4,17 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "cluster-total" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1831,7 +1831,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,

View File

@ -10,7 +10,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "controller-manager" | trunc 63 | trimSuffix "-" }}
annotations:
@ -32,29 +32,29 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)",
- "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
+ "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
"legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
@@ -282,7 +287,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)",
- "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
+ "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
"legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
@@ -388,7 +393,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name, le))",
- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
"legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
@@ -494,28 +499,28 @@
"steppedLine": false,
"targets": [
@ -133,12 +133,21 @@
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1100,7 +1105,7 @@
"options": [
],
- "query": "label_values(up{job=\"kube-controller-manager\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -1126,7 +1131,7 @@
"options": [
],
- "query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)",
+ "query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, instance)",
- "query": "label_values(up{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)",
+ "query": "label_values(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -10,7 +10,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "etcd" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-coredns" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,8 +4,17 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -2973,7 +2973,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-node" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-pod" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -10,268 +10,277 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "kubelet" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -107,7 +108,7 @@
"tableColumn": "",
"targets": [
{
- "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})",
+ "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -191,7 +192,7 @@
"tableColumn": "",
"targets": [
{
- "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
+ "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -275,7 +276,7 @@
"tableColumn": "",
"targets": [
{
- "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
+ "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -359,7 +360,7 @@
"tableColumn": "",
"targets": [
{
- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})",
+ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -443,7 +444,7 @@
"tableColumn": "",
"targets": [
{
- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})",
+ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -527,7 +528,7 @@
"tableColumn": "",
"targets": [
{
- "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))",
+ "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -609,7 +610,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)",
+ "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
@@ -702,7 +703,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)",
+ "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
@@ -808,7 +809,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
@@ -914,14 +915,14 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} pod",
"refId": "A"
},
{
- "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} worker",
@@ -1014,14 +1015,14 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} pod",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} worker",
@@ -1129,7 +1130,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
+ "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}",
@@ -1224,7 +1225,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
+ "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}",
@@ -1332,7 +1333,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}",
@@ -1438,7 +1439,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)",
+ "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}operation_type{{`}}`}}",
@@ -1531,7 +1532,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
@@ -1638,7 +1639,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1731,7 +1732,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1837,7 +1838,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1943,28 +1944,28 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@@ -2070,7 +2071,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}",
@@ -2176,7 +2177,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
+ "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -2269,7 +2270,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])",
+ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -2362,7 +2363,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
+ "expr": "go_goroutines{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -2482,7 +2483,7 @@
@@ -87,7 +88,7 @@
"pluginVersion": "7",
"targets": [
{
- "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})",
+ "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -144,7 +145,7 @@
"pluginVersion": "7",
"targets": [
{
- "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
+ "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -201,7 +202,7 @@
"pluginVersion": "7",
"targets": [
{
- "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
+ "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -258,7 +259,7 @@
"pluginVersion": "7",
"targets": [
{
- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})",
+ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -315,7 +316,7 @@
"pluginVersion": "7",
"targets": [
{
- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})",
+ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -372,7 +373,7 @@
"pluginVersion": "7",
"targets": [
{
- "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))",
+ "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -431,7 +432,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)",
+ "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
@@ -526,7 +527,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)",
+ "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
@@ -621,7 +622,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
@@ -716,14 +717,14 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} pod",
"refId": "A"
},
{
- "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} worker",
@@ -818,14 +819,14 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} pod",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} worker",
@@ -922,7 +923,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
+ "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}",
@@ -1019,7 +1020,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
+ "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}",
@@ -1116,7 +1117,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}",
@@ -1211,7 +1212,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)",
+ "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}operation_type{{`}}`}}",
@@ -1306,7 +1307,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}",
@@ -1402,7 +1403,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1497,7 +1498,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1592,7 +1593,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1687,28 +1688,28 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@@ -1803,7 +1804,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}",
@@ -1898,7 +1899,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
+ "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1993,7 +1994,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])",
+ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -2088,7 +2089,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
+ "expr": "go_goroutines{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -2177,7 +2178,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -2203,7 +2204,7 @@
"options": [
],
@ -280,7 +289,7 @@
"refresh": 2,
"regex": "",
"sort": 1,
@@ -2530,4 +2531,5 @@
@@ -2251,4 +2252,5 @@
"uid": "3138fa155d5915769fbded898ac09fd9",
"version": 0
}

View File

@ -4,8 +4,17 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-pod" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1301,7 +1301,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,

View File

@ -4,8 +4,17 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-workload" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1541,7 +1541,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-rsrc-use" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "persistentvolumesusage" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,8 +4,17 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pod-total" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1033,7 +1033,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus-remote-write" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -10,7 +10,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "proxy" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -10,7 +10,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "scheduler" | trunc 63 | trimSuffix "-" }}
annotations:
@ -32,68 +32,68 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)",
- "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
+ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} e2e",
"legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e",
"refId": "A"
},
{
- "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)",
- "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
+ "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} binding",
"legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding",
"refId": "B"
},
{
- "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)",
- "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
+ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm",
"legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm",
"refId": "C"
},
{
- "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)",
- "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
+ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} volume",
"legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume",
@@ -290,28 +295,28 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))",
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} e2e",
"legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))",
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} binding",
"legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding",
"refId": "B"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))",
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm",
"legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm",
"refId": "C"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))",
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} volume",
"legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume",
@@ -417,28 +422,28 @@
"steppedLine": false,
"targets": [
@ -172,6 +172,15 @@
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1023,7 +1028,7 @@
"options": [
],
- "query": "label_values(up{job=\"kube-scheduler\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -1049,7 +1054,7 @@
"options": [

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "statefulset" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "workload-total" | trunc 63 | trimSuffix "-" }}
annotations:

View File

@ -22,9 +22,21 @@
- - --prometheus-config-reloader={{ .Values.prometheusOperator.prometheusConfigReloaderImage.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.tag }}
+ - --prometheus-config-reloader={{ template "system_default_registry" . }}{{ .Values.prometheusOperator.prometheusConfigReloaderImage.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloaderImage.tag }}
{{- end }}
- --config-reloader-cpu={{ .Values.prometheusOperator.configReloaderCpu }}
- --config-reloader-memory={{ .Values.prometheusOperator.configReloaderMemory }}
@@ -130,16 +130,16 @@
- --config-reloader-cpu-request={{ .Values.prometheusOperator.configReloaderCpu }}
- --config-reloader-cpu-limit={{ .Values.prometheusOperator.configReloaderCpu }}
@@ -81,9 +81,9 @@
- --prometheus-instance-namespaces={{ .Values.prometheusOperator.prometheusInstanceNamespaces | join "," }}
{{- end }}
{{- if .Values.prometheusOperator.thanosImage.sha }}
- - --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }}
+ - --thanos-default-base-image={{ template "system_default_registry" . }}{{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }}
{{- else }}
- - --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}
+ - --thanos-default-base-image={{ template "system_default_registry" . }}{{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}
{{- end }}
{{- if .Values.prometheusOperator.thanosRulerInstanceNamespaces }}
- --thanos-ruler-instance-namespaces={{ .Values.prometheusOperator.thanosRulerInstanceNamespaces | join "," }}
@@ -137,16 +137,16 @@
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
{{- end }}

View File

@ -1,15 +1,24 @@
--- charts-original/templates/prometheus/prometheus.yaml
+++ charts/templates/prometheus/prometheus.yaml
@@ -32,7 +32,7 @@
{{ toYaml .Values.prometheus.prometheusSpec.apiserverConfig | indent 4}}
@@ -33,13 +33,13 @@
{{- end }}
{{- if .Values.prometheus.prometheusSpec.image }}
- image: {{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}
+ image: {{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}
{{- if and .Values.prometheus.prometheusSpec.image.tag .Values.prometheus.prometheusSpec.image.sha }}
- image: "{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}"
{{- else if .Values.prometheus.prometheusSpec.image.sha }}
- image: "{{ .Values.prometheus.prometheusSpec.image.repository }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}"
{{- else if .Values.prometheus.prometheusSpec.image.tag }}
- image: "{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}"
{{- else }}
- image: "{{ .Values.prometheus.prometheusSpec.image.repository }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.image.repository }}"
{{- end }}
version: {{ .Values.prometheus.prometheusSpec.image.tag }}
{{- if .Values.prometheus.prometheusSpec.image.sha }}
sha: {{ .Values.prometheus.prometheusSpec.image.sha }}
@@ -56,11 +56,13 @@
@@ -64,11 +64,13 @@
externalUrl: "{{ tpl .Values.prometheus.prometheusSpec.externalUrl . }}"
{{- else if and .Values.prometheus.ingress.enabled .Values.prometheus.ingress.hosts }}
externalUrl: "http://{{ tpl (index .Values.prometheus.ingress.hosts 0) . }}{{ .Values.prometheus.prometheusSpec.routePrefix }}"
@ -24,7 +33,7 @@
{{ toYaml .Values.prometheus.prometheusSpec.nodeSelector | indent 4 }}
{{- end }}
paused: {{ .Values.prometheus.prometheusSpec.paused }}
@@ -232,8 +234,8 @@
@@ -244,8 +246,8 @@
- {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-prometheus]}
{{- end }}
{{- end }}
@ -34,7 +43,7 @@
{{ toYaml .Values.prometheus.prometheusSpec.tolerations | indent 4 }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.topologySpreadConstraints }}
@@ -266,7 +268,7 @@
@@ -284,7 +286,7 @@
{{- end }}
{{- if .Values.prometheus.prometheusSpec.containers }}
containers:
@ -43,7 +52,7 @@
{{- end }}
{{- if .Values.prometheus.prometheusSpec.initContainers }}
initContainers:
@@ -282,6 +284,7 @@
@@ -300,6 +302,7 @@
{{- if .Values.prometheus.prometheusSpec.disableCompaction }}
disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }}
{{- end }}
@ -51,7 +60,7 @@
portName: {{ .Values.prometheus.prometheusSpec.portName }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.volumes }}
@@ -326,3 +329,4 @@
@@ -356,3 +359,4 @@
{{- if .Values.prometheus.prometheusSpec.allowOverlappingBlocks }}
allowOverlappingBlocks: {{ .Values.prometheus.prometheusSpec.allowOverlappingBlocks }}
{{- end }}

View File

@ -0,0 +1,18 @@
--- charts-original/templates/prometheus/rules-1.14/alertmanager.rules.yaml
+++ charts/templates/prometheus/rules-1.14/alertmanager.rules.yaml
@@ -7,6 +7,7 @@
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }}
{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
+{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
@@ -172,4 +173,5 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
-{{- end }}
\ No newline at end of file
+{{- end }}
+{{- end }}

View File

@ -4,12 +4,12 @@
rules:
- expr: |-
sum by (cluster, namespace, pod, container) (
- rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m])
+ rate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}[5m])
- irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m])
+ irate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}[5m])
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
)
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
- expr: |-
- container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
+ container_memory_working_set_bytes{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}

View File

@ -1,18 +1,25 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-storage.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-storage.yaml
@@ -31,9 +31,9 @@
@@ -31,13 +31,12 @@
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefillingup
summary: PersistentVolume is filling up.
expr: |-
- kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
- (
- kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
- /
- kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
- ) < 0.03
+ kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
/
- kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
+ /
+ kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
< 0.03
+ < 0.03
and
- kubelet_volume_stats_used_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
+ kubelet_volume_stats_used_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
for: 1m
labels:
@@ -48,12 +48,12 @@
severity: critical
@@ -51,14 +50,14 @@
summary: PersistentVolume is filling up.
expr: |-
(
@ -23,6 +30,9 @@
+ kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
) < 0.15
and
- kubelet_volume_stats_used_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
+ kubelet_volume_stats_used_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
and
- predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
+ predict_linear(kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
for: 1h

View File

@ -463,7 +463,7 @@
pspEnabled: true
pspAnnotations: {}
## Specify pod annotations
@@ -187,25 +625,76 @@
@@ -187,25 +625,77 @@
## ref: https://prometheus.io/docs/alerting/notifications/
## https://prometheus.io/docs/alerting/notification_examples/
##
@ -477,7 +477,7 @@
- # {{- $root := . -}}
- # {{ range .Alerts }}
- # *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
- # *Cluster:* {{ template "cluster" $root }}
- # *Cluster:* {{ template "cluster" $root }}
- # *Description:* {{ .Annotations.description }}
- # *Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:>
- # *Runbook:* <{{ .Annotations.runbook }}|:spiral_note_pad:>
@ -486,6 +486,7 @@
- # {{ end }}
- # {{ end }}
- # {{ end }}
+
+ templateFiles:
+ rancher_defaults.tmpl: |-
+ {{- define "slack.rancher.text" -}}
@ -559,7 +560,7 @@
ingress:
enabled: false
@@ -395,7 +884,7 @@
@@ -397,7 +887,7 @@
## Image of Alertmanager
##
image:
@ -568,7 +569,7 @@
tag: v0.22.2
sha: ""
@@ -507,9 +996,13 @@
@@ -509,9 +999,13 @@
## Define resources requests and limits for single Pods.
## ref: https://kubernetes.io/docs/user-guide/compute-resources/
##
@ -585,7 +586,7 @@
## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node.
## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided.
@@ -613,6 +1106,30 @@
@@ -625,6 +1119,30 @@
enabled: true
namespaceOverride: ""
@ -616,7 +617,7 @@
## ForceDeployDatasources Create datasource configmap even if grafana deployment has been disabled
##
forceDeployDatasources: false
@@ -625,6 +1142,18 @@
@@ -637,6 +1155,18 @@
##
defaultDashboardsEnabled: true
@ -632,10 +633,10 @@
+ # Ignore if useExistingNamespace is true
+ cleanupOnUninstall: false
+
adminPassword: prom-operator
ingress:
@@ -664,6 +1193,7 @@
## Timezone for the default dashboards
## Other options are: browser or a specific timezone, i.e. Europe/Luxembourg
##
@@ -681,6 +1211,7 @@
dashboards:
enabled: true
label: grafana_dashboard
@ -643,7 +644,7 @@
## Annotations for Grafana dashboard configmaps
##
@@ -716,7 +1246,60 @@
@@ -739,7 +1270,60 @@
## Passed to grafana subchart and used by servicemonitor below
##
service:
@ -705,11 +706,10 @@
## If true, create a serviceMonitor for grafana
##
@@ -746,6 +1329,14 @@
# targetLabel: nodename
@@ -773,6 +1357,17 @@
# replacement: $1
# action: replace
+
+ resources:
+ limits:
+ memory: 200Mi
@ -717,10 +717,14 @@
+ requests:
+ memory: 100Mi
+ cpu: 100m
+
+ testFramework:
+ enabled: false
+
## Component scraping the kube api server
##
@@ -907,7 +1498,7 @@
kubeApiServer:
@@ -952,7 +1547,7 @@
## Component scraping the kube controller manager
##
kubeControllerManager:
@ -729,7 +733,7 @@
## If your kube controller manager is not deployed as a pod, specify IPs it can be found on
##
@@ -1054,7 +1645,7 @@
@@ -1110,7 +1705,7 @@
## Component scraping etcd
##
kubeEtcd:
@ -738,7 +742,7 @@
## If your etcd is not deployed as a pod, specify IPs it can be found on
##
@@ -1119,7 +1710,7 @@
@@ -1177,7 +1772,7 @@
## Component scraping kube scheduler
##
kubeScheduler:
@ -747,7 +751,7 @@
## If your kube scheduler is not deployed as a pod, specify IPs it can be found on
##
@@ -1177,7 +1768,7 @@
@@ -1237,7 +1832,7 @@
## Component scraping kube proxy
##
kubeProxy:
@ -756,7 +760,7 @@
## If your kube proxy is not deployed as a pod, specify IPs it can be found on
##
@@ -1266,6 +1857,13 @@
@@ -1337,6 +1932,13 @@
create: true
podSecurityPolicy:
enabled: true
@ -770,7 +774,7 @@
## Deploy node exporter as a daemonset to all nodes
##
@@ -1319,6 +1917,16 @@
@@ -1392,6 +1994,16 @@
extraArgs:
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
@ -787,7 +791,7 @@
## Manages Prometheus and Alertmanager components
##
@@ -1331,8 +1939,8 @@
@@ -1404,8 +2016,8 @@
enabled: true
# Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants
tlsMinVersion: VersionTLS13
@ -798,16 +802,19 @@
## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted
## rules from making their way into prometheus and potentially preventing the container from starting
@@ -1349,7 +1957,7 @@
@@ -1422,9 +2034,9 @@
patch:
enabled: true
image:
- repository: jettech/kube-webhook-certgen
+ repository: rancher/mirrored-jettech-kube-webhook-certgen
tag: v1.5.2
sha: ""
- repository: k8s.gcr.io/ingress-nginx/kube-webhook-certgen
+ repository: rancher/mirrored-ingress-nginx-kube-webhook-certgen
tag: v1.0
- sha: "f3b6b39a6062328c095337b4cadcefd1612348fdd5190b1dcbcb9b9e90bd8068"
+ sha: ""
pullPolicy: IfNotPresent
@@ -1498,13 +2106,13 @@
resources: {}
## Provide a priority class name to the webhook patching job
@@ -1571,13 +2183,13 @@
## Resource limits & requests
##
@ -828,25 +835,34 @@
# Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico),
# because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working
@@ -1557,7 +2165,7 @@
@@ -1630,7 +2242,7 @@
## Prometheus-operator image
##
image:
- repository: quay.io/prometheus-operator/prometheus-operator
+ repository: rancher/mirrored-prometheus-operator-prometheus-operator
tag: v0.48.0
tag: v0.50.0
sha: ""
pullPolicy: IfNotPresent
@@ -1573,7 +2181,7 @@
@@ -1646,7 +2258,7 @@
## Prometheus-config-reloader image to use for config and rule reloading
##
prometheusConfigReloaderImage:
- repository: quay.io/prometheus-operator/prometheus-config-reloader
+ repository: rancher/mirrored-prometheus-operator-prometheus-config-reloader
tag: v0.48.0
tag: v0.50.0
sha: ""
@@ -1659,7 +2267,7 @@
@@ -1661,7 +2273,7 @@
## Thanos side-car image when configured
##
thanosImage:
- repository: quay.io/thanos/thanos
+ repository: rancher/mirrored-thanos-thanos
tag: v0.17.2
sha: ""
@@ -1781,7 +2393,7 @@
port: 9090
## To be used with a proxy extraContainer port
@ -855,28 +871,16 @@
## List of IP addresses at which the Prometheus server service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
@@ -1916,7 +2524,7 @@
@@ -2054,7 +2666,7 @@
## Image of Prometheus.
##
image:
- repository: quay.io/prometheus/prometheus
+ repository: rancher/mirrored-prometheus-prometheus
tag: v2.27.1
tag: v2.28.1
sha: ""
@@ -1979,6 +2587,11 @@
##
externalUrl: ""
+ ## Ignore NamespaceSelector settings from the PodMonitor and ServiceMonitor configs
+ ## If true, PodMonitors and ServiceMonitors can only discover Pods and Services within the namespace they are deployed into
+ ##
+ ignoreNamespaceSelectors: false
+
## Define which Nodes the Pods are scheduled on.
## ref: https://kubernetes.io/docs/user-guide/node-selection/
##
@@ -2011,7 +2624,7 @@
@@ -2149,7 +2761,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the PrometheusRule resources created
##
@ -885,7 +889,7 @@
## PrometheusRules to be selected for target discovery.
## If {}, select all PrometheusRules
@@ -2036,7 +2649,7 @@
@@ -2174,7 +2786,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the servicemonitors created
##
@ -894,7 +898,7 @@
## ServiceMonitors to be selected for target discovery.
## If {}, select all ServiceMonitors
@@ -2059,7 +2672,7 @@
@@ -2197,7 +2809,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the podmonitors created
##
@ -903,7 +907,7 @@
## PodMonitors to be selected for target discovery.
## If {}, select all PodMonitors
@@ -2190,9 +2803,13 @@
@@ -2328,9 +2940,13 @@
## Resource limits & requests
##
@ -912,7 +916,7 @@
- # memory: 400Mi
+ resources:
+ limits:
+ memory: 1500Mi
+ memory: 3000Mi
+ cpu: 1000m
+ requests:
+ memory: 750Mi
@ -920,7 +924,7 @@
## Prometheus StorageSpec for persistent data
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/storage.md
@@ -2215,7 +2832,13 @@
@@ -2353,7 +2969,13 @@
# medium: Memory
# Additional volumes on the output StatefulSet definition.
@ -935,9 +939,9 @@
# Additional VolumeMounts on the output StatefulSet definition.
volumeMounts: []
@@ -2322,9 +2945,34 @@
##
thanos: {}
@@ -2475,9 +3097,34 @@
# fileName: "objstore.yaml"
# objectStorageConfigFile: /var/secrets/object-store.yaml
+ proxy:
+ image:
@ -945,7 +949,7 @@
+ tag: 1.21.1-alpine
+
## Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to a Prometheus pod.
## if using proxy extraContainer update targetPort with proxy container port
## if using proxy extraContainer update targetPort with proxy container port
- containers: []
+ containers: |
+ - name: prometheus-proxy
@ -971,7 +975,7 @@
## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes
## (permissions, dir tree) on mounted volumes before starting prometheus
@@ -2332,7 +2980,7 @@
@@ -2485,7 +3132,7 @@
## PortName to use for Prometheus.
##

View File

@ -1,10 +1,11 @@
url: https://github.com/prometheus-community/helm-charts.git
subdirectory: charts/kube-prometheus-stack
commit: ba91bdb2d79ca4419cf72078f5f4bfcc426d4599
version: 100.0.1
commit: c6208979d494156a3869d2e5faab669ce4301c68
version: 100.1.0
additionalCharts:
- workingDir: charts-crd
crdOptions:
templateDirectory: crd-template
crdDirectory: crd-manifest
addCRDValidationToMainChart: true
useTarArchive: true

View File

@ -49,7 +49,9 @@ spec:
- -c
- >
echo "Applying CRDs...";
kubectl apply -f /etc/config/crd-manifest.yaml;
mkdir -p /etc/crd;
base64 -d /etc/config/crd-manifest.tgz.b64 | tar -xzv -C /etc/crd;
kubectl apply -Rf /etc/crd;
echo "Waiting for CRDs to be recognized before finishing installation...";
@ -68,7 +70,13 @@ spec:
mountPath: /etc/config
restartPolicy: OnFailure
nodeSelector: {{ include "linux-node-selector" . | nindent 8 }}
{{- if .Values.nodeSelector }}
{{- toYaml .Values.nodeSelector | nindent 8 }}
{{- end }}
tolerations: {{ include "linux-node-tolerations" . | nindent 8 }}
{{- if .Values.tolerations }}
{{- toYaml .Values.tolerations | nindent 8 }}
{{- end }}
volumes:
- name: crd-manifest
configMap:
@ -121,14 +129,23 @@ spec:
- /bin/sh
- -c
- >
kubectl delete -f /etc/config/crd-manifest.yaml
echo "Deleting CRDs...";
mkdir -p /etc/crd;
base64 -d /etc/config/crd-manifest.tgz.b64 | tar -xzv -C /etc/crd;
kubectl delete -Rf /etc/crd;
volumeMounts:
- name: crd-manifest
readOnly: true
mountPath: /etc/config
restartPolicy: OnFailure
nodeSelector: {{ include "linux-node-selector" . | nindent 8 }}
{{- if .Values.nodeSelector }}
{{- toYaml .Values.nodeSelector | nindent 8 }}
{{- end }}
tolerations: {{ include "linux-node-tolerations" . | nindent 8 }}
{{- if .Values.tolerations }}
{{- toYaml .Values.tolerations | nindent 8 }}
{{- end }}
volumes:
- name: crd-manifest
configMap:

View File

@ -4,11 +4,5 @@ metadata:
name: {{ .Chart.Name }}-manifest
namespace: {{ .Release.Namespace }}
data:
crd-manifest.yaml: |
{{- $currentScope := . -}}
{{- $crds := (.Files.Glob "crd-manifest/**.yaml") -}}
{{- range $path, $_ := $crds -}}
{{- with $currentScope -}}
{{ .Files.Get $path | nindent 4 }}
---
{{- end -}}{{- end -}}
crd-manifest.tgz.b64:
{{- .Files.Get "files/crd-manifest.tgz" | b64enc | indent 4 }}

View File

@ -9,3 +9,7 @@ global:
image:
repository: rancher/shell
tag: v0.1.8
nodeSelector: {}
tolerations: []

View File

@ -0,0 +1,3 @@
service:
targetPort: 9102
port: 9102

View File

@ -7,11 +7,16 @@
+ catalog.rancher.io/release-name: rancher-node-exporter
+ catalog.cattle.io/hidden: "true"
+ catalog.cattle.io/os: linux
apiVersion: v1
appVersion: 1.1.2
apiVersion: v2
appVersion: 1.2.2
description: A Helm chart for prometheus node-exporter
-name: prometheus-node-exporter
+name: rancher-node-exporter
version: 1.18.1
version: 2.2.0
type: application
home: https://github.com/prometheus/node_exporter/
sources:
@@ -16,3 +22,4 @@
name: gianrubio
- name: vsliouniaev
- name: bismarck
+

View File

@ -1,6 +1,6 @@
--- charts-original/templates/daemonset.yaml
+++ charts/templates/daemonset.yaml
@@ -35,7 +35,7 @@
@@ -36,7 +36,7 @@
{{- end }}
containers:
- name: node-exporter
@ -9,7 +9,7 @@
imagePullPolicy: {{ .Values.image.pullPolicy }}
args:
- --path.procfs=/host/proc
@@ -133,18 +133,18 @@
@@ -134,18 +134,18 @@
affinity:
{{ toYaml .Values.affinity | indent 8 }}
{{- end }}

View File

@ -12,10 +12,10 @@
image:
- repository: quay.io/prometheus/node-exporter
+ repository: rancher/mirrored-prometheus-node-exporter
tag: v1.1.2
tag: v1.2.2
pullPolicy: IfNotPresent
@@ -126,6 +131,8 @@
@@ -135,6 +140,8 @@
tolerations:
- effect: NoSchedule
operator: Exists

View File

@ -1,4 +1,5 @@
url: https://github.com/prometheus-community/helm-charts.git
subdirectory: charts/prometheus-node-exporter
commit: a05f7b8888d6174827b815aa097d64b94f00af3e
commit: cd13facd9c6042f7f15978024572cf297fbac6c0
version: 100.0.0
doNotRelease: true

View File

@ -0,0 +1,9 @@
rules:
external:
- seriesQuery: '{__name__=~"^some_metric_count$"}'
resources:
template: <<.Resource>>
name:
matches: ""
as: "my_custom_metric"
metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)

View File

@ -10,6 +10,6 @@
apiVersion: v1
-name: prometheus-adapter
+name: rancher-prometheus-adapter
version: 2.14.0
appVersion: v0.8.4
version: 2.17.0
appVersion: v0.9.0
description: A Helm chart for k8s prometheus adapter

View File

@ -1,6 +1,6 @@
--- charts-original/templates/deployment.yaml
+++ charts/templates/deployment.yaml
@@ -40,7 +40,7 @@
@@ -41,7 +41,7 @@
{{- end}}
containers:
- name: {{ .Chart.Name }}
@ -9,7 +9,7 @@
imagePullPolicy: {{ .Values.image.pullPolicy }}
args:
- /adapter
@@ -102,13 +102,17 @@
@@ -105,8 +105,10 @@
name: volume-serving-cert
readOnly: true
{{- end }}
@ -22,6 +22,10 @@
affinity:
{{- toYaml .Values.affinity | nindent 8 }}
priorityClassName: {{ .Values.priorityClassName }}
@@ -114,8 +116,10 @@
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
{{- end }}
- tolerations:
- {{- toYaml .Values.tolerations | nindent 8 }}
+ tolerations: {{ include "linux-node-tolerations" . | nindent 8 }}

View File

@ -9,8 +9,8 @@
affinity: {}
image:
- repository: directxman12/k8s-prometheus-adapter-amd64
+ repository: rancher/mirrored-directxman12-k8s-prometheus-adapter
tag: v0.8.4
- repository: k8s.gcr.io/prometheus-adapter/prometheus-adapter
+ repository: rancher/mirrored-prometheus-adapter-prometheus-adapter
tag: v0.9.0
pullPolicy: IfNotPresent

View File

@ -1,4 +1,5 @@
url: https://github.com/prometheus-community/helm-charts.git
subdirectory: charts/prometheus-adapter
commit: 50b719af447594abbae7beeb4d6458a19f8e9689
commit: 843d2f3d302ca24d745abf539a86ec680ae4f8c0
version: 100.0.0
doNotRelease: true

View File

@ -1,5 +1,5 @@
apiVersion: v1
version: 0.1.4
version: 0.1.5
appVersion: 0.1.0
annotations:
catalog.rancher.io/certified: rancher

View File

@ -82,7 +82,6 @@ k8s-app: {{ template "pushProxy.proxy.name" . }}
{{- define "pushProxy.serviceMonitor.labels" -}}
app: {{ template "pushprox.serviceMonitor.name" . }}
release: {{ .Release.Name | quote }}
{{ template "pushProxy.commonLabels" . }}
{{- end -}}

View File

@ -1,2 +1,2 @@
url: local
version: 100.0.0
version: 100.0.1

View File

@ -25,7 +25,7 @@ clients:
port: 9796
image:
repository: rancher/windows_exporter-package
tag: v0.0.2
tag: v0.0.3
os: "windows"
# Specify the IP addresses of nodes that you want to collect metrics from

Some files were not shown because too many files have changed in this diff Show More