(dev-v2.6-archive) Rebase to ea0e187

(partially cherry picked from commit 0b9eff2180)
pull/1680/head
Jiaqi Luo 2021-10-11 21:52:21 -07:00 committed by Arvind Iyengar
parent 2d3f6561ba
commit c314e527b4
No known key found for this signature in database
GPG Key ID: A8DD9BFD6C811498
76 changed files with 29802 additions and 296 deletions

View File

@ -0,0 +1,610 @@
{{- /*
Generated from 'alertmanager-overview' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1
kind: ConfigMap
metadata:
namespace: {{ template "kube-prometheus-stack.namespace" . }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "alertmanager-overview" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
labels:
{{- if $.Values.grafana.sidecar.dashboards.label }}
{{ $.Values.grafana.sidecar.dashboards.label }}: "1"
{{- end }}
app: {{ template "kube-prometheus-stack.name" $ }}-grafana
{{ include "kube-prometheus-stack.labels" $ | indent 4 }}
data:
alertmanager-overview.json: |-
{
"__inputs": [
],
"__requires": [
],
"annotations": {
"list": [
]
},
"editable": false,
"gnetId": null,
"graphTooltip": 1,
"hideControls": false,
"id": null,
"links": [
],
"refresh": "30s",
"rows": [
{
"collapse": false,
"collapsed": false,
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
},
"id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": false,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(alertmanager_alerts{namespace=\"$namespace\",service=\"$service\"}) by (namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
"refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Alerts",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "none",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "none",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
},
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": false,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(alertmanager_alerts_received_total{namespace=\"$namespace\",service=\"$service\"}[5m])) by (namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} Received",
"refId": "A"
},
{
"expr": "sum(rate(alertmanager_alerts_invalid_total{namespace=\"$namespace\",service=\"$service\"}[5m])) by (namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} Invalid",
"refId": "B"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Alerts receive rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Alerts",
"titleSize": "h6",
"type": "row"
},
{
"collapse": false,
"collapsed": false,
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": false,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": "integration",
"seriesOverrides": [
],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(alertmanager_notifications_total{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (integration,namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} Total",
"refId": "A"
},
{
"expr": "sum(rate(alertmanager_notifications_failed_total{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (integration,namespace,service,instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} Failed",
"refId": "B"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "$integration: Notifications Send Rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "ops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
},
"id": 5,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": false,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": "integration",
"seriesOverrides": [
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (le,namespace,service,instance)\n) \n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} 99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (le,namespace,service,instance)\n) \n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} Median",
"refId": "B"
},
{
"expr": "sum(rate(alertmanager_notification_latency_seconds_sum{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (namespace,service,instance)\n/\nsum(rate(alertmanager_notification_latency_seconds_count{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (namespace,service,instance)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} Average",
"refId": "C"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "$integration: Notification Duration",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Notifications",
"titleSize": "h6",
"type": "row"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"alertmanager-mixin"
],
"templating": {
"list": [
{
"current": {
"text": "Prometheus",
"value": "Prometheus"
},
"hide": 0,
"label": null,
"name": "datasource",
"options": [
],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "namespace",
"options": [
],
"query": "label_values(alertmanager_alerts, namespace)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "service",
"options": [
],
"query": "label_values(alertmanager_alerts, service)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "all",
"value": "$__all"
},
"datasource": "$datasource",
"hide": 2,
"includeAll": true,
"label": null,
"multi": false,
"name": "integration",
"options": [
],
"query": "label_values(alertmanager_notifications_total{integration=~\".*\"}, integration)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "Alertmanager / Overview",
"uid": "alertmanager-overview",
"version": 0
}
{{- end }}

View File

@ -0,0 +1,328 @@
{{- /*
Generated from 'kube-apiserver-burnrate.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver-burnrate.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kube-apiserver-burnrate.rules
rules:
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
-
(
(
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[1d]))
or
vector(0)
)
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[1d]))
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[1d]))
)
)
+
# errors
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
labels:
verb: read
record: apiserver_request:burnrate1d
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
-
(
(
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[1h]))
or
vector(0)
)
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[1h]))
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[1h]))
)
)
+
# errors
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
labels:
verb: read
record: apiserver_request:burnrate1h
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
-
(
(
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[2h]))
or
vector(0)
)
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[2h]))
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[2h]))
)
)
+
# errors
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
labels:
verb: read
record: apiserver_request:burnrate2h
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
-
(
(
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[30m]))
or
vector(0)
)
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[30m]))
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[30m]))
)
)
+
# errors
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
labels:
verb: read
record: apiserver_request:burnrate30m
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
-
(
(
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[3d]))
or
vector(0)
)
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[3d]))
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[3d]))
)
)
+
# errors
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
labels:
verb: read
record: apiserver_request:burnrate3d
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
-
(
(
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[5m]))
or
vector(0)
)
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[5m]))
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[5m]))
)
)
+
# errors
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
labels:
verb: read
record: apiserver_request:burnrate5m
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
-
(
(
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[6h]))
or
vector(0)
)
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[6h]))
+
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[6h]))
)
)
+
# errors
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
labels:
verb: read
record: apiserver_request:burnrate6h
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
-
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
)
+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
labels:
verb: write
record: apiserver_request:burnrate1d
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
-
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
)
+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
labels:
verb: write
record: apiserver_request:burnrate1h
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
-
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
)
+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
labels:
verb: write
record: apiserver_request:burnrate2h
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
-
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
)
+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
labels:
verb: write
record: apiserver_request:burnrate30m
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
-
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
)
+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
labels:
verb: write
record: apiserver_request:burnrate3d
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
-
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
)
+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
labels:
verb: write
record: apiserver_request:burnrate5m
- expr: |-
(
(
# too slow
sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
-
sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
)
+
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
)
/
sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
labels:
verb: write
record: apiserver_request:burnrate6h
{{- end }}

View File

@ -0,0 +1,49 @@
{{- /*
Generated from 'kube-apiserver-histogram.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver-histogram.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kube-apiserver-histogram.rules
rules:
- expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
labels:
quantile: '0.99'
verb: read
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
labels:
quantile: '0.99'
verb: write
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
labels:
quantile: '0.99'
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
labels:
quantile: '0.9'
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
labels:
quantile: '0.5'
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
{{- end }}

View File

@ -24,8 +24,8 @@
+sources:
+ - https://github.com/prometheus-community/helm-charts
+ - https://github.com/prometheus-operator/kube-prometheus
+version: 16.6.0
+appVersion: 0.48.0
+version: 17.0.0
+appVersion: 0.49.0
+kubeVersion: ">=1.16.0-0"
+home: https://github.com/prometheus-operator/kube-prometheus
+keywords:

View File

@ -1,6 +1,28 @@
--- charts-original/README.md
+++ charts/README.md
@@ -193,7 +193,39 @@
@@ -83,6 +83,21 @@
A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions.
+### From 16.x to 17.x
+Version 15 upgrades prometheus-operator from 0.48.x to 0.49.x. Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRDs manually before updating:
+
+```console
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
+```
+
+
### From 15.x to 16.x
Version 16 upgrades kube-state-metrics to v2.0.0. This includes changed command-line arguments and removed metrics, see this [blog post](https://kubernetes.io/blog/2021/04/13/kube-state-metrics-v-2-0/). This version also removes Grafana dashboards that supported Kubernetes 1.14 or earlier.
@@ -193,7 +208,39 @@
helm show values prometheus-community/kube-prometheus-stack
```

View File

@ -0,0 +1,460 @@
--- charts-original/crds/crd-podmonitors.yaml
+++ charts/crds/crd-podmonitors.yaml
@@ -1,4 +1,4 @@
-# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
+# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
---
apiVersion: apiextensions.k8s.io/v1
@@ -25,24 +25,31 @@
description: PodMonitor defines monitoring for a set of pods.
properties:
apiVersion:
- description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+ description: 'APIVersion defines the versioned schema of this representation
+ of an object. Servers should convert recognized schemas to the latest
+ internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
- description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+ description: 'Kind is a string value representing the REST resource this
+ object represents. Servers may infer this from the endpoint the client
+ submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
- description: Specification of desired Pod selection for target discovery by Prometheus.
+ description: Specification of desired Pod selection for target discovery
+ by Prometheus.
properties:
jobLabel:
description: The label to use to retrieve the job name from.
type: string
namespaceSelector:
- description: Selector to select which namespaces the Endpoints objects are discovered from.
+ description: Selector to select which namespaces the Endpoints objects
+ are discovered from.
properties:
any:
- description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
+ description: Boolean describing whether all namespaces are selected
+ in contrast to a list restricting them.
type: boolean
matchNames:
description: List of namespace names.
@@ -53,94 +60,126 @@
podMetricsEndpoints:
description: A list of endpoints allowed as part of this PodMonitor.
items:
- description: PodMetricsEndpoint defines a scrapeable endpoint of a Kubernetes Pod serving Prometheus metrics.
+ description: PodMetricsEndpoint defines a scrapeable endpoint of
+ a Kubernetes Pod serving Prometheus metrics.
properties:
basicAuth:
- description: 'BasicAuth allow an endpoint to authenticate over basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
+ description: 'BasicAuth allow an endpoint to authenticate over
+ basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
properties:
password:
- description: The secret in the service monitor namespace that contains the password for authentication.
+ description: The secret in the service monitor namespace
+ that contains the password for authentication.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
type: object
username:
- description: The secret in the service monitor namespace that contains the username for authentication.
+ description: The secret in the service monitor namespace
+ that contains the username for authentication.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
type: object
type: object
bearerTokenSecret:
- description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the pod monitor and accessible by the Prometheus Operator.
+ description: Secret to mount to read bearer token for scraping
+ targets. The secret needs to be in the same namespace as the
+ pod monitor and accessible by the Prometheus Operator.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
type: object
honorLabels:
- description: HonorLabels chooses the metric's labels on collisions with target labels.
+ description: HonorLabels chooses the metric's labels on collisions
+ with target labels.
type: boolean
honorTimestamps:
- description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data.
+ description: HonorTimestamps controls whether Prometheus respects
+ the timestamps present in scraped data.
type: boolean
interval:
description: Interval at which metrics should be scraped
type: string
metricRelabelings:
- description: MetricRelabelConfigs to apply to samples before ingestion.
+ description: MetricRelabelConfigs to apply to samples before
+ ingestion.
items:
- description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
+ description: 'RelabelConfig allows dynamic rewriting of the
+ label set, being applied to samples before ingestion. It
+ defines `<metric_relabel_configs>`-section of Prometheus
+ configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
- description: Action to perform based on regex matching. Default is 'replace'
+ description: Action to perform based on regex matching.
+ Default is 'replace'
type: string
modulus:
- description: Modulus to take of the hash of the source label values.
+ description: Modulus to take of the hash of the source
+ label values.
format: int64
type: integer
regex:
- description: Regular expression against which the extracted value is matched. Default is '(.*)'
+ description: Regular expression against which the extracted
+ value is matched. Default is '(.*)'
type: string
replacement:
- description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
+ description: Replacement value against which a regex replace
+ is performed if the regular expression matches. Regex
+ capture groups are available. Default is '$1'
type: string
separator:
- description: Separator placed between concatenated source label values. default is ';'.
+ description: Separator placed between concatenated source
+ label values. default is ';'.
type: string
sourceLabels:
- description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
+ description: The source labels select values from existing
+ labels. Their content is concatenated using the configured
+ separator and matched against the configured regular
+ expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
- description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
+ description: Label to which the resulting value is written
+ in a replace action. It is mandatory for replace actions.
+ Regex capture groups are available.
type: string
type: object
type: array
@@ -155,39 +194,58 @@
description: HTTP path to scrape for metrics.
type: string
port:
- description: Name of the pod port this endpoint refers to. Mutually exclusive with targetPort.
+ description: Name of the pod port this endpoint refers to. Mutually
+ exclusive with targetPort.
type: string
proxyUrl:
- description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint.
+ description: ProxyURL eg http://proxyserver:2195 Directs scrapes
+ to proxy through this endpoint.
type: string
relabelings:
- description: 'RelabelConfigs to apply to samples before scraping. Prometheus Operator automatically adds relabelings for a few standard Kubernetes fields and replaces original scrape job name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
+ description: 'RelabelConfigs to apply to samples before scraping.
+ Prometheus Operator automatically adds relabelings for a few
+ standard Kubernetes fields and replaces original scrape job
+ name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
- description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
+ description: 'RelabelConfig allows dynamic rewriting of the
+ label set, being applied to samples before ingestion. It
+ defines `<metric_relabel_configs>`-section of Prometheus
+ configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
- description: Action to perform based on regex matching. Default is 'replace'
+ description: Action to perform based on regex matching.
+ Default is 'replace'
type: string
modulus:
- description: Modulus to take of the hash of the source label values.
+ description: Modulus to take of the hash of the source
+ label values.
format: int64
type: integer
regex:
- description: Regular expression against which the extracted value is matched. Default is '(.*)'
+ description: Regular expression against which the extracted
+ value is matched. Default is '(.*)'
type: string
replacement:
- description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
+ description: Replacement value against which a regex replace
+ is performed if the regular expression matches. Regex
+ capture groups are available. Default is '$1'
type: string
separator:
- description: Separator placed between concatenated source label values. default is ';'.
+ description: Separator placed between concatenated source
+ label values. default is ';'.
type: string
sourceLabels:
- description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
+ description: The source labels select values from existing
+ labels. Their content is concatenated using the configured
+ separator and matched against the configured regular
+ expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
- description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
+ description: Label to which the resulting value is written
+ in a replace action. It is mandatory for replace actions.
+ Regex capture groups are available.
type: string
type: object
type: array
@@ -207,19 +265,24 @@
description: TLS configuration to use when scraping the endpoint.
properties:
ca:
- description: Struct containing the CA cert to use for the targets.
+ description: Struct containing the CA cert to use for the
+ targets.
properties:
configMap:
- description: ConfigMap containing data to use for the targets.
+ description: ConfigMap containing data to use for the
+ targets.
properties:
key:
description: The key to select.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the ConfigMap or its key must be defined
+ description: Specify whether the ConfigMap or its
+ key must be defined
type: boolean
required:
- key
@@ -228,32 +291,41 @@
description: Secret containing data to use for the targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key
+ must be defined
type: boolean
required:
- key
type: object
type: object
cert:
- description: Struct containing the client cert file for the targets.
+ description: Struct containing the client cert file for
+ the targets.
properties:
configMap:
- description: ConfigMap containing data to use for the targets.
+ description: ConfigMap containing data to use for the
+ targets.
properties:
key:
description: The key to select.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the ConfigMap or its key must be defined
+ description: Specify whether the ConfigMap or its
+ key must be defined
type: boolean
required:
- key
@@ -262,13 +334,17 @@
description: Secret containing data to use for the targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key
+ must be defined
type: boolean
required:
- key
@@ -278,16 +354,20 @@
description: Disable target certificate validation.
type: boolean
keySecret:
- description: Secret containing the client key file for the targets.
+ description: Secret containing the client key file for the
+ targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
@@ -299,30 +379,42 @@
type: object
type: array
podTargetLabels:
- description: PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
+ description: PodTargetLabels transfers labels on the Kubernetes Pod
+ onto the target.
items:
type: string
type: array
sampleLimit:
- description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
+ description: SampleLimit defines per-scrape limit on number of scraped
+ samples that will be accepted.
format: int64
type: integer
selector:
description: Selector to select Pod objects.
properties:
matchExpressions:
- description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
+ description: matchExpressions is a list of label selector requirements.
+ The requirements are ANDed.
items:
- description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
+ description: A label selector requirement is a selector that
+ contains values, a key, and an operator that relates the key
+ and values.
properties:
key:
- description: key is the label key that the selector applies to.
+ description: key is the label key that the selector applies
+ to.
type: string
operator:
- description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
+ description: operator represents a key's relationship to
+ a set of values. Valid operators are In, NotIn, Exists
+ and DoesNotExist.
type: string
values:
- description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
+ description: values is an array of string values. If the
+ operator is In or NotIn, the values array must be non-empty.
+ If the operator is Exists or DoesNotExist, the values
+ array must be empty. This array is replaced during a strategic
+ merge patch.
items:
type: string
type: array
@@ -334,11 +426,16 @@
matchLabels:
additionalProperties:
type: string
- description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
+ description: matchLabels is a map of {key,value} pairs. A single
+ {key,value} in the matchLabels map is equivalent to an element
+ of matchExpressions, whose key field is "key", the operator
+ is "In", and the values array contains only "value". The requirements
+ are ANDed.
type: object
type: object
targetLimit:
- description: TargetLimit defines a limit on the number of scraped targets that will be accepted.
+ description: TargetLimit defines a limit on the number of scraped
+ targets that will be accepted.
format: int64
type: integer
required:

View File

@ -0,0 +1,425 @@
--- charts-original/crds/crd-probes.yaml
+++ charts/crds/crd-probes.yaml
@@ -1,4 +1,4 @@
-# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
+# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
---
apiVersion: apiextensions.k8s.io/v1
@@ -25,58 +25,76 @@
description: Probe defines monitoring for a set of static targets or ingresses.
properties:
apiVersion:
- description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+ description: 'APIVersion defines the versioned schema of this representation
+ of an object. Servers should convert recognized schemas to the latest
+ internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
- description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+ description: 'Kind is a string value representing the REST resource this
+ object represents. Servers may infer this from the endpoint the client
+ submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
- description: Specification of desired Ingress selection for target discovery by Prometheus.
+ description: Specification of desired Ingress selection for target discovery
+ by Prometheus.
properties:
basicAuth:
- description: 'BasicAuth allow an endpoint to authenticate over basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
+ description: 'BasicAuth allow an endpoint to authenticate over basic
+ authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
properties:
password:
- description: The secret in the service monitor namespace that contains the password for authentication.
+ description: The secret in the service monitor namespace that
+ contains the password for authentication.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must be
+ a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must be
+ defined
type: boolean
required:
- key
type: object
username:
- description: The secret in the service monitor namespace that contains the username for authentication.
+ description: The secret in the service monitor namespace that
+ contains the username for authentication.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must be
+ a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must be
+ defined
type: boolean
required:
- key
type: object
type: object
bearerTokenSecret:
- description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the probe and accessible by the Prometheus Operator.
+ description: Secret to mount to read bearer token for scraping targets.
+ The secret needs to be in the same namespace as the probe and accessible
+ by the Prometheus Operator.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must be a
+ valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must be defined
@@ -85,20 +103,28 @@
- key
type: object
interval:
- description: Interval at which targets are probed using the configured prober. If not specified Prometheus' global scrape interval is used.
+ description: Interval at which targets are probed using the configured
+ prober. If not specified Prometheus' global scrape interval is used.
type: string
jobName:
description: The job name assigned to scraped metrics by default.
type: string
module:
- description: 'The module to use for probing specifying how to probe the target. Example module configuring in the blackbox exporter: https://github.com/prometheus/blackbox_exporter/blob/master/example.yml'
+ description: 'The module to use for probing specifying how to probe
+ the target. Example module configuring in the blackbox exporter:
+ https://github.com/prometheus/blackbox_exporter/blob/master/example.yml'
type: string
prober:
- description: Specification for the prober to use for probing targets. The prober.URL parameter is required. Targets cannot be probed if left empty.
+ description: Specification for the prober to use for probing targets.
+ The prober.URL parameter is required. Targets cannot be probed if
+ left empty.
properties:
path:
description: Path to collect metrics from. Defaults to `/probe`.
type: string
+ proxyUrl:
+ description: Optional ProxyURL.
+ type: string
scheme:
description: HTTP scheme to use for scraping. Defaults to `http`.
type: string
@@ -112,16 +138,19 @@
description: Timeout for scraping metrics from the Prometheus exporter.
type: string
targets:
- description: Targets defines a set of static and/or dynamically discovered targets to be probed using the prober.
+ description: Targets defines a set of static and/or dynamically discovered
+ targets to be probed using the prober.
properties:
ingress:
- description: Ingress defines the set of dynamically discovered ingress objects which hosts are considered for probing.
+ description: Ingress defines the set of dynamically discovered
+ ingress objects which hosts are considered for probing.
properties:
namespaceSelector:
description: Select Ingress objects by namespace.
properties:
any:
- description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
+ description: Boolean describing whether all namespaces
+ are selected in contrast to a list restricting them.
type: boolean
matchNames:
description: List of namespace names.
@@ -130,33 +159,48 @@
type: array
type: object
relabelingConfigs:
- description: 'RelabelConfigs to apply to samples before ingestion. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
+ description: 'RelabelConfigs to apply to samples before ingestion.
+ More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
- description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
+ description: 'RelabelConfig allows dynamic rewriting of
+ the label set, being applied to samples before ingestion.
+ It defines `<metric_relabel_configs>`-section of Prometheus
+ configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
- description: Action to perform based on regex matching. Default is 'replace'
+ description: Action to perform based on regex matching.
+ Default is 'replace'
type: string
modulus:
- description: Modulus to take of the hash of the source label values.
+ description: Modulus to take of the hash of the source
+ label values.
format: int64
type: integer
regex:
- description: Regular expression against which the extracted value is matched. Default is '(.*)'
+ description: Regular expression against which the extracted
+ value is matched. Default is '(.*)'
type: string
replacement:
- description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
+ description: Replacement value against which a regex
+ replace is performed if the regular expression matches.
+ Regex capture groups are available. Default is '$1'
type: string
separator:
- description: Separator placed between concatenated source label values. default is ';'.
+ description: Separator placed between concatenated source
+ label values. default is ';'.
type: string
sourceLabels:
- description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
+ description: The source labels select values from existing
+ labels. Their content is concatenated using the configured
+ separator and matched against the configured regular
+ expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
- description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
+ description: Label to which the resulting value is written
+ in a replace action. It is mandatory for replace actions.
+ Regex capture groups are available.
type: string
type: object
type: array
@@ -164,18 +208,29 @@
description: Select Ingress objects by labels.
properties:
matchExpressions:
- description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
+ description: matchExpressions is a list of label selector
+ requirements. The requirements are ANDed.
items:
- description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
+ description: A label selector requirement is a selector
+ that contains values, a key, and an operator that
+ relates the key and values.
properties:
key:
- description: key is the label key that the selector applies to.
+ description: key is the label key that the selector
+ applies to.
type: string
operator:
- description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
+ description: operator represents a key's relationship
+ to a set of values. Valid operators are In, NotIn,
+ Exists and DoesNotExist.
type: string
values:
- description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
+ description: values is an array of string values.
+ If the operator is In or NotIn, the values array
+ must be non-empty. If the operator is Exists or
+ DoesNotExist, the values array must be empty.
+ This array is replaced during a strategic merge
+ patch.
items:
type: string
type: array
@@ -187,51 +242,73 @@
matchLabels:
additionalProperties:
type: string
- description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
+ description: matchLabels is a map of {key,value} pairs.
+ A single {key,value} in the matchLabels map is equivalent
+ to an element of matchExpressions, whose key field is
+ "key", the operator is "In", and the values array contains
+ only "value". The requirements are ANDed.
type: object
type: object
type: object
staticConfig:
- description: 'StaticConfig defines static targets which are considers for probing. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config.'
+ description: 'StaticConfig defines static targets which are considers
+ for probing. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config.'
properties:
labels:
additionalProperties:
type: string
- description: Labels assigned to all metrics scraped from the targets.
+ description: Labels assigned to all metrics scraped from the
+ targets.
type: object
relabelingConfigs:
- description: 'RelabelConfigs to apply to samples before ingestion. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
+ description: 'RelabelConfigs to apply to samples before ingestion.
+ More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
- description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
+ description: 'RelabelConfig allows dynamic rewriting of
+ the label set, being applied to samples before ingestion.
+ It defines `<metric_relabel_configs>`-section of Prometheus
+ configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
- description: Action to perform based on regex matching. Default is 'replace'
+ description: Action to perform based on regex matching.
+ Default is 'replace'
type: string
modulus:
- description: Modulus to take of the hash of the source label values.
+ description: Modulus to take of the hash of the source
+ label values.
format: int64
type: integer
regex:
- description: Regular expression against which the extracted value is matched. Default is '(.*)'
+ description: Regular expression against which the extracted
+ value is matched. Default is '(.*)'
type: string
replacement:
- description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
+ description: Replacement value against which a regex
+ replace is performed if the regular expression matches.
+ Regex capture groups are available. Default is '$1'
type: string
separator:
- description: Separator placed between concatenated source label values. default is ';'.
+ description: Separator placed between concatenated source
+ label values. default is ';'.
type: string
sourceLabels:
- description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
+ description: The source labels select values from existing
+ labels. Their content is concatenated using the configured
+ separator and matched against the configured regular
+ expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
- description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
+ description: Label to which the resulting value is written
+ in a replace action. It is mandatory for replace actions.
+ Regex capture groups are available.
type: string
type: object
type: array
static:
- description: Targets is a list of URLs to probe using the configured prober.
+ description: Targets is a list of URLs to probe using the
+ configured prober.
items:
type: string
type: array
@@ -250,10 +327,12 @@
description: The key to select.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the ConfigMap or its key must be defined
+ description: Specify whether the ConfigMap or its key
+ must be defined
type: boolean
required:
- key
@@ -262,13 +341,16 @@
description: Secret containing data to use for the targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
@@ -284,10 +366,12 @@
description: The key to select.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the ConfigMap or its key must be defined
+ description: Specify whether the ConfigMap or its key
+ must be defined
type: boolean
required:
- key
@@ -296,13 +380,16 @@
description: Secret containing data to use for the targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
@@ -315,13 +402,16 @@
description: Secret containing the client key file for the targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must be
+ a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must be
+ defined
type: boolean
required:
- key

View File

@ -0,0 +1,54 @@
--- charts-original/crds/crd-prometheusrules.yaml
+++ charts/crds/crd-prometheusrules.yaml
@@ -1,4 +1,4 @@
-# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
+# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
---
apiVersion: apiextensions.k8s.io/v1
@@ -20,13 +20,18 @@
- name: v1
schema:
openAPIV3Schema:
- description: PrometheusRule defines recording and alerting rules for a Prometheus instance
+ description: PrometheusRule defines recording and alerting rules for a Prometheus
+ instance
properties:
apiVersion:
- description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+ description: 'APIVersion defines the versioned schema of this representation
+ of an object. Servers should convert recognized schemas to the latest
+ internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
- description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+ description: 'Kind is a string value representing the REST resource this
+ object represents. Servers may infer this from the endpoint the client
+ submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
@@ -36,7 +41,10 @@
groups:
description: Content of Prometheus rule file
items:
- description: 'RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
+ description: 'RuleGroup is a list of sequentially evaluated recording
+ and alerting rules. Note: PartialResponseStrategy is only used
+ by ThanosRuler and will be ignored by Prometheus instances. Valid
+ values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
properties:
interval:
type: string
@@ -46,7 +54,10 @@
type: string
rules:
items:
- description: Rule describes an alerting or recording rule.
+ description: 'Rule describes an alerting or recording rule
+ See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/)
+ or [recording](https://www.prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules)
+ rule'
properties:
alert:
type: string

View File

@ -0,0 +1,501 @@
--- charts-original/crds/crd-servicemonitors.yaml
+++ charts/crds/crd-servicemonitors.yaml
@@ -1,4 +1,4 @@
-# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
+# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
---
apiVersion: apiextensions.k8s.io/v1
@@ -25,50 +25,65 @@
description: ServiceMonitor defines monitoring for a set of services.
properties:
apiVersion:
- description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+ description: 'APIVersion defines the versioned schema of this representation
+ of an object. Servers should convert recognized schemas to the latest
+ internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
- description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+ description: 'Kind is a string value representing the REST resource this
+ object represents. Servers may infer this from the endpoint the client
+ submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
- description: Specification of desired Service selection for target discovery by Prometheus.
+ description: Specification of desired Service selection for target discovery
+ by Prometheus.
properties:
endpoints:
description: A list of endpoints allowed as part of this ServiceMonitor.
items:
- description: Endpoint defines a scrapeable endpoint serving Prometheus metrics.
+ description: Endpoint defines a scrapeable endpoint serving Prometheus
+ metrics.
properties:
basicAuth:
- description: 'BasicAuth allow an endpoint to authenticate over basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
+ description: 'BasicAuth allow an endpoint to authenticate over
+ basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
properties:
password:
- description: The secret in the service monitor namespace that contains the password for authentication.
+ description: The secret in the service monitor namespace
+ that contains the password for authentication.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
type: object
username:
- description: The secret in the service monitor namespace that contains the username for authentication.
+ description: The secret in the service monitor namespace
+ that contains the username for authentication.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
@@ -78,57 +93,79 @@
description: File to read bearer token for scraping targets.
type: string
bearerTokenSecret:
- description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the service monitor and accessible by the Prometheus Operator.
+ description: Secret to mount to read bearer token for scraping
+ targets. The secret needs to be in the same namespace as the
+ service monitor and accessible by the Prometheus Operator.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
type: object
honorLabels:
- description: HonorLabels chooses the metric's labels on collisions with target labels.
+ description: HonorLabels chooses the metric's labels on collisions
+ with target labels.
type: boolean
honorTimestamps:
- description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data.
+ description: HonorTimestamps controls whether Prometheus respects
+ the timestamps present in scraped data.
type: boolean
interval:
description: Interval at which metrics should be scraped
type: string
metricRelabelings:
- description: MetricRelabelConfigs to apply to samples before ingestion.
+ description: MetricRelabelConfigs to apply to samples before
+ ingestion.
items:
- description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
+ description: 'RelabelConfig allows dynamic rewriting of the
+ label set, being applied to samples before ingestion. It
+ defines `<metric_relabel_configs>`-section of Prometheus
+ configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
- description: Action to perform based on regex matching. Default is 'replace'
+ description: Action to perform based on regex matching.
+ Default is 'replace'
type: string
modulus:
- description: Modulus to take of the hash of the source label values.
+ description: Modulus to take of the hash of the source
+ label values.
format: int64
type: integer
regex:
- description: Regular expression against which the extracted value is matched. Default is '(.*)'
+ description: Regular expression against which the extracted
+ value is matched. Default is '(.*)'
type: string
replacement:
- description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
+ description: Replacement value against which a regex replace
+ is performed if the regular expression matches. Regex
+ capture groups are available. Default is '$1'
type: string
separator:
- description: Separator placed between concatenated source label values. default is ';'.
+ description: Separator placed between concatenated source
+ label values. default is ';'.
type: string
sourceLabels:
- description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
+ description: The source labels select values from existing
+ labels. Their content is concatenated using the configured
+ separator and matched against the configured regular
+ expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
- description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
+ description: Label to which the resulting value is written
+ in a replace action. It is mandatory for replace actions.
+ Regex capture groups are available.
type: string
type: object
type: array
@@ -143,39 +180,58 @@
description: HTTP path to scrape for metrics.
type: string
port:
- description: Name of the service port this endpoint refers to. Mutually exclusive with targetPort.
+ description: Name of the service port this endpoint refers to.
+ Mutually exclusive with targetPort.
type: string
proxyUrl:
- description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint.
+ description: ProxyURL eg http://proxyserver:2195 Directs scrapes
+ to proxy through this endpoint.
type: string
relabelings:
- description: 'RelabelConfigs to apply to samples before scraping. Prometheus Operator automatically adds relabelings for a few standard Kubernetes fields and replaces original scrape job name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
+ description: 'RelabelConfigs to apply to samples before scraping.
+ Prometheus Operator automatically adds relabelings for a few
+ standard Kubernetes fields and replaces original scrape job
+ name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
- description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
+ description: 'RelabelConfig allows dynamic rewriting of the
+ label set, being applied to samples before ingestion. It
+ defines `<metric_relabel_configs>`-section of Prometheus
+ configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
- description: Action to perform based on regex matching. Default is 'replace'
+ description: Action to perform based on regex matching.
+ Default is 'replace'
type: string
modulus:
- description: Modulus to take of the hash of the source label values.
+ description: Modulus to take of the hash of the source
+ label values.
format: int64
type: integer
regex:
- description: Regular expression against which the extracted value is matched. Default is '(.*)'
+ description: Regular expression against which the extracted
+ value is matched. Default is '(.*)'
type: string
replacement:
- description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
+ description: Replacement value against which a regex replace
+ is performed if the regular expression matches. Regex
+ capture groups are available. Default is '$1'
type: string
separator:
- description: Separator placed between concatenated source label values. default is ';'.
+ description: Separator placed between concatenated source
+ label values. default is ';'.
type: string
sourceLabels:
- description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
+ description: The source labels select values from existing
+ labels. Their content is concatenated using the configured
+ separator and matched against the configured regular
+ expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
- description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
+ description: Label to which the resulting value is written
+ in a replace action. It is mandatory for replace actions.
+ Regex capture groups are available.
type: string
type: object
type: array
@@ -189,25 +245,32 @@
anyOf:
- type: integer
- type: string
- description: Name or number of the target port of the Pod behind the Service, the port must be specified with container port property. Mutually exclusive with port.
+ description: Name or number of the target port of the Pod behind
+ the Service, the port must be specified with container port
+ property. Mutually exclusive with port.
x-kubernetes-int-or-string: true
tlsConfig:
description: TLS configuration to use when scraping the endpoint
properties:
ca:
- description: Struct containing the CA cert to use for the targets.
+ description: Struct containing the CA cert to use for the
+ targets.
properties:
configMap:
- description: ConfigMap containing data to use for the targets.
+ description: ConfigMap containing data to use for the
+ targets.
properties:
key:
description: The key to select.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the ConfigMap or its key must be defined
+ description: Specify whether the ConfigMap or its
+ key must be defined
type: boolean
required:
- key
@@ -216,35 +279,45 @@
description: Secret containing data to use for the targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key
+ must be defined
type: boolean
required:
- key
type: object
type: object
caFile:
- description: Path to the CA cert in the Prometheus container to use for the targets.
+ description: Path to the CA cert in the Prometheus container
+ to use for the targets.
type: string
cert:
- description: Struct containing the client cert file for the targets.
+ description: Struct containing the client cert file for
+ the targets.
properties:
configMap:
- description: ConfigMap containing data to use for the targets.
+ description: ConfigMap containing data to use for the
+ targets.
properties:
key:
description: The key to select.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the ConfigMap or its key must be defined
+ description: Specify whether the ConfigMap or its
+ key must be defined
type: boolean
required:
- key
@@ -253,38 +326,48 @@
description: Secret containing data to use for the targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key
+ must be defined
type: boolean
required:
- key
type: object
type: object
certFile:
- description: Path to the client cert file in the Prometheus container for the targets.
+ description: Path to the client cert file in the Prometheus
+ container for the targets.
type: string
insecureSkipVerify:
description: Disable target certificate validation.
type: boolean
keyFile:
- description: Path to the client key file in the Prometheus container for the targets.
+ description: Path to the client key file in the Prometheus
+ container for the targets.
type: string
keySecret:
- description: Secret containing the client key file for the targets.
+ description: Secret containing the client key file for the
+ targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
@@ -296,13 +379,18 @@
type: object
type: array
jobLabel:
- description: The label to use to retrieve the job name from.
+ description: "Chooses the label of the Kubernetes `Endpoints`. Its
+ value will be used for the `job`-label's value of the created metrics.
+ \n Default & fallback value: the name of the respective Kubernetes
+ `Endpoint`."
type: string
namespaceSelector:
- description: Selector to select which namespaces the Endpoints objects are discovered from.
+ description: Selector to select which namespaces the Kubernetes Endpoints
+ objects are discovered from.
properties:
any:
- description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
+ description: Boolean describing whether all namespaces are selected
+ in contrast to a list restricting them.
type: boolean
matchNames:
description: List of namespace names.
@@ -311,30 +399,42 @@
type: array
type: object
podTargetLabels:
- description: PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
+ description: PodTargetLabels transfers labels on the Kubernetes `Pod`
+ onto the created metrics.
items:
type: string
type: array
sampleLimit:
- description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
+ description: SampleLimit defines per-scrape limit on number of scraped
+ samples that will be accepted.
format: int64
type: integer
selector:
description: Selector to select Endpoints objects.
properties:
matchExpressions:
- description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
+ description: matchExpressions is a list of label selector requirements.
+ The requirements are ANDed.
items:
- description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
+ description: A label selector requirement is a selector that
+ contains values, a key, and an operator that relates the key
+ and values.
properties:
key:
- description: key is the label key that the selector applies to.
+ description: key is the label key that the selector applies
+ to.
type: string
operator:
- description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
+ description: operator represents a key's relationship to
+ a set of values. Valid operators are In, NotIn, Exists
+ and DoesNotExist.
type: string
values:
- description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
+ description: values is an array of string values. If the
+ operator is In or NotIn, the values array must be non-empty.
+ If the operator is Exists or DoesNotExist, the values
+ array must be empty. This array is replaced during a strategic
+ merge patch.
items:
type: string
type: array
@@ -346,16 +446,23 @@
matchLabels:
additionalProperties:
type: string
- description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
+ description: matchLabels is a map of {key,value} pairs. A single
+ {key,value} in the matchLabels map is equivalent to an element
+ of matchExpressions, whose key field is "key", the operator
+ is "In", and the values array contains only "value". The requirements
+ are ANDed.
type: object
type: object
targetLabels:
- description: TargetLabels transfers labels on the Kubernetes Service onto the target.
+ description: TargetLabels transfers labels from the Kubernetes `Service`
+ onto the created metrics. All labels set in `selector.matchLabels`
+ are automatically transferred.
items:
type: string
type: array
targetLimit:
- description: TargetLimit defines a limit on the number of scraped targets that will be accepted.
+ description: TargetLimit defines a limit on the number of scraped
+ targets that will be accepted.
format: int64
type: integer
required:

View File

@ -140,3 +140,17 @@
app.kubernetes.io/part-of: {{ template "kube-prometheus-stack.name" . }}
chart: {{ template "kube-prometheus-stack.chartref" . }}
release: {{ $.Release.Name | quote }}
@@ -98,7 +225,12 @@
{{/* Allow KubeVersion to be overridden. */}}
{{- define "kube-prometheus-stack.ingress.kubeVersion" -}}
- {{- default .Capabilities.KubeVersion.Version .Values.kubeVersionOverride -}}
+ {{- $kubeVersion := default .Capabilities.KubeVersion.Version .Values.kubeVersionOverride -}}
+ {{/* Special use case for Amazon EKS, Google GKE */}}
+ {{- if and (regexMatch "\\d+\\.\\d+\\.\\d+-(?:eks|gke).+" $kubeVersion) (not .Values.kubeVersionOverride) -}}
+ {{- $kubeVersion = regexFind "\\d+\\.\\d+\\.\\d+" $kubeVersion -}}
+ {{- end -}}
+ {{- $kubeVersion -}}
{{- end -}}
{{/* Get Ingress API Version */}}

View File

@ -0,0 +1,36 @@
--- charts-original/templates/exporters/kube-state-metrics/serviceMonitor.yaml
+++ charts/templates/exporters/kube-state-metrics/serviceMonitor.yaml
@@ -10,7 +10,7 @@
spec:
jobLabel: app.kubernetes.io/name
endpoints:
- - port: http
+ - port: metrics
{{- if .Values.kubeStateMetrics.serviceMonitor.interval }}
interval: {{ .Values.kubeStateMetrics.serviceMonitor.interval }}
{{- end }}
@@ -26,6 +26,24 @@
relabelings:
{{ toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4 }}
{{- end }}
+{{- if .Values.kubeStateMetrics.serviceMonitor.selfMonitor.enabled }}
+ - port: metrics
+ {{- if .Values.kubeStateMetrics.serviceMonitor.interval }}
+ interval: {{ .Values.kubeStateMetrics.serviceMonitor.interval }}
+ {{- end }}
+ {{- if .Values.kubeStateMetrics.serviceMonitor.proxyUrl }}
+ proxyUrl: {{ .Values.kubeStateMetrics.serviceMonitor.proxyUrl}}
+ {{- end }}
+ honorLabels: true
+{{- if .Values.kubeStateMetrics.serviceMonitor.metricRelabelings }}
+ metricRelabelings:
+{{ tpl (toYaml .Values.kubeStateMetrics.serviceMonitor.metricRelabelings | indent 4) . }}
+{{- end }}
+{{- if .Values.kubeStateMetrics.serviceMonitor.relabelings }}
+ relabelings:
+{{ toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4 }}
+{{- end }}
+{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }}
namespaceSelector:
matchNames:

View File

@ -33,3 +33,11 @@
{{- if .Values.kubelet.serviceMonitor.interval }}
interval: {{ .Values.kubelet.serviceMonitor.interval }}
{{- end }}
@@ -168,6 +171,6 @@
- {{ .Values.kubelet.namespace }}
selector:
matchLabels:
- app.kubernetes.io/managed-by: prometheus-operator
+ app.kubernetes.io/name: kubelet
k8s-app: kubelet
{{- end}}

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/apiserver.yaml
+++ charts/templates/grafana/dashboards-1.14/apiserver.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'apiserver' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'apiserver' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/cluster-total.yaml
+++ charts/templates/grafana/dashboards-1.14/cluster-total.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'cluster-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'cluster-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

View File

@ -1,6 +1,10 @@
--- charts-original/templates/grafana/dashboards-1.14/controller-manager.yaml
+++ charts/templates/grafana/dashboards-1.14/controller-manager.yaml
@@ -4,11 +4,12 @@
@@ -1,14 +1,15 @@
{{- /*
-Generated from 'controller-manager' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'controller-manager' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
@ -28,33 +32,45 @@
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -176,7 +181,7 @@
@@ -176,10 +181,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
@@ -282,7 +287,7 @@
- "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
"refId": "A"
}
],
@@ -282,10 +287,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
@@ -388,7 +393,7 @@
- "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
"refId": "A"
}
],
@@ -388,10 +393,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
- "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
"refId": "A"
}
],
@@ -494,28 +499,28 @@
"steppedLine": false,
"targets": [

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'k8s-resources-cluster' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'k8s-resources-cluster' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@ -9,3 +16,138 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -247,7 +247,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
+ "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -499,7 +499,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"memory\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
+ "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -594,7 +594,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}namespace{{`}}`}}",
@@ -885,7 +885,7 @@
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -903,7 +903,7 @@
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", resource=\"cpu\"}) by (namespace)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -912,7 +912,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"cpu\"}) by (namespace)",
+ "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -921,7 +921,7 @@
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"cpu\"}) by (namespace)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1321,7 +1321,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)",
+ "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1330,7 +1330,7 @@
"step": 10
},
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)",
+ "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1339,7 +1339,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)",
+ "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1348,7 +1348,7 @@
"step": 10
},
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)",
+ "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2149,7 +2149,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -2235,7 +2235,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -2333,7 +2333,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -2419,7 +2419,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'k8s-resources-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'k8s-resources-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@ -9,3 +16,147 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -78,7 +78,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -162,7 +162,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -446,7 +446,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}pod{{`}}`}}",
@@ -697,7 +697,7 @@
],
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -706,7 +706,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -715,7 +715,7 @@
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -724,7 +724,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -733,7 +733,7 @@
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1171,7 +1171,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1180,7 +1180,7 @@
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1189,7 +1189,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1198,7 +1198,7 @@
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1842,7 +1842,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -1928,7 +1928,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -2026,7 +2026,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -2112,7 +2112,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-node.yaml
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-node.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'k8s-resources-node' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'k8s-resources-node' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@ -9,3 +16,93 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-node" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -77,7 +77,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}pod{{`}}`}}",
@@ -312,7 +312,7 @@
],
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -321,7 +321,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -330,7 +330,7 @@
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -339,7 +339,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -348,7 +348,7 @@
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -749,7 +749,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -758,7 +758,7 @@
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -767,7 +767,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -776,7 +776,7 @@
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'k8s-resources-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'k8s-resources-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@ -9,3 +16,129 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-pod" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -94,7 +94,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}container{{`}}`}}",
@@ -450,7 +450,7 @@
],
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -459,7 +459,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -468,7 +468,7 @@
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -477,7 +477,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -486,7 +486,7 @@
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -922,7 +922,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -931,7 +931,7 @@
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)",
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -940,7 +940,7 @@
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -949,7 +949,7 @@
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)",
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1294,7 +1294,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -1381,7 +1381,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -1480,7 +1480,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -1567,7 +1567,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'k8s-resources-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'k8s-resources-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@ -9,3 +16,75 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -77,7 +77,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}pod{{`}}`}}",
@@ -312,7 +312,7 @@
],
"targets": [
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -330,7 +330,7 @@
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -348,7 +348,7 @@
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1520,7 +1520,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -1606,7 +1606,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -1704,7 +1704,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -1790,7 +1790,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'k8s-resources-workloads-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'k8s-resources-workloads-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@ -9,3 +16,75 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -98,7 +98,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}workload{{`}}`}} - {{`{{`}}workload_type{{`}}`}}",
@@ -396,7 +396,7 @@
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -414,7 +414,7 @@
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -432,7 +432,7 @@
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1707,7 +1707,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -1793,7 +1793,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -1891,7 +1891,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -1977,7 +1977,7 @@
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/namespace-by-pod.yaml
+++ charts/templates/grafana/dashboards-1.14/namespace-by-pod.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'namespace-by-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'namespace-by-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/namespace-by-workload.yaml
+++ charts/templates/grafana/dashboards-1.14/namespace-by-workload.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'namespace-by-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'namespace-by-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml
+++ charts/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'node-cluster-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'node-cluster-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/node-rsrc-use.yaml
+++ charts/templates/grafana/dashboards-1.14/node-rsrc-use.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'node-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'node-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/nodes.yaml
+++ charts/templates/grafana/dashboards-1.14/nodes.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'nodes' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'nodes' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml
+++ charts/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'persistentvolumesusage' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'persistentvolumesusage' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/pod-total.yaml
+++ charts/templates/grafana/dashboards-1.14/pod-total.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'pod-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'pod-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml
+++ charts/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'prometheus-remote-write' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'prometheus-remote-write' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/prometheus.yaml
+++ charts/templates/grafana/dashboards-1.14/prometheus.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'prometheus' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'prometheus' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@ -9,3 +16,18 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -574,6 +574,14 @@
"steppedLine": false,
"targets": [
{
+ "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total[1m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "exceeded body size limit: {{`{{`}}job{{`}}`}}",
+ "legendLink": null,
+ "step": 10
+ },
+ {
"expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))",
"format": "time_series",
"intervalFactor": 2,

View File

@ -1,6 +1,10 @@
--- charts-original/templates/grafana/dashboards-1.14/proxy.yaml
+++ charts/templates/grafana/dashboards-1.14/proxy.yaml
@@ -4,11 +4,12 @@
@@ -1,14 +1,15 @@
{{- /*
-Generated from 'proxy' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'proxy' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}

View File

@ -1,6 +1,10 @@
--- charts-original/templates/grafana/dashboards-1.14/scheduler.yaml
+++ charts/templates/grafana/dashboards-1.14/scheduler.yaml
@@ -4,11 +4,12 @@
@@ -1,14 +1,15 @@
{{- /*
-Generated from 'scheduler' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'scheduler' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
@ -28,72 +32,86 @@
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -176,28 +181,28 @@
@@ -176,31 +181,31 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} e2e",
- "legendFormat": "{{`{{`}}instance{{`}}`}} e2e",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e",
"refId": "A"
},
{
- "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} binding",
- "legendFormat": "{{`{{`}}instance{{`}}`}} binding",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding",
"refId": "B"
},
{
- "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm",
- "legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm",
"refId": "C"
},
{
- "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} volume",
@@ -290,28 +295,28 @@
- "legendFormat": "{{`{{`}}instance{{`}}`}} volume",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume",
"refId": "D"
}
],
@@ -290,31 +295,31 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} e2e",
- "legendFormat": "{{`{{`}}instance{{`}}`}} e2e",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} binding",
- "legendFormat": "{{`{{`}}instance{{`}}`}} binding",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding",
"refId": "B"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm",
- "legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm",
"refId": "C"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} volume",
- "legendFormat": "{{`{{`}}instance{{`}}`}} volume",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume",
"refId": "D"
}
],
@@ -417,28 +422,28 @@
"steppedLine": false,
"targets": [

View File

@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/workload-total.yaml
+++ charts/templates/grafana/dashboards-1.14/workload-total.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'workload-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'workload-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

View File

@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:

View File

@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:

View File

@ -1,5 +1,11 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: batch/v1
kind: Job
metadata:
@@ -32,9 +32,9 @@
containers:
- name: create

View File

@ -1,5 +1,11 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: batch/v1
kind: Job
metadata:
@@ -32,9 +32,9 @@
containers:
- name: patch

View File

@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/psp.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/psp.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create .Values.global.rbac.pspEnabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create .Values.global.rbac.pspEnabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:

View File

@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:

View File

@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:

View File

@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: v1
kind: ServiceAccount
metadata:

View File

@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled }}
apiVersion: admissionregistration.k8s.io/v1
kind: MutatingWebhookConfiguration
metadata:

View File

@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled }}
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
metadata:

View File

@ -24,7 +24,19 @@
{{- end }}
- --config-reloader-cpu={{ .Values.prometheusOperator.configReloaderCpu }}
- --config-reloader-memory={{ .Values.prometheusOperator.configReloaderMemory }}
@@ -130,16 +130,16 @@
@@ -78,6 +78,11 @@
{{- if .Values.prometheusOperator.prometheusInstanceNamespaces }}
- --prometheus-instance-namespaces={{ .Values.prometheusOperator.prometheusInstanceNamespaces | join "," }}
{{- end }}
+ {{- if .Values.prometheusOperator.thanosImage.sha }}
+ - --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }}
+ {{- else }}
+ - --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}
+ {{- end }}
{{- if .Values.prometheusOperator.thanosRulerInstanceNamespaces }}
- --thanos-ruler-instance-namespaces={{ .Values.prometheusOperator.thanosRulerInstanceNamespaces | join "," }}
{{- end }}
@@ -130,16 +135,16 @@
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
{{- end }}

View File

@ -34,7 +34,26 @@
{{ toYaml .Values.prometheus.prometheusSpec.tolerations | indent 4 }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.topologySpreadConstraints }}
@@ -266,7 +268,7 @@
@@ -254,11 +256,17 @@
name: {{ .Values.prometheus.prometheusSpec.additionalScrapeConfigsSecret.name }}
key: {{ .Values.prometheus.prometheusSpec.additionalScrapeConfigsSecret.key }}
{{- end }}
-{{- if .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs }}
+{{- if or .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret }}
additionalAlertManagerConfigs:
+{{- if .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs }}
name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-am-confg
key: additional-alertmanager-configs.yaml
{{- end }}
+{{- if .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret }}
+ name: {{ .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret.name }}
+ key: {{ .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret.key }}
+{{- end }}
+{{- end }}
{{- if .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigs }}
additionalAlertRelabelConfigs:
name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-am-relabel-confg
@@ -266,7 +274,7 @@
{{- end }}
{{- if .Values.prometheus.prometheusSpec.containers }}
containers:
@ -43,7 +62,7 @@
{{- end }}
{{- if .Values.prometheus.prometheusSpec.initContainers }}
initContainers:
@@ -282,6 +284,7 @@
@@ -282,6 +290,7 @@
{{- if .Values.prometheus.prometheusSpec.disableCompaction }}
disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }}
{{- end }}
@ -51,7 +70,7 @@
portName: {{ .Values.prometheus.prometheusSpec.portName }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.volumes }}
@@ -326,3 +329,4 @@
@@ -326,3 +335,4 @@
{{- if .Values.prometheus.prometheusSpec.allowOverlappingBlocks }}
allowOverlappingBlocks: {{ .Values.prometheus.prometheusSpec.allowOverlappingBlocks }}
{{- end }}

View File

@ -0,0 +1,176 @@
--- charts-original/templates/prometheus/rules-1.14/alertmanager.rules.yaml
+++ charts/templates/prometheus/rules-1.14/alertmanager.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/alertmanager-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -26,41 +26,146 @@
groups:
- name: alertmanager.rules
rules:
- - alert: AlertmanagerConfigInconsistent
+ - alert: AlertmanagerFailedReload
+ annotations:
+ description: Configuration has failed to load for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerfailedreload
+ summary: Reloading an Alertmanager configuration has failed.
+ expr: |-
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]) == 0
+ for: 10m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: AlertmanagerMembersInconsistent
+ annotations:
+ description: Alertmanager {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} has only found {{`{{`}} $value {{`}}`}} members of the {{`{{`}}$labels.job{{`}}`}} cluster.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagermembersinconsistent
+ summary: A member of an Alertmanager cluster has not found all other cluster members.
+ expr: |-
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m])
+ < on (namespace,service) group_left
+ count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]))
+ for: 15m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: AlertmanagerFailedToSendAlerts
+ annotations:
+ description: Alertmanager {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} failed to send {{`{{`}} $value | humanizePercentage {{`}}`}} of notifications to {{`{{`}} $labels.integration {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerfailedtosendalerts
+ summary: An Alertmanager instance failed to send notifications.
+ expr: |-
+ (
+ rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m])
+ /
+ rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m])
+ )
+ > 0.01
+ for: 5m
+ labels:
+ severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: AlertmanagerClusterFailedToSendAlerts
annotations:
- message: 'The configuration of the instances of the Alertmanager cluster `{{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.service {{`}}`}}` are out of sync.
-
- {{`{{`}} range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query {{`}}`}}
-
- Configuration hash for pod {{`{{`}} .Labels.pod {{`}}`}} is "{{`{{`}} printf "%.f" .Value {{`}}`}}"
-
- {{`{{`}} end {{`}}`}}
-
- '
- expr: count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})) != 1
+ description: The minimum notification failure rate to {{`{{`}} $labels.integration {{`}}`}} sent from any instance in the {{`{{`}}$labels.job{{`}}`}} cluster is {{`{{`}} $value | humanizePercentage {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerclusterfailedtosendalerts
+ summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
+ expr: |-
+ min by (namespace,service, integration) (
+ rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration=~`.*`}[5m])
+ /
+ rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration=~`.*`}[5m])
+ )
+ > 0.01
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- - alert: AlertmanagerFailedReload
+ - alert: AlertmanagerClusterFailedToSendAlerts
annotations:
- message: Reloading Alertmanager's configuration has failed for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}.
- expr: alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 0
- for: 10m
+ description: The minimum notification failure rate to {{`{{`}} $labels.integration {{`}}`}} sent from any instance in the {{`{{`}}$labels.job{{`}}`}} cluster is {{`{{`}} $value | humanizePercentage {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerclusterfailedtosendalerts
+ summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
+ expr: |-
+ min by (namespace,service, integration) (
+ rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration!~`.*`}[5m])
+ /
+ rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration!~`.*`}[5m])
+ )
+ > 0.01
+ for: 5m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- - alert: AlertmanagerMembersInconsistent
+ - alert: AlertmanagerConfigInconsistent
+ annotations:
+ description: Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have different configurations.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerconfiginconsistent
+ summary: Alertmanager instances within the same cluster have different configurations.
+ expr: |-
+ count by (namespace,service) (
+ count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})
+ )
+ != 1
+ for: 20m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: AlertmanagerClusterDown
+ annotations:
+ description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have been up for less than half of the last 5m.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerclusterdown
+ summary: Half or more of the Alertmanager instances within the same cluster are down.
+ expr: |-
+ (
+ count by (namespace,service) (
+ avg_over_time(up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]) < 0.5
+ )
+ /
+ count by (namespace,service) (
+ up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
+ )
+ )
+ >= 0.5
+ for: 5m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: AlertmanagerClusterCrashlooping
annotations:
- message: Alertmanager has not found all other members of the cluster.
+ description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have restarted at least 5 times in the last 10m.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerclustercrashlooping
+ summary: Half or more of the Alertmanager instances within the same cluster are crashlooping.
expr: |-
- alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
- != on (service) GROUP_LEFT()
- count by (service) (alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})
+ (
+ count by (namespace,service) (
+ changes(process_start_time_seconds{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[10m]) > 4
+ )
+ /
+ count by (namespace,service) (
+ up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
+ )
+ )
+ >= 0.5
for: 5m
labels:
severity: critical

View File

@ -0,0 +1,38 @@
--- charts-original/templates/prometheus/rules-1.14/general.rules.yaml
+++ charts/templates/prometheus/rules-1.14/general.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -26,7 +26,9 @@
rules:
- alert: TargetDown
annotations:
- message: '{{`{{`}} printf "%.4g" $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.service {{`}}`}} targets in {{`{{`}} $labels.namespace {{`}}`}} namespace are down.'
+ description: '{{`{{`}} printf "%.4g" $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.service {{`}}`}} targets in {{`{{`}} $labels.namespace {{`}}`}} namespace are down.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-targetdown
+ summary: One or more targets are unreachable.
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
for: 10m
labels:
@@ -36,7 +38,7 @@
{{- end }}
- alert: Watchdog
annotations:
- message: 'This is an alert meant to ensure that the entire alerting pipeline is functional.
+ description: 'This is an alert meant to ensure that the entire alerting pipeline is functional.
This alert is always firing, therefore it should always be firing in Alertmanager
@@ -47,6 +49,8 @@
"DeadMansSnitch" integration in PagerDuty.
'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-watchdog
+ summary: An alert that should always be firing to certify that Alertmanager is working properly.
expr: vector(1)
labels:
severity: none

View File

@ -1,15 +1,23 @@
--- charts-original/templates/prometheus/rules-1.14/k8s.rules.yaml
+++ charts/templates/prometheus/rules-1.14/k8s.rules.yaml
@@ -26,31 +26,31 @@
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -26,36 +26,42 @@
rules:
- expr: |-
sum by (cluster, namespace, pod, container) (
- rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m])
+ rate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}[5m])
+ irate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}[5m])
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
)
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
- record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
+ record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
- expr: |-
- container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
+ container_memory_working_set_bytes{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}
@ -37,3 +45,68 @@
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info{node!=""})
)
record: node_namespace_pod_container:container_memory_swap
- expr: |-
+ kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster)
+ group_left() max by (namespace, pod) (
+ (kube_pod_status_phase{phase=~"Pending|Running"} == 1)
+ )
+ record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests
+ - expr: |-
sum by (namespace, cluster) (
sum by (namespace, pod, cluster) (
max by (namespace, pod, container, cluster) (
@@ -67,6 +73,12 @@
)
record: namespace_memory:kube_pod_container_resource_requests:sum
- expr: |-
+ kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster)
+ group_left() max by (namespace, pod) (
+ (kube_pod_status_phase{phase=~"Pending|Running"} == 1)
+ )
+ record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests
+ - expr: |-
sum by (namespace, cluster) (
sum by (namespace, pod, cluster) (
max by (namespace, pod, container, cluster) (
@@ -78,6 +90,40 @@
)
record: namespace_cpu:kube_pod_container_resource_requests:sum
- expr: |-
+ kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster)
+ group_left() max by (namespace, pod) (
+ (kube_pod_status_phase{phase=~"Pending|Running"} == 1)
+ )
+ record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits
+ - expr: |-
+ sum by (namespace, cluster) (
+ sum by (namespace, pod, cluster) (
+ max by (namespace, pod, container, cluster) (
+ kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"}
+ ) * on(namespace, pod, cluster) group_left() max by (namespace, pod) (
+ kube_pod_status_phase{phase=~"Pending|Running"} == 1
+ )
+ )
+ )
+ record: namespace_memory:kube_pod_container_resource_limits:sum
+ - expr: |-
+ kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster)
+ group_left() max by (namespace, pod) (
+ (kube_pod_status_phase{phase=~"Pending|Running"} == 1)
+ )
+ record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits
+ - expr: |-
+ sum by (namespace, cluster) (
+ sum by (namespace, pod, cluster) (
+ max by (namespace, pod, container, cluster) (
+ kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"}
+ ) * on(namespace, pod, cluster) group_left() max by (namespace, pod) (
+ kube_pod_status_phase{phase=~"Pending|Running"} == 1
+ )
+ )
+ )
+ record: namespace_cpu:kube_pod_container_resource_limits:sum
+ - expr: |-
max by (cluster, namespace, workload, pod) (
label_replace(
label_replace(

View File

@ -0,0 +1,187 @@
--- charts-original/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml
+++ charts/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kube-apiserver-availability.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kube-apiserver-availability.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -25,60 +25,70 @@
- interval: 3m
name: kube-apiserver-availability.rules
rules:
+ - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30
+ record: code_verb:apiserver_request_total:increase30d
+ - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
+ labels:
+ verb: read
+ record: code:apiserver_request_total:increase30d
+ - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
+ labels:
+ verb: write
+ record: code:apiserver_request_total:increase30d
- expr: |-
1 - (
(
# write too slow
- sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
+ sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
-
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
) +
(
# read too slow
- sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
+ sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
-
(
(
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d]))
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="1"}[30d]))
or
vector(0)
)
+
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d]))
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="5"}[30d]))
+
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="40"}[30d]))
)
) +
# errors
- sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
+ sum by (cluster) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
)
/
- sum(code:apiserver_request_total:increase30d)
+ sum by (cluster) (code:apiserver_request_total:increase30d)
labels:
verb: all
record: apiserver_request:availability30d
- expr: |-
1 - (
- sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
+ sum by (cluster) (increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
-
(
# too slow
(
- sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d]))
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[30d]))
or
vector(0)
)
+
- sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d]))
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[30d]))
+
- sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[30d]))
)
+
# errors
- sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
+ sum by (cluster) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
)
/
- sum(code:apiserver_request_total:increase30d{verb="read"})
+ sum by (cluster) (code:apiserver_request_total:increase30d{verb="read"})
labels:
verb: read
record: apiserver_request:availability30d
@@ -86,75 +96,33 @@
1 - (
(
# too slow
- sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
+ sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
-
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
)
+
# errors
- sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
+ sum by (cluster) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
)
/
- sum(code:apiserver_request_total:increase30d{verb="write"})
+ sum by (cluster) (code:apiserver_request_total:increase30d{verb="write"})
labels:
verb: write
record: apiserver_request:availability30d
- - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30
- record: code_verb:apiserver_request_total:increase30d
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[1h]))
- record: code_verb:apiserver_request_total:increase1h
- - expr: sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
+ - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
labels:
verb: read
- record: code:apiserver_request_total:increase30d
- - expr: sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
+ record: code_resource:apiserver_request_total:rate5m
+ - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
labels:
verb: write
- record: code:apiserver_request_total:increase30d
+ record: code_resource:apiserver_request_total:rate5m
+ - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h]))
+ record: code_verb:apiserver_request_total:increase1h
+ - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h]))
+ record: code_verb:apiserver_request_total:increase1h
+ - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h]))
+ record: code_verb:apiserver_request_total:increase1h
+ - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
+ record: code_verb:apiserver_request_total:increase1h
{{- end }}
\ No newline at end of file

View File

@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml
+++ charts/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kube-apiserver-slos' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kube-apiserver-slos' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}

View File

@ -0,0 +1,401 @@
--- charts-original/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml
+++ charts/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -28,26 +28,26 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
-
(
(
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d]))
or
vector(0)
)
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d]))
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
)
)
+
# errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
labels:
verb: read
record: apiserver_request:burnrate1d
@@ -55,26 +55,26 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
-
(
(
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h]))
or
vector(0)
)
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h]))
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
)
)
+
# errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
labels:
verb: read
record: apiserver_request:burnrate1h
@@ -82,26 +82,26 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
-
(
(
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h]))
or
vector(0)
)
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h]))
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
)
)
+
# errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
labels:
verb: read
record: apiserver_request:burnrate2h
@@ -109,26 +109,26 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
-
(
(
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m]))
or
vector(0)
)
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m]))
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
)
)
+
# errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
labels:
verb: read
record: apiserver_request:burnrate30m
@@ -136,26 +136,26 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
-
(
(
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d]))
or
vector(0)
)
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d]))
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
)
)
+
# errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
labels:
verb: read
record: apiserver_request:burnrate3d
@@ -163,26 +163,26 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
-
(
(
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m]))
or
vector(0)
)
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m]))
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
)
)
+
# errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
labels:
verb: read
record: apiserver_request:burnrate5m
@@ -190,26 +190,26 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
-
(
(
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h]))
or
vector(0)
)
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h]))
+
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
)
)
+
# errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
labels:
verb: read
record: apiserver_request:burnrate6h
@@ -217,15 +217,15 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
-
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
)
+
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
labels:
verb: write
record: apiserver_request:burnrate1d
@@ -233,15 +233,15 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
-
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
)
+
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
labels:
verb: write
record: apiserver_request:burnrate1h
@@ -249,15 +249,15 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
-
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
)
+
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
labels:
verb: write
record: apiserver_request:burnrate2h
@@ -265,15 +265,15 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
-
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
)
+
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
labels:
verb: write
record: apiserver_request:burnrate30m
@@ -281,15 +281,15 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
-
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
)
+
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
labels:
verb: write
record: apiserver_request:burnrate3d
@@ -297,15 +297,15 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
-
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
)
+
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
labels:
verb: write
record: apiserver_request:burnrate5m
@@ -313,32 +313,32 @@
(
(
# too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
-
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
)
+
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
)
/
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
labels:
verb: write
record: apiserver_request:burnrate6h
- - expr: sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
+ - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
labels:
verb: read
record: code_resource:apiserver_request_total:rate5m
- - expr: sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
+ - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
labels:
verb: write
record: code_resource:apiserver_request_total:rate5m
- - expr: histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
+ - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
labels:
quantile: '0.99'
verb: read
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- - expr: histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
+ - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
labels:
quantile: '0.99'
verb: write

View File

@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml
+++ charts/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kube-prometheus-general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kube-prometheus-general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}

View File

@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml
+++ charts/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}

View File

@ -1,6 +1,10 @@
--- charts-original/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml
+++ charts/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml
@@ -4,7 +4,8 @@
@@ -1,10 +1,11 @@
{{- /*
-Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}

View File

@ -0,0 +1,43 @@
--- charts-original/templates/prometheus/rules-1.14/kube-state-metrics.yaml
+++ charts/templates/prometheus/rules-1.14/kube-state-metrics.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kube-state-metrics' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kube-state-metrics' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-state-metrics-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -56,4 +56,32 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
+ - alert: KubeStateMetricsShardingMismatch
+ annotations:
+ description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricsshardingmismatch
+ summary: kube-state-metrics sharding is misconfigured.
+ expr: stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) != 0
+ for: 15m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: KubeStateMetricsShardsMissing
+ annotations:
+ description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricsshardsmissing
+ summary: kube-state-metrics shards are missing.
+ expr: |-
+ 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) - 1
+ -
+ sum( 2 ^ max by (shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) )
+ != 0
+ for: 15m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
{{- end }}
\ No newline at end of file

View File

@ -1,6 +1,10 @@
--- charts-original/templates/prometheus/rules-1.14/kubelet.rules.yaml
+++ charts/templates/prometheus/rules-1.14/kubelet.rules.yaml
@@ -4,7 +4,8 @@
@@ -1,10 +1,11 @@
{{- /*
-Generated from 'kubelet.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubelet.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}

View File

@ -0,0 +1,79 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-apps.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-apps.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -30,7 +30,10 @@
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 10 minutes.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping
summary: Pod is crash looping.
- expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m]) * 60 * 5 > 0
+ expr: |-
+ increase(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m]) > 0
+ and
+ kube_pod_container_status_waiting{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} == 1
for: 15m
labels:
severity: warning
@@ -79,7 +82,7 @@
expr: |-
(
kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
- !=
+ >
kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
) and (
changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m])
@@ -257,23 +260,23 @@
{{- end }}
- alert: KubeHpaReplicasMismatch
annotations:
- description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.hpa {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.
+ description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubehpareplicasmismatch
summary: HPA has not matched descired number of replicas.
expr: |-
- (kube_hpa_status_desired_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
- kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
+ kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
and
- (kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
>
- kube_hpa_spec_min_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
+ kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
and
- (kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
<
- kube_hpa_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
+ kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
and
- changes(kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) == 0
+ changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) == 0
for: 15m
labels:
severity: warning
@@ -282,13 +285,13 @@
{{- end }}
- alert: KubeHpaMaxedOut
annotations:
- description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.hpa {{`}}`}} has been running at max replicas for longer than 15 minutes.
+ description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubehpamaxedout
summary: HPA is running at max replicas
expr: |-
- kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
==
- kube_hpa_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for: 15m
labels:
severity: warning

View File

@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-resources.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-resources.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}

View File

@ -1,6 +1,13 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-storage.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-storage.yaml
@@ -31,9 +31,9 @@
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -31,10 +31,12 @@
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefillingup
summary: PersistentVolume is filling up.
expr: |-
@ -10,9 +17,12 @@
- kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
+ kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
< 0.03
+ and
+ kubelet_volume_stats_used_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
for: 1m
labels:
@@ -48,12 +48,12 @@
severity: critical
@@ -48,12 +50,14 @@
summary: PersistentVolume is filling up.
expr: |-
(
@ -24,6 +34,8 @@
) < 0.15
and
- predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
+ kubelet_volume_stats_used_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
+ and
+ predict_linear(kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
for: 1h
labels:

View File

@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kubernetes-system-apiserver' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-system-apiserver' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}

View File

@ -1,6 +1,10 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml
@@ -4,7 +4,8 @@
@@ -1,10 +1,11 @@
{{- /*
-Generated from 'kubernetes-system-controller-manager' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-system-controller-manager' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}

View File

@ -1,5 +1,12 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kubernetes-system-kubelet' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-system-kubelet' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -96,7 +96,7 @@
description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletpodstartuplatencyhigh

View File

@ -1,6 +1,10 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml
@@ -4,7 +4,8 @@
@@ -1,10 +1,11 @@
{{- /*
-Generated from 'kubernetes-system-scheduler' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-system-scheduler' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}

View File

@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-system.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-system.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}

View File

@ -0,0 +1,67 @@
--- charts-original/templates/prometheus/rules-1.14/node-exporter.rules.yaml
+++ charts/templates/prometheus/rules-1.14/node-exporter.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'node-exporter.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/node-exporter-prometheusRule.yaml
+Generated from 'node-exporter.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/node-exporter-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -33,9 +33,9 @@
record: instance:node_num_cpu:sum
- expr: |-
1 - avg without (cpu, mode) (
- rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m])
+ rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[5m])
)
- record: instance:node_cpu_utilisation:rate1m
+ record: instance:node_cpu_utilisation:rate5m
- expr: |-
(
node_load1{job="node-exporter"}
@@ -50,30 +50,30 @@
node_memory_MemTotal_bytes{job="node-exporter"}
)
record: instance:node_memory_utilisation:ratio
- - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
- record: instance:node_vmstat_pgmajfault:rate1m
- - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
- record: instance_device:node_disk_io_time_seconds:rate1m
- - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
- record: instance_device:node_disk_io_time_weighted_seconds:rate1m
+ - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m])
+ record: instance:node_vmstat_pgmajfault:rate5m
+ - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m])
+ record: instance_device:node_disk_io_time_seconds:rate5m
+ - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m])
+ record: instance_device:node_disk_io_time_weighted_seconds:rate5m
- expr: |-
sum without (device) (
- rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m])
+ rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m])
)
- record: instance:node_network_receive_bytes_excluding_lo:rate1m
+ record: instance:node_network_receive_bytes_excluding_lo:rate5m
- expr: |-
sum without (device) (
- rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m])
+ rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m])
)
- record: instance:node_network_transmit_bytes_excluding_lo:rate1m
+ record: instance:node_network_transmit_bytes_excluding_lo:rate5m
- expr: |-
sum without (device) (
- rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m])
+ rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m])
)
- record: instance:node_network_receive_drop_excluding_lo:rate1m
+ record: instance:node_network_receive_drop_excluding_lo:rate5m
- expr: |-
sum without (device) (
- rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
+ rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m])
)
- record: instance:node_network_transmit_drop_excluding_lo:rate1m
+ record: instance:node_network_transmit_drop_excluding_lo:rate5m
{{- end }}
\ No newline at end of file

View File

@ -0,0 +1,178 @@
--- charts-original/templates/prometheus/rules-1.14/node-exporter.yaml
+++ charts/templates/prometheus/rules-1.14/node-exporter.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'node-exporter' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/node-exporter-prometheusRule.yaml
+Generated from 'node-exporter' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/node-exporter-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -27,6 +27,7 @@
- alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: |-
(
@@ -45,6 +46,7 @@
- alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: |-
(
@@ -63,6 +65,7 @@
- alert: NodeFilesystemAlmostOutOfSpace
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutofspace
summary: Filesystem has less than 5% space left.
expr: |-
(
@@ -79,6 +82,7 @@
- alert: NodeFilesystemAlmostOutOfSpace
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutofspace
summary: Filesystem has less than 3% space left.
expr: |-
(
@@ -95,6 +99,7 @@
- alert: NodeFilesystemFilesFillingUp
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
expr: |-
(
@@ -113,6 +118,7 @@
- alert: NodeFilesystemFilesFillingUp
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up fast.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
expr: |-
(
@@ -131,6 +137,7 @@
- alert: NodeFilesystemAlmostOutOfFiles
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutoffiles
summary: Filesystem has less than 5% inodes left.
expr: |-
(
@@ -147,6 +154,7 @@
- alert: NodeFilesystemAlmostOutOfFiles
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutoffiles
summary: Filesystem has less than 3% inodes left.
expr: |-
(
@@ -163,6 +171,7 @@
- alert: NodeNetworkReceiveErrs
annotations:
description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} receive errors in the last two minutes.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworkreceiveerrs
summary: Network interface is reporting many receive errors.
expr: rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
for: 1h
@@ -174,6 +183,7 @@
- alert: NodeNetworkTransmitErrs
annotations:
description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} transmit errors in the last two minutes.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworktransmiterrs
summary: Network interface is reporting many transmit errors.
expr: rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
for: 1h
@@ -185,6 +195,7 @@
- alert: NodeHighNumberConntrackEntriesUsed
annotations:
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodehighnumberconntrackentriesused
summary: Number of conntrack are getting close to the limit.
expr: (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
labels:
@@ -195,6 +206,7 @@
- alert: NodeTextFileCollectorScrapeError
annotations:
description: Node Exporter text file collector failed to scrape.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodetextfilecollectorscrapeerror
summary: Node Exporter text file collector failed to scrape.
expr: node_textfile_scrape_error{job="node-exporter"} == 1
labels:
@@ -204,7 +216,8 @@
{{- end }}
- alert: NodeClockSkewDetected
annotations:
- message: Clock on {{`{{`}} $labels.instance {{`}}`}} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
+ description: Clock on {{`{{`}} $labels.instance {{`}}`}} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeclockskewdetected
summary: Clock skew detected.
expr: |-
(
@@ -226,7 +239,8 @@
{{- end }}
- alert: NodeClockNotSynchronising
annotations:
- message: Clock on {{`{{`}} $labels.instance {{`}}`}} is not synchronising. Ensure NTP is configured on this host.
+ description: Clock on {{`{{`}} $labels.instance {{`}}`}} is not synchronising. Ensure NTP is configured on this host.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeclocknotsynchronising
summary: Clock not synchronising.
expr: |-
min_over_time(node_timex_sync_status[5m]) == 0
@@ -241,6 +255,7 @@
- alert: NodeRAIDDegraded
annotations:
description: RAID array '{{`{{`}} $labels.device {{`}}`}}' on {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-noderaiddegraded
summary: RAID Array is degraded
expr: node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
for: 15m
@@ -252,11 +267,42 @@
- alert: NodeRAIDDiskFailure
annotations:
description: At least one device in RAID array on {{`{{`}} $labels.instance {{`}}`}} failed. Array '{{`{{`}} $labels.device {{`}}`}}' needs attention and possibly a disk swap.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-noderaiddiskfailure
summary: Failed device in RAID array
- expr: node_md_disks{state="fail"} > 0
+ expr: node_md_disks{state="failed"} > 0
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
+ - alert: NodeFileDescriptorLimit
+ annotations:
+ description: File descriptors limit at {{`{{`}} $labels.instance {{`}}`}} is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefiledescriptorlimit
+ summary: Kernel is predicted to exhaust file descriptors limit soon.
+ expr: |-
+ (
+ node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70
+ )
+ for: 15m
+ labels:
+ severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: NodeFileDescriptorLimit
+ annotations:
+ description: File descriptors limit at {{`{{`}} $labels.instance {{`}}`}} is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefiledescriptorlimit
+ summary: Kernel is predicted to exhaust file descriptors limit soon.
+ expr: |-
+ (
+ node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90
+ )
+ for: 15m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
{{- end }}
\ No newline at end of file

View File

@ -0,0 +1,20 @@
--- charts-original/templates/prometheus/rules-1.14/node-network.yaml
+++ charts/templates/prometheus/rules-1.14/node-network.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'node-network' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'node-network' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -26,7 +26,9 @@
rules:
- alert: NodeNetworkInterfaceFlapping
annotations:
- message: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing it's up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}"
+ description: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing it's up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworkinterfaceflapping
+ summary: Network interface is often changin it's status
expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
for: 2m
labels:

View File

@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/node.rules.yaml
+++ charts/templates/prometheus/rules-1.14/node.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}

View File

@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/prometheus-operator.yaml
+++ charts/templates/prometheus/rules-1.14/prometheus-operator.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-operator-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}

View File

@ -0,0 +1,231 @@
--- charts-original/templates/prometheus/rules-1.14/prometheus.yaml
+++ charts/templates/prometheus/rules-1.14/prometheus.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'prometheus' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'prometheus' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -29,6 +29,7 @@
- alert: PrometheusBadConfig
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to reload its configuration.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusbadconfig
summary: Failed Prometheus configuration reload.
expr: |-
# Without max_over_time, failed scrapes could create false negatives, see
@@ -43,6 +44,7 @@
- alert: PrometheusNotificationQueueRunningFull
annotations:
description: Alert notification queue of Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is running full.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusnotificationqueuerunningfull
summary: Prometheus alert notification queue predicted to run full in less than 30m.
expr: |-
# Without min_over_time, failed scrapes could create false negatives, see
@@ -61,6 +63,7 @@
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
annotations:
description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}}.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheuserrorsendingalertstosomealertmanagers
summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
expr: |-
(
@@ -76,27 +79,10 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
- annotations:
- description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% minimum errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to any Alertmanager.'
- summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
- expr: |-
- min without(alertmanager) (
- rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
- /
- rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
- )
- * 100
- > 3
- for: 15m
- labels:
- severity: critical
-{{- if .Values.defaultRules.additionalRuleLabels }}
-{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
-{{- end }}
- alert: PrometheusNotConnectedToAlertmanagers
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not connected to any Alertmanagers.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusnotconnectedtoalertmanagers
summary: Prometheus is not connected to any Alertmanagers.
expr: |-
# Without max_over_time, failed scrapes could create false negatives, see
@@ -111,6 +97,7 @@
- alert: PrometheusTSDBReloadsFailing
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} reload failures over the last 3h.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheustsdbreloadsfailing
summary: Prometheus has issues reloading blocks from disk.
expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0
for: 4h
@@ -122,6 +109,7 @@
- alert: PrometheusTSDBCompactionsFailing
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last 3h.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheustsdbcompactionsfailing
summary: Prometheus has issues compacting blocks.
expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0
for: 4h
@@ -133,8 +121,18 @@
- alert: PrometheusNotIngestingSamples
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not ingesting samples.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusnotingestingsamples
summary: Prometheus is not ingesting samples.
- expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
+ expr: |-
+ (
+ rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
+ and
+ (
+ sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}) > 0
+ or
+ sum without(rule_group) (prometheus_rule_group_rules{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}) > 0
+ )
+ )
for: 10m
labels:
severity: warning
@@ -144,6 +142,7 @@
- alert: PrometheusDuplicateTimestamps
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with different values but duplicated timestamp.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusduplicatetimestamps
summary: Prometheus is dropping samples with duplicate timestamps.
expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 10m
@@ -155,6 +154,7 @@
- alert: PrometheusOutOfOrderTimestamps
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with timestamps arriving out of order.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusoutofordertimestamps
summary: Prometheus drops samples with out-of-order timestamps.
expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 10m
@@ -166,15 +166,16 @@
- alert: PrometheusRemoteStorageFailures
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} failed to send {{`{{`}} printf "%.1f" $value {{`}}`}}% of the samples to {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusremotestoragefailures
summary: Prometheus fails to send samples to remote storage.
expr: |-
(
- rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ (rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]))
/
(
- rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ (rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]))
+
- rate(prometheus_remote_storage_succeeded_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ (rate(prometheus_remote_storage_succeeded_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]))
)
)
* 100
@@ -188,13 +189,14 @@
- alert: PrometheusRemoteWriteBehind
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write is {{`{{`}} printf "%.1f" $value {{`}}`}}s behind for {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusremotewritebehind
summary: Prometheus remote write is behind.
expr: |-
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
- - on(job, instance) group_right
+ - ignoring(remote_name, url) group_right
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
)
> 120
@@ -207,6 +209,7 @@
- alert: PrometheusRemoteWriteDesiredShards
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write desired shards calculation wants to run {{`{{`}} $value {{`}}`}} shards for queue {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}, which is more than the max of {{`{{`}} printf `prometheus_remote_storage_shards_max{instance="%s",job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}` $labels.instance | query | first | value {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusremotewritedesiredshards
summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
expr: |-
# Without max_over_time, failed scrapes could create false negatives, see
@@ -225,6 +228,7 @@
- alert: PrometheusRuleFailures
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to evaluate {{`{{`}} printf "%.0f" $value {{`}}`}} rules in the last 5m.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusrulefailures
summary: Prometheus is failing rule evaluations.
expr: increase(prometheus_rule_evaluation_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 15m
@@ -236,6 +240,7 @@
- alert: PrometheusMissingRuleEvaluations
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has missed {{`{{`}} printf "%.0f" $value {{`}}`}} rule group evaluations in the last 5m.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusmissingruleevaluations
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: increase(prometheus_rule_group_iterations_missed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 15m
@@ -247,6 +252,7 @@
- alert: PrometheusTargetLimitHit
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because the number of targets exceeded the configured target_limit.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheustargetlimithit
summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 15m
@@ -255,4 +261,47 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
+ - alert: PrometheusLabelLimitHit
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheuslabellimithit
+ summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit.
+ expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: PrometheusTargetSyncFailure
+ annotations:
+ description: '{{`{{`}} printf "%.0f" $value {{`}}`}} targets in Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} have failed to sync because invalid configuration was supplied.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheustargetsyncfailure
+ summary: Prometheus has failed to sync targets.
+ expr: increase(prometheus_target_sync_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[30m]) > 0
+ for: 5m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
+ annotations:
+ description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% minimum errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to any Alertmanager.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheuserrorsendingalertstoanyalertmanager
+ summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
+ expr: |-
+ min without (alertmanager) (
+ rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}",alertmanager!~``}[5m])
+ /
+ rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}",alertmanager!~``}[5m])
+ )
+ * 100
+ > 3
+ for: 15m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
{{- end }}
\ No newline at end of file

View File

@ -756,7 +756,18 @@
## If your kube proxy is not deployed as a pod, specify IPs it can be found on
##
@@ -1266,6 +1857,13 @@
@@ -1258,6 +1849,10 @@
# replacement: $1
# action: replace
+ # Enable self metrics configuration for Service Monitor
+ selfMonitor:
+ enabled: false
+
## Configuration for kube-state-metrics subchart
##
kube-state-metrics:
@@ -1266,6 +1861,13 @@
create: true
podSecurityPolicy:
enabled: true
@ -770,7 +781,7 @@
## Deploy node exporter as a daemonset to all nodes
##
@@ -1319,6 +1917,16 @@
@@ -1319,6 +1921,16 @@
extraArgs:
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
@ -787,7 +798,7 @@
## Manages Prometheus and Alertmanager components
##
@@ -1331,8 +1939,8 @@
@@ -1331,8 +1943,8 @@
enabled: true
# Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants
tlsMinVersion: VersionTLS13
@ -798,7 +809,7 @@
## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted
## rules from making their way into prometheus and potentially preventing the container from starting
@@ -1349,7 +1957,7 @@
@@ -1349,7 +1961,7 @@
patch:
enabled: true
image:
@ -807,7 +818,7 @@
tag: v1.5.2
sha: ""
pullPolicy: IfNotPresent
@@ -1498,13 +2106,13 @@
@@ -1498,13 +2110,13 @@
## Resource limits & requests
##
@ -828,25 +839,43 @@
# Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico),
# because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working
@@ -1557,7 +2165,7 @@
@@ -1557,8 +2169,8 @@
## Prometheus-operator image
##
image:
- repository: quay.io/prometheus-operator/prometheus-operator
- tag: v0.48.0
+ repository: rancher/mirrored-prometheus-operator-prometheus-operator
tag: v0.48.0
+ tag: v0.49.0
sha: ""
pullPolicy: IfNotPresent
@@ -1573,7 +2181,7 @@
@@ -1573,8 +2185,8 @@
## Prometheus-config-reloader image to use for config and rule reloading
##
prometheusConfigReloaderImage:
- repository: quay.io/prometheus-operator/prometheus-config-reloader
- tag: v0.48.0
+ repository: rancher/mirrored-prometheus-operator-prometheus-config-reloader
tag: v0.48.0
+ tag: v0.49.0
sha: ""
@@ -1659,7 +2267,7 @@
## Set the prometheus config reloader side-car CPU limit
@@ -1585,6 +2197,13 @@
##
configReloaderMemory: 50Mi
+ ## Thanos side-car image when configured
+ ##
+ thanosImage:
+ repository: quay.io/thanos/thanos
+ tag: v0.17.2
+ sha: ""
+
## Set a Field Selector to filter watched secrets
##
secretFieldSelector: ""
@@ -1659,7 +2278,7 @@
port: 9090
## To be used with a proxy extraContainer port
@ -855,16 +884,18 @@
## List of IP addresses at which the Prometheus server service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
@@ -1916,7 +2524,7 @@
@@ -1916,8 +2535,8 @@
## Image of Prometheus.
##
image:
- repository: quay.io/prometheus/prometheus
- tag: v2.27.1
+ repository: rancher/mirrored-prometheus-prometheus
tag: v2.27.1
+ tag: v2.28.1
sha: ""
@@ -1979,6 +2587,11 @@
## Tolerations for use with node taints
@@ -1979,6 +2598,11 @@
##
externalUrl: ""
@ -876,7 +907,7 @@
## Define which Nodes the Pods are scheduled on.
## ref: https://kubernetes.io/docs/user-guide/node-selection/
##
@@ -2011,7 +2624,7 @@
@@ -2011,7 +2635,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the PrometheusRule resources created
##
@ -885,7 +916,7 @@
## PrometheusRules to be selected for target discovery.
## If {}, select all PrometheusRules
@@ -2036,7 +2649,7 @@
@@ -2036,7 +2660,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the servicemonitors created
##
@ -894,7 +925,7 @@
## ServiceMonitors to be selected for target discovery.
## If {}, select all ServiceMonitors
@@ -2059,7 +2672,7 @@
@@ -2059,7 +2683,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the podmonitors created
##
@ -903,7 +934,7 @@
## PodMonitors to be selected for target discovery.
## If {}, select all PodMonitors
@@ -2190,9 +2803,13 @@
@@ -2190,9 +2814,13 @@
## Resource limits & requests
##
@ -920,7 +951,7 @@
## Prometheus StorageSpec for persistent data
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/storage.md
@@ -2215,7 +2832,13 @@
@@ -2215,7 +2843,13 @@
# medium: Memory
# Additional volumes on the output StatefulSet definition.
@ -935,7 +966,22 @@
# Additional VolumeMounts on the output StatefulSet definition.
volumeMounts: []
@@ -2322,9 +2945,34 @@
@@ -2288,6 +2922,14 @@
# services:
# - metrics-prometheus-alertmanager
+ ## If additional alertmanager configurations are already deployed in a single secret, or you want to manage
+ ## them separately from the helm deployment, you can use this section.
+ ## Expected values are the secret name and key
+ ## Cannot be used with additionalAlertManagerConfigs
+ additionalAlertManagerConfigsSecret: {}
+ # name:
+ # key:
+
## AdditionalAlertRelabelConfigs allows specifying Prometheus alert relabel configurations. Alert relabel configurations specified are appended
## to the configurations generated by the Prometheus Operator. Alert relabel configurations specified must have the form as specified in the
## official Prometheus documentation: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alert_relabel_configs.
@@ -2322,9 +2964,34 @@
##
thanos: {}
@ -971,7 +1017,7 @@
## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes
## (permissions, dir tree) on mounted volumes before starting prometheus
@@ -2332,7 +2980,7 @@
@@ -2332,7 +2999,7 @@
## PortName to use for Prometheus.
##