mirror of https://git.rancher.io/charts
Rebase to ea0e187
parent 60aa84153e
commit 0b9eff2180
@@ -0,0 +1,610 @@
{{- /*
Generated from 'alertmanager-overview' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "alertmanager-overview" | trunc 63 | trimSuffix "-" }}
  annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
  labels:
    {{- if $.Values.grafana.sidecar.dashboards.label }}
    {{ $.Values.grafana.sidecar.dashboards.label }}: "1"
    {{- end }}
    app: {{ template "kube-prometheus-stack.name" $ }}-grafana
{{ include "kube-prometheus-stack.labels" $ | indent 4 }}
data:
  alertmanager-overview.json: |-
    {
      "__inputs": [

      ],
      "__requires": [

      ],
      "annotations": {
        "list": [

        ]
      },
      "editable": false,
      "gnetId": null,
      "graphTooltip": 1,
      "hideControls": false,
      "id": null,
      "links": [

      ],
      "refresh": "30s",
      "rows": [
        {
          "collapse": false,
          "collapsed": false,
          "panels": [
            {
              "aliasColors": {

              },
              "bars": false,
              "dashLength": 10,
              "dashes": false,
              "datasource": "$datasource",
              "fill": 1,
              "fillGradient": 0,
              "gridPos": {

              },
              "id": 2,
              "legend": {
                "alignAsTable": false,
                "avg": false,
                "current": false,
                "max": false,
                "min": false,
                "rightSide": false,
                "show": false,
                "sideWidth": null,
                "total": false,
                "values": false
              },
              "lines": true,
              "linewidth": 1,
              "links": [

              ],
              "nullPointMode": "null",
              "percentage": false,
              "pointradius": 5,
              "points": false,
              "renderer": "flot",
              "repeat": null,
              "seriesOverrides": [

              ],
              "spaceLength": 10,
              "span": 6,
              "stack": true,
              "steppedLine": false,
              "targets": [
                {
                  "expr": "sum(alertmanager_alerts{namespace=\"$namespace\",service=\"$service\"}) by (namespace,service,instance)",
                  "format": "time_series",
                  "intervalFactor": 2,
                  "legendFormat": "{{`{{`}}instance{{`}}`}}",
                  "refId": "A"
                }
              ],
              "thresholds": [

              ],
              "timeFrom": null,
              "timeShift": null,
              "title": "Alerts",
              "tooltip": {
                "shared": true,
                "sort": 0,
                "value_type": "individual"
              },
              "type": "graph",
              "xaxis": {
                "buckets": null,
                "mode": "time",
                "name": null,
                "show": true,
                "values": [

                ]
              },
              "yaxes": [
                {
                  "format": "none",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                },
                {
                  "format": "none",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                }
              ]
            },
            {
              "aliasColors": {

              },
              "bars": false,
              "dashLength": 10,
              "dashes": false,
              "datasource": "$datasource",
              "fill": 1,
              "fillGradient": 0,
              "gridPos": {

              },
              "id": 3,
              "legend": {
                "alignAsTable": false,
                "avg": false,
                "current": false,
                "max": false,
                "min": false,
                "rightSide": false,
                "show": false,
                "sideWidth": null,
                "total": false,
                "values": false
              },
              "lines": true,
              "linewidth": 1,
              "links": [

              ],
              "nullPointMode": "null",
              "percentage": false,
              "pointradius": 5,
              "points": false,
              "renderer": "flot",
              "repeat": null,
              "seriesOverrides": [

              ],
              "spaceLength": 10,
              "span": 6,
              "stack": true,
              "steppedLine": false,
              "targets": [
                {
                  "expr": "sum(rate(alertmanager_alerts_received_total{namespace=\"$namespace\",service=\"$service\"}[5m])) by (namespace,service,instance)",
                  "format": "time_series",
                  "intervalFactor": 2,
                  "legendFormat": "{{`{{`}}instance{{`}}`}} Received",
                  "refId": "A"
                },
                {
                  "expr": "sum(rate(alertmanager_alerts_invalid_total{namespace=\"$namespace\",service=\"$service\"}[5m])) by (namespace,service,instance)",
                  "format": "time_series",
                  "intervalFactor": 2,
                  "legendFormat": "{{`{{`}}instance{{`}}`}} Invalid",
                  "refId": "B"
                }
              ],
              "thresholds": [

              ],
              "timeFrom": null,
              "timeShift": null,
              "title": "Alerts receive rate",
              "tooltip": {
                "shared": true,
                "sort": 0,
                "value_type": "individual"
              },
              "type": "graph",
              "xaxis": {
                "buckets": null,
                "mode": "time",
                "name": null,
                "show": true,
                "values": [

                ]
              },
              "yaxes": [
                {
                  "format": "ops",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                },
                {
                  "format": "ops",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                }
              ]
            }
          ],
          "repeat": null,
          "repeatIteration": null,
          "repeatRowId": null,
          "showTitle": true,
          "title": "Alerts",
          "titleSize": "h6",
          "type": "row"
        },
        {
          "collapse": false,
          "collapsed": false,
          "panels": [
            {
              "aliasColors": {

              },
              "bars": false,
              "dashLength": 10,
              "dashes": false,
              "datasource": "$datasource",
              "fill": 1,
              "fillGradient": 0,
              "gridPos": {

              },
              "id": 4,
              "legend": {
                "alignAsTable": false,
                "avg": false,
                "current": false,
                "max": false,
                "min": false,
                "rightSide": false,
                "show": false,
                "sideWidth": null,
                "total": false,
                "values": false
              },
              "lines": true,
              "linewidth": 1,
              "links": [

              ],
              "nullPointMode": "null",
              "percentage": false,
              "pointradius": 5,
              "points": false,
              "renderer": "flot",
              "repeat": "integration",
              "seriesOverrides": [

              ],
              "spaceLength": 10,
              "stack": true,
              "steppedLine": false,
              "targets": [
                {
                  "expr": "sum(rate(alertmanager_notifications_total{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (integration,namespace,service,instance)",
                  "format": "time_series",
                  "intervalFactor": 2,
                  "legendFormat": "{{`{{`}}instance{{`}}`}} Total",
                  "refId": "A"
                },
                {
                  "expr": "sum(rate(alertmanager_notifications_failed_total{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (integration,namespace,service,instance)",
                  "format": "time_series",
                  "intervalFactor": 2,
                  "legendFormat": "{{`{{`}}instance{{`}}`}} Failed",
                  "refId": "B"
                }
              ],
              "thresholds": [

              ],
              "timeFrom": null,
              "timeShift": null,
              "title": "$integration: Notifications Send Rate",
              "tooltip": {
                "shared": true,
                "sort": 0,
                "value_type": "individual"
              },
              "type": "graph",
              "xaxis": {
                "buckets": null,
                "mode": "time",
                "name": null,
                "show": true,
                "values": [

                ]
              },
              "yaxes": [
                {
                  "format": "ops",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                },
                {
                  "format": "ops",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                }
              ]
            },
            {
              "aliasColors": {

              },
              "bars": false,
              "dashLength": 10,
              "dashes": false,
              "datasource": "$datasource",
              "fill": 1,
              "fillGradient": 0,
              "gridPos": {

              },
              "id": 5,
              "legend": {
                "alignAsTable": false,
                "avg": false,
                "current": false,
                "max": false,
                "min": false,
                "rightSide": false,
                "show": false,
                "sideWidth": null,
                "total": false,
                "values": false
              },
              "lines": true,
              "linewidth": 1,
              "links": [

              ],
              "nullPointMode": "null",
              "percentage": false,
              "pointradius": 5,
              "points": false,
              "renderer": "flot",
              "repeat": "integration",
              "seriesOverrides": [

              ],
              "spaceLength": 10,
              "stack": false,
              "steppedLine": false,
              "targets": [
                {
                  "expr": "histogram_quantile(0.99,\n  sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (le,namespace,service,instance)\n) \n",
                  "format": "time_series",
                  "intervalFactor": 2,
                  "legendFormat": "{{`{{`}}instance{{`}}`}} 99th Percentile",
                  "refId": "A"
                },
                {
                  "expr": "histogram_quantile(0.50,\n  sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (le,namespace,service,instance)\n) \n",
                  "format": "time_series",
                  "intervalFactor": 2,
                  "legendFormat": "{{`{{`}}instance{{`}}`}} Median",
                  "refId": "B"
                },
                {
                  "expr": "sum(rate(alertmanager_notification_latency_seconds_sum{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (namespace,service,instance)\n/\nsum(rate(alertmanager_notification_latency_seconds_count{namespace=\"$namespace\",service=\"$service\", integration=\"$integration\"}[5m])) by (namespace,service,instance)\n",
                  "format": "time_series",
                  "intervalFactor": 2,
                  "legendFormat": "{{`{{`}}instance{{`}}`}} Average",
                  "refId": "C"
                }
              ],
              "thresholds": [

              ],
              "timeFrom": null,
              "timeShift": null,
              "title": "$integration: Notification Duration",
              "tooltip": {
                "shared": true,
                "sort": 0,
                "value_type": "individual"
              },
              "type": "graph",
              "xaxis": {
                "buckets": null,
                "mode": "time",
                "name": null,
                "show": true,
                "values": [

                ]
              },
              "yaxes": [
                {
                  "format": "s",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                },
                {
                  "format": "s",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                }
              ]
            }
          ],
          "repeat": null,
          "repeatIteration": null,
          "repeatRowId": null,
          "showTitle": true,
          "title": "Notifications",
          "titleSize": "h6",
          "type": "row"
        }
      ],
      "schemaVersion": 14,
      "style": "dark",
      "tags": [
        "alertmanager-mixin"
      ],
      "templating": {
        "list": [
          {
            "current": {
              "text": "Prometheus",
              "value": "Prometheus"
            },
            "hide": 0,
            "label": null,
            "name": "datasource",
            "options": [

            ],
            "query": "prometheus",
            "refresh": 1,
            "regex": "",
            "type": "datasource"
          },
          {
            "allValue": null,
            "current": {
              "text": "",
              "value": ""
            },
            "datasource": "$datasource",
            "hide": 0,
            "includeAll": false,
            "label": null,
            "multi": false,
            "name": "namespace",
            "options": [

            ],
            "query": "label_values(alertmanager_alerts, namespace)",
            "refresh": 2,
            "regex": "",
            "sort": 1,
            "tagValuesQuery": "",
            "tags": [

            ],
            "tagsQuery": "",
            "type": "query",
            "useTags": false
          },
          {
            "allValue": null,
            "current": {
              "text": "",
              "value": ""
            },
            "datasource": "$datasource",
            "hide": 0,
            "includeAll": false,
            "label": null,
            "multi": false,
            "name": "service",
            "options": [

            ],
            "query": "label_values(alertmanager_alerts, service)",
            "refresh": 2,
            "regex": "",
            "sort": 1,
            "tagValuesQuery": "",
            "tags": [

            ],
            "tagsQuery": "",
            "type": "query",
            "useTags": false
          },
          {
            "allValue": null,
            "current": {
              "text": "all",
              "value": "$__all"
            },
            "datasource": "$datasource",
            "hide": 2,
            "includeAll": true,
            "label": null,
            "multi": false,
            "name": "integration",
            "options": [

            ],
            "query": "label_values(alertmanager_notifications_total{integration=~\".*\"}, integration)",
            "refresh": 2,
            "regex": "",
            "sort": 1,
            "tagValuesQuery": "",
            "tags": [

            ],
            "tagsQuery": "",
            "type": "query",
            "useTags": false
          }
        ]
      },
      "time": {
        "from": "now-1h",
        "to": "now"
      },
      "timepicker": {
        "refresh_intervals": [
          "5s",
          "10s",
          "30s",
          "1m",
          "5m",
          "15m",
          "30m",
          "1h",
          "2h",
          "1d"
        ],
        "time_options": [
          "5m",
          "15m",
          "1h",
          "6h",
          "12h",
          "24h",
          "2d",
          "7d",
          "30d"
        ]
      },
      "timezone": "utc",
      "title": "Alertmanager / Overview",
      "uid": "alertmanager-overview",
      "version": 0
    }
{{- end }}
@@ -0,0 +1,328 @@
{{- /*
Generated from 'kube-apiserver-burnrate.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver-burnrate.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kube-apiserver-burnrate.rules
    rules:
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
            -
            (
              (
                sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[1d]))
                or
                vector(0)
              )
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[1d]))
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[1d]))
            )
          )
          +
          # errors
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
      labels:
        verb: read
      record: apiserver_request:burnrate1d
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
            -
            (
              (
                sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[1h]))
                or
                vector(0)
              )
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[1h]))
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[1h]))
            )
          )
          +
          # errors
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
      labels:
        verb: read
      record: apiserver_request:burnrate1h
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
            -
            (
              (
                sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[2h]))
                or
                vector(0)
              )
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[2h]))
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[2h]))
            )
          )
          +
          # errors
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
      labels:
        verb: read
      record: apiserver_request:burnrate2h
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
            -
            (
              (
                sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[30m]))
                or
                vector(0)
              )
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[30m]))
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[30m]))
            )
          )
          +
          # errors
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
      labels:
        verb: read
      record: apiserver_request:burnrate30m
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
            -
            (
              (
                sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[3d]))
                or
                vector(0)
              )
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[3d]))
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[3d]))
            )
          )
          +
          # errors
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
      labels:
        verb: read
      record: apiserver_request:burnrate3d
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
            -
            (
              (
                sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[5m]))
                or
                vector(0)
              )
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[5m]))
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[5m]))
            )
          )
          +
          # errors
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
      labels:
        verb: read
      record: apiserver_request:burnrate5m
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
            -
            (
              (
                sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[6h]))
                or
                vector(0)
              )
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[6h]))
              +
              sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[6h]))
            )
          )
          +
          # errors
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
      labels:
        verb: read
      record: apiserver_request:burnrate6h
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
            -
            sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
          )
          +
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
      labels:
        verb: write
      record: apiserver_request:burnrate1d
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
            -
            sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
          )
          +
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
      labels:
        verb: write
      record: apiserver_request:burnrate1h
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
            -
            sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
          )
          +
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
      labels:
        verb: write
      record: apiserver_request:burnrate2h
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
            -
            sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
          )
          +
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
      labels:
        verb: write
      record: apiserver_request:burnrate30m
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
            -
            sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
          )
          +
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
      labels:
        verb: write
      record: apiserver_request:burnrate3d
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
            -
            sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
          )
          +
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
      labels:
        verb: write
      record: apiserver_request:burnrate5m
    - expr: |-
        (
          (
            # too slow
            sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
            -
            sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
          )
          +
          sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
        )
        /
        sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
      labels:
        verb: write
      record: apiserver_request:burnrate6h
{{- end }}
@@ -0,0 +1,49 @@
{{- /*
Generated from 'kube-apiserver-histogram.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver-histogram.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kube-apiserver-histogram.rules
    rules:
    - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
      labels:
        quantile: '0.99'
        verb: read
      record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
    - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
      labels:
        quantile: '0.99'
        verb: write
      record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
    - expr: histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
      labels:
        quantile: '0.99'
      record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
    - expr: histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
      labels:
        quantile: '0.9'
      record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
    - expr: histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
      labels:
        quantile: '0.5'
      record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
{{- end }}
@@ -24,8 +24,8 @@
 +sources:
 +  - https://github.com/prometheus-community/helm-charts
 +  - https://github.com/prometheus-operator/kube-prometheus
-+version: 16.6.0
-+appVersion: 0.48.0
++version: 17.0.0
++appVersion: 0.49.0
 +kubeVersion: ">=1.16.0-0"
 +home: https://github.com/prometheus-operator/kube-prometheus
 +keywords:
@@ -1,6 +1,28 @@
--- charts-original/README.md
+++ charts/README.md
@@ -193,7 +193,39 @@
@@ -83,6 +83,21 @@

A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions.

+### From 16.x to 17.x
+Version 17 upgrades prometheus-operator from 0.48.x to 0.49.x. Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRDs manually before updating:
+
+```console
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
+kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
+```
+
+
### From 15.x to 16.x
Version 16 upgrades kube-state-metrics to v2.0.0. This includes changed command-line arguments and removed metrics, see this [blog post](https://kubernetes.io/blog/2021/04/13/kube-state-metrics-v-2-0/). This version also removes Grafana dashboards that supported Kubernetes 1.14 or earlier.

@@ -193,7 +208,39 @@
helm show values prometheus-community/kube-prometheus-stack
```
File diff suppressed because it is too large
@@ -0,0 +1,460 @@
--- charts-original/crds/crd-podmonitors.yaml
+++ charts/crds/crd-podmonitors.yaml
@@ -1,4 +1,4 @@
-# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
+# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml

---
apiVersion: apiextensions.k8s.io/v1
@@ -25,24 +25,31 @@
description: PodMonitor defines monitoring for a set of pods.
properties:
apiVersion:
- description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+ description: 'APIVersion defines the versioned schema of this representation
+ of an object. Servers should convert recognized schemas to the latest
+ internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
- description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+ description: 'Kind is a string value representing the REST resource this
+ object represents. Servers may infer this from the endpoint the client
+ submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
- description: Specification of desired Pod selection for target discovery by Prometheus.
+ description: Specification of desired Pod selection for target discovery
+ by Prometheus.
properties:
jobLabel:
description: The label to use to retrieve the job name from.
type: string
namespaceSelector:
- description: Selector to select which namespaces the Endpoints objects are discovered from.
+ description: Selector to select which namespaces the Endpoints objects
+ are discovered from.
properties:
any:
- description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
+ description: Boolean describing whether all namespaces are selected
+ in contrast to a list restricting them.
type: boolean
matchNames:
description: List of namespace names.
@@ -53,94 +60,126 @@
podMetricsEndpoints:
description: A list of endpoints allowed as part of this PodMonitor.
items:
- description: PodMetricsEndpoint defines a scrapeable endpoint of a Kubernetes Pod serving Prometheus metrics.
+ description: PodMetricsEndpoint defines a scrapeable endpoint of
+ a Kubernetes Pod serving Prometheus metrics.
properties:
basicAuth:
- description: 'BasicAuth allow an endpoint to authenticate over basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
+ description: 'BasicAuth allow an endpoint to authenticate over
+ basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
properties:
password:
- description: The secret in the service monitor namespace that contains the password for authentication.
+ description: The secret in the service monitor namespace
+ that contains the password for authentication.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
type: object
username:
- description: The secret in the service monitor namespace that contains the username for authentication.
+ description: The secret in the service monitor namespace
+ that contains the username for authentication.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
type: object
type: object
bearerTokenSecret:
- description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the pod monitor and accessible by the Prometheus Operator.
+ description: Secret to mount to read bearer token for scraping
+ targets. The secret needs to be in the same namespace as the
+ pod monitor and accessible by the Prometheus Operator.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
type: object
honorLabels:
- description: HonorLabels chooses the metric's labels on collisions with target labels.
+ description: HonorLabels chooses the metric's labels on collisions
+ with target labels.
type: boolean
honorTimestamps:
- description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data.
+ description: HonorTimestamps controls whether Prometheus respects
+ the timestamps present in scraped data.
type: boolean
interval:
description: Interval at which metrics should be scraped
type: string
metricRelabelings:
- description: MetricRelabelConfigs to apply to samples before ingestion.
+ description: MetricRelabelConfigs to apply to samples before
+ ingestion.
items:
- description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
+ description: 'RelabelConfig allows dynamic rewriting of the
+ label set, being applied to samples before ingestion. It
+ defines `<metric_relabel_configs>`-section of Prometheus
+ configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
- description: Action to perform based on regex matching. Default is 'replace'
+ description: Action to perform based on regex matching.
+ Default is 'replace'
type: string
modulus:
- description: Modulus to take of the hash of the source label values.
+ description: Modulus to take of the hash of the source
+ label values.
format: int64
type: integer
regex:
- description: Regular expression against which the extracted value is matched. Default is '(.*)'
+ description: Regular expression against which the extracted
+ value is matched. Default is '(.*)'
type: string
replacement:
- description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
+ description: Replacement value against which a regex replace
+ is performed if the regular expression matches. Regex
+ capture groups are available. Default is '$1'
type: string
separator:
- description: Separator placed between concatenated source label values. default is ';'.
+ description: Separator placed between concatenated source
+ label values. default is ';'.
type: string
sourceLabels:
- description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
+ description: The source labels select values from existing
+ labels. Their content is concatenated using the configured
+ separator and matched against the configured regular
+ expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
- description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
+ description: Label to which the resulting value is written
+ in a replace action. It is mandatory for replace actions.
+ Regex capture groups are available.
type: string
type: object
type: array
@@ -155,39 +194,58 @@
description: HTTP path to scrape for metrics.
type: string
port:
- description: Name of the pod port this endpoint refers to. Mutually exclusive with targetPort.
+ description: Name of the pod port this endpoint refers to. Mutually
+ exclusive with targetPort.
type: string
proxyUrl:
- description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint.
+ description: ProxyURL eg http://proxyserver:2195 Directs scrapes
+ to proxy through this endpoint.
type: string
relabelings:
- description: 'RelabelConfigs to apply to samples before scraping. Prometheus Operator automatically adds relabelings for a few standard Kubernetes fields and replaces original scrape job name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
+ description: 'RelabelConfigs to apply to samples before scraping.
+ Prometheus Operator automatically adds relabelings for a few
+ standard Kubernetes fields and replaces original scrape job
+ name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
- description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
+ description: 'RelabelConfig allows dynamic rewriting of the
+ label set, being applied to samples before ingestion. It
+ defines `<metric_relabel_configs>`-section of Prometheus
+ configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
- description: Action to perform based on regex matching. Default is 'replace'
+ description: Action to perform based on regex matching.
+ Default is 'replace'
type: string
modulus:
- description: Modulus to take of the hash of the source label values.
+ description: Modulus to take of the hash of the source
+ label values.
format: int64
type: integer
regex:
- description: Regular expression against which the extracted value is matched. Default is '(.*)'
+ description: Regular expression against which the extracted
+ value is matched. Default is '(.*)'
type: string
replacement:
- description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
+ description: Replacement value against which a regex replace
+ is performed if the regular expression matches. Regex
+ capture groups are available. Default is '$1'
type: string
separator:
- description: Separator placed between concatenated source label values. default is ';'.
+ description: Separator placed between concatenated source
+ label values. default is ';'.
type: string
sourceLabels:
- description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
+ description: The source labels select values from existing
+ labels. Their content is concatenated using the configured
+ separator and matched against the configured regular
+ expression for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
- description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
+ description: Label to which the resulting value is written
+ in a replace action. It is mandatory for replace actions.
+ Regex capture groups are available.
type: string
type: object
type: array
@@ -207,19 +265,24 @@
description: TLS configuration to use when scraping the endpoint.
properties:
ca:
- description: Struct containing the CA cert to use for the targets.
+ description: Struct containing the CA cert to use for the
+ targets.
properties:
configMap:
- description: ConfigMap containing data to use for the targets.
+ description: ConfigMap containing data to use for the
+ targets.
properties:
key:
description: The key to select.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the ConfigMap or its key must be defined
+ description: Specify whether the ConfigMap or its
+ key must be defined
type: boolean
required:
- key
@@ -228,32 +291,41 @@
description: Secret containing data to use for the targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key
+ must be defined
type: boolean
required:
- key
type: object
type: object
cert:
- description: Struct containing the client cert file for the targets.
+ description: Struct containing the client cert file for
+ the targets.
properties:
configMap:
- description: ConfigMap containing data to use for the targets.
+ description: ConfigMap containing data to use for the
+ targets.
properties:
key:
description: The key to select.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the ConfigMap or its key must be defined
+ description: Specify whether the ConfigMap or its
+ key must be defined
type: boolean
required:
- key
@@ -262,13 +334,17 @@
description: Secret containing data to use for the targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key
+ must be defined
type: boolean
required:
- key
@@ -278,16 +354,20 @@
description: Disable target certificate validation.
type: boolean
keySecret:
- description: Secret containing the client key file for the targets.
+ description: Secret containing the client key file for the
+ targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
- description: Specify whether the Secret or its key must be defined
|
||||||
|
+ description: Specify whether the Secret or its key must
|
||||||
|
+ be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
@@ -299,30 +379,42 @@
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
podTargetLabels:
|
||||||
|
- description: PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
|
||||||
|
+ description: PodTargetLabels transfers labels on the Kubernetes Pod
|
||||||
|
+ onto the target.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
sampleLimit:
|
||||||
|
- description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
|
||||||
|
+ description: SampleLimit defines per-scrape limit on number of scraped
|
||||||
|
+ samples that will be accepted.
|
||||||
|
format: int64
|
||||||
|
type: integer
|
||||||
|
selector:
|
||||||
|
description: Selector to select Pod objects.
|
||||||
|
properties:
|
||||||
|
matchExpressions:
|
||||||
|
- description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
|
||||||
|
+ description: matchExpressions is a list of label selector requirements.
|
||||||
|
+ The requirements are ANDed.
|
||||||
|
items:
|
||||||
|
- description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
|
||||||
|
+ description: A label selector requirement is a selector that
|
||||||
|
+ contains values, a key, and an operator that relates the key
|
||||||
|
+ and values.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
- description: key is the label key that the selector applies to.
|
||||||
|
+ description: key is the label key that the selector applies
|
||||||
|
+ to.
|
||||||
|
type: string
|
||||||
|
operator:
|
||||||
|
- description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
|
||||||
|
+ description: operator represents a key's relationship to
|
||||||
|
+ a set of values. Valid operators are In, NotIn, Exists
|
||||||
|
+ and DoesNotExist.
|
||||||
|
type: string
|
||||||
|
values:
|
||||||
|
- description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
|
||||||
|
+ description: values is an array of string values. If the
|
||||||
|
+ operator is In or NotIn, the values array must be non-empty.
|
||||||
|
+ If the operator is Exists or DoesNotExist, the values
|
||||||
|
+ array must be empty. This array is replaced during a strategic
|
||||||
|
+ merge patch.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
@@ -334,11 +426,16 @@
|
||||||
|
matchLabels:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
- description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
|
||||||
|
+ description: matchLabels is a map of {key,value} pairs. A single
|
||||||
|
+ {key,value} in the matchLabels map is equivalent to an element
|
||||||
|
+ of matchExpressions, whose key field is "key", the operator
|
||||||
|
+ is "In", and the values array contains only "value". The requirements
|
||||||
|
+ are ANDed.
|
||||||
|
type: object
|
||||||
|
type: object
|
||||||
|
targetLimit:
|
||||||
|
- description: TargetLimit defines a limit on the number of scraped targets that will be accepted.
|
||||||
|
+ description: TargetLimit defines a limit on the number of scraped
|
||||||
|
+ targets that will be accepted.
|
||||||
|
format: int64
|
||||||
|
type: integer
|
||||||
|
required:
@@ -0,0 +1,425 @@
--- charts-original/crds/crd-probes.yaml
+++ charts/crds/crd-probes.yaml
@@ -1,4 +1,4 @@
-# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
+# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml

---
apiVersion: apiextensions.k8s.io/v1
|
||||||
|
@@ -25,58 +25,76 @@
|
||||||
|
description: Probe defines monitoring for a set of static targets or ingresses.
|
||||||
|
properties:
|
||||||
|
apiVersion:
|
||||||
|
- description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||||
|
+ description: 'APIVersion defines the versioned schema of this representation
|
||||||
|
+ of an object. Servers should convert recognized schemas to the latest
|
||||||
|
+ internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||||
|
type: string
|
||||||
|
kind:
|
||||||
|
- description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||||
|
+ description: 'Kind is a string value representing the REST resource this
|
||||||
|
+ object represents. Servers may infer this from the endpoint the client
|
||||||
|
+ submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||||
|
type: string
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
spec:
|
||||||
|
- description: Specification of desired Ingress selection for target discovery by Prometheus.
|
||||||
|
+ description: Specification of desired Ingress selection for target discovery
|
||||||
|
+ by Prometheus.
|
||||||
|
properties:
|
||||||
|
basicAuth:
|
||||||
|
- description: 'BasicAuth allow an endpoint to authenticate over basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
|
||||||
|
+ description: 'BasicAuth allow an endpoint to authenticate over basic
|
||||||
|
+ authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
|
||||||
|
properties:
|
||||||
|
password:
|
||||||
|
- description: The secret in the service monitor namespace that contains the password for authentication.
|
||||||
|
+ description: The secret in the service monitor namespace that
|
||||||
|
+ contains the password for authentication.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
- description: The key of the secret to select from. Must be a valid secret key.
|
||||||
|
+ description: The key of the secret to select from. Must be
|
||||||
|
+ a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
- description: Specify whether the Secret or its key must be defined
|
||||||
|
+ description: Specify whether the Secret or its key must be
|
||||||
|
+ defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
username:
|
||||||
|
- description: The secret in the service monitor namespace that contains the username for authentication.
|
||||||
|
+ description: The secret in the service monitor namespace that
|
||||||
|
+ contains the username for authentication.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
- description: The key of the secret to select from. Must be a valid secret key.
|
||||||
|
+ description: The key of the secret to select from. Must be
|
||||||
|
+ a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
- description: Specify whether the Secret or its key must be defined
|
||||||
|
+ description: Specify whether the Secret or its key must be
|
||||||
|
+ defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
type: object
|
||||||
|
bearerTokenSecret:
|
||||||
|
- description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the probe and accessible by the Prometheus Operator.
|
||||||
|
+ description: Secret to mount to read bearer token for scraping targets.
|
||||||
|
+ The secret needs to be in the same namespace as the probe and accessible
|
||||||
|
+ by the Prometheus Operator.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
- description: The key of the secret to select from. Must be a valid secret key.
|
||||||
|
+ description: The key of the secret to select from. Must be a
|
||||||
|
+ valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
description: Specify whether the Secret or its key must be defined
|
||||||
|
@@ -85,20 +103,28 @@
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
interval:
|
||||||
|
- description: Interval at which targets are probed using the configured prober. If not specified Prometheus' global scrape interval is used.
|
||||||
|
+ description: Interval at which targets are probed using the configured
|
||||||
|
+ prober. If not specified Prometheus' global scrape interval is used.
|
||||||
|
type: string
|
||||||
|
jobName:
|
||||||
|
description: The job name assigned to scraped metrics by default.
|
||||||
|
type: string
|
||||||
|
module:
|
||||||
|
- description: 'The module to use for probing specifying how to probe the target. Example module configuring in the blackbox exporter: https://github.com/prometheus/blackbox_exporter/blob/master/example.yml'
|
||||||
|
+ description: 'The module to use for probing specifying how to probe
|
||||||
|
+ the target. Example module configuring in the blackbox exporter:
|
||||||
|
+ https://github.com/prometheus/blackbox_exporter/blob/master/example.yml'
|
||||||
|
type: string
|
||||||
|
prober:
|
||||||
|
- description: Specification for the prober to use for probing targets. The prober.URL parameter is required. Targets cannot be probed if left empty.
|
||||||
|
+ description: Specification for the prober to use for probing targets.
|
||||||
|
+ The prober.URL parameter is required. Targets cannot be probed if
|
||||||
|
+ left empty.
|
||||||
|
properties:
|
||||||
|
path:
|
||||||
|
description: Path to collect metrics from. Defaults to `/probe`.
|
||||||
|
type: string
|
||||||
|
+ proxyUrl:
|
||||||
|
+ description: Optional ProxyURL.
|
||||||
|
+ type: string
|
||||||
|
scheme:
|
||||||
|
description: HTTP scheme to use for scraping. Defaults to `http`.
|
||||||
|
type: string
|
||||||
|
@@ -112,16 +138,19 @@
|
||||||
|
description: Timeout for scraping metrics from the Prometheus exporter.
|
||||||
|
type: string
|
||||||
|
targets:
|
||||||
|
- description: Targets defines a set of static and/or dynamically discovered targets to be probed using the prober.
|
||||||
|
+ description: Targets defines a set of static and/or dynamically discovered
|
||||||
|
+ targets to be probed using the prober.
|
||||||
|
properties:
|
||||||
|
ingress:
|
||||||
|
- description: Ingress defines the set of dynamically discovered ingress objects which hosts are considered for probing.
|
||||||
|
+ description: Ingress defines the set of dynamically discovered
|
||||||
|
+ ingress objects which hosts are considered for probing.
|
||||||
|
properties:
|
||||||
|
namespaceSelector:
|
||||||
|
description: Select Ingress objects by namespace.
|
||||||
|
properties:
|
||||||
|
any:
|
||||||
|
- description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
|
||||||
|
+ description: Boolean describing whether all namespaces
|
||||||
|
+ are selected in contrast to a list restricting them.
|
||||||
|
type: boolean
|
||||||
|
matchNames:
|
||||||
|
description: List of namespace names.
|
||||||
|
@@ -130,33 +159,48 @@
|
||||||
|
type: array
|
||||||
|
type: object
|
||||||
|
relabelingConfigs:
|
||||||
|
- description: 'RelabelConfigs to apply to samples before ingestion. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||||
|
+ description: 'RelabelConfigs to apply to samples before ingestion.
|
||||||
|
+ More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||||
|
items:
|
||||||
|
- description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||||
|
+ description: 'RelabelConfig allows dynamic rewriting of
|
||||||
|
+ the label set, being applied to samples before ingestion.
|
||||||
|
+ It defines `<metric_relabel_configs>`-section of Prometheus
|
||||||
|
+ configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||||
|
properties:
|
||||||
|
action:
|
||||||
|
- description: Action to perform based on regex matching. Default is 'replace'
|
||||||
|
+ description: Action to perform based on regex matching.
|
||||||
|
+ Default is 'replace'
|
||||||
|
type: string
|
||||||
|
modulus:
|
||||||
|
- description: Modulus to take of the hash of the source label values.
|
||||||
|
+ description: Modulus to take of the hash of the source
|
||||||
|
+ label values.
|
||||||
|
format: int64
|
||||||
|
type: integer
|
||||||
|
regex:
|
||||||
|
- description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||||
|
+ description: Regular expression against which the extracted
|
||||||
|
+ value is matched. Default is '(.*)'
|
||||||
|
type: string
|
||||||
|
replacement:
|
||||||
|
- description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||||
|
+ description: Replacement value against which a regex
|
||||||
|
+ replace is performed if the regular expression matches.
|
||||||
|
+ Regex capture groups are available. Default is '$1'
|
||||||
|
type: string
|
||||||
|
separator:
|
||||||
|
- description: Separator placed between concatenated source label values. default is ';'.
|
||||||
|
+ description: Separator placed between concatenated source
|
||||||
|
+ label values. default is ';'.
|
||||||
|
type: string
|
||||||
|
sourceLabels:
|
||||||
|
- description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||||
|
+ description: The source labels select values from existing
|
||||||
|
+ labels. Their content is concatenated using the configured
|
||||||
|
+ separator and matched against the configured regular
|
||||||
|
+ expression for the replace, keep, and drop actions.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
targetLabel:
|
||||||
|
- description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||||
|
+ description: Label to which the resulting value is written
|
||||||
|
+ in a replace action. It is mandatory for replace actions.
|
||||||
|
+ Regex capture groups are available.
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
@@ -164,18 +208,29 @@
|
||||||
|
description: Select Ingress objects by labels.
|
||||||
|
properties:
|
||||||
|
matchExpressions:
|
||||||
|
- description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
|
||||||
|
+ description: matchExpressions is a list of label selector
|
||||||
|
+ requirements. The requirements are ANDed.
|
||||||
|
items:
|
||||||
|
- description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
|
||||||
|
+ description: A label selector requirement is a selector
|
||||||
|
+ that contains values, a key, and an operator that
|
||||||
|
+ relates the key and values.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
- description: key is the label key that the selector applies to.
|
||||||
|
+ description: key is the label key that the selector
|
||||||
|
+ applies to.
|
||||||
|
type: string
|
||||||
|
operator:
|
||||||
|
- description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
|
||||||
|
+ description: operator represents a key's relationship
|
||||||
|
+ to a set of values. Valid operators are In, NotIn,
|
||||||
|
+ Exists and DoesNotExist.
|
||||||
|
type: string
|
||||||
|
values:
|
||||||
|
- description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
|
||||||
|
+ description: values is an array of string values.
|
||||||
|
+ If the operator is In or NotIn, the values array
|
||||||
|
+ must be non-empty. If the operator is Exists or
|
||||||
|
+ DoesNotExist, the values array must be empty.
|
||||||
|
+ This array is replaced during a strategic merge
|
||||||
|
+ patch.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
@@ -187,51 +242,73 @@
|
||||||
|
matchLabels:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
- description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
|
||||||
|
+ description: matchLabels is a map of {key,value} pairs.
|
||||||
|
+ A single {key,value} in the matchLabels map is equivalent
|
||||||
|
+ to an element of matchExpressions, whose key field is
|
||||||
|
+ "key", the operator is "In", and the values array contains
|
||||||
|
+ only "value". The requirements are ANDed.
|
||||||
|
type: object
|
||||||
|
type: object
|
||||||
|
type: object
|
||||||
|
staticConfig:
|
||||||
|
- description: 'StaticConfig defines static targets which are considers for probing. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config.'
|
||||||
|
+ description: 'StaticConfig defines static targets which are considers
|
||||||
|
+ for probing. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config.'
|
||||||
|
properties:
|
||||||
|
labels:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
- description: Labels assigned to all metrics scraped from the targets.
|
||||||
|
+ description: Labels assigned to all metrics scraped from the
|
||||||
|
+ targets.
|
||||||
|
type: object
|
||||||
|
relabelingConfigs:
|
||||||
|
- description: 'RelabelConfigs to apply to samples before ingestion. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||||
|
+ description: 'RelabelConfigs to apply to samples before ingestion.
|
||||||
|
+ More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||||
|
items:
|
||||||
|
- description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||||
|
+ description: 'RelabelConfig allows dynamic rewriting of
|
||||||
|
+ the label set, being applied to samples before ingestion.
|
||||||
|
+ It defines `<metric_relabel_configs>`-section of Prometheus
|
||||||
|
+ configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||||
|
properties:
|
||||||
|
action:
|
||||||
|
- description: Action to perform based on regex matching. Default is 'replace'
|
||||||
|
+ description: Action to perform based on regex matching.
|
||||||
|
+ Default is 'replace'
|
||||||
|
type: string
|
||||||
|
modulus:
|
||||||
|
- description: Modulus to take of the hash of the source label values.
|
||||||
|
+ description: Modulus to take of the hash of the source
|
||||||
|
+ label values.
|
||||||
|
format: int64
|
||||||
|
type: integer
|
||||||
|
regex:
|
||||||
|
- description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||||
|
+ description: Regular expression against which the extracted
|
||||||
|
+ value is matched. Default is '(.*)'
|
||||||
|
type: string
|
||||||
|
replacement:
|
||||||
|
- description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||||
|
+ description: Replacement value against which a regex
|
||||||
|
+ replace is performed if the regular expression matches.
|
||||||
|
+ Regex capture groups are available. Default is '$1'
|
||||||
|
type: string
|
||||||
|
separator:
|
||||||
|
- description: Separator placed between concatenated source label values. default is ';'.
|
||||||
|
+ description: Separator placed between concatenated source
|
||||||
|
+ label values. default is ';'.
|
||||||
|
type: string
|
||||||
|
sourceLabels:
|
||||||
|
- description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||||
|
+ description: The source labels select values from existing
|
||||||
|
+ labels. Their content is concatenated using the configured
|
||||||
|
+ separator and matched against the configured regular
|
||||||
|
+ expression for the replace, keep, and drop actions.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
targetLabel:
|
||||||
|
- description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||||
|
+ description: Label to which the resulting value is written
|
||||||
|
+ in a replace action. It is mandatory for replace actions.
|
||||||
|
+ Regex capture groups are available.
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
static:
|
||||||
|
- description: Targets is a list of URLs to probe using the configured prober.
|
||||||
|
+ description: Targets is a list of URLs to probe using the
|
||||||
|
+ configured prober.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
@@ -250,10 +327,12 @@
|
||||||
|
description: The key to select.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
- description: Specify whether the ConfigMap or its key must be defined
|
||||||
|
+ description: Specify whether the ConfigMap or its key
|
||||||
|
+ must be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
@@ -262,13 +341,16 @@
|
||||||
|
description: Secret containing data to use for the targets.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
- description: The key of the secret to select from. Must be a valid secret key.
|
||||||
|
+ description: The key of the secret to select from. Must
|
||||||
|
+ be a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
- description: Specify whether the Secret or its key must be defined
|
||||||
|
+ description: Specify whether the Secret or its key must
|
||||||
|
+ be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
@@ -284,10 +366,12 @@
|
||||||
|
description: The key to select.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
- description: Specify whether the ConfigMap or its key must be defined
|
||||||
|
+ description: Specify whether the ConfigMap or its key
|
||||||
|
+ must be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
@@ -296,13 +380,16 @@
|
||||||
|
description: Secret containing data to use for the targets.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
- description: The key of the secret to select from. Must be a valid secret key.
|
||||||
|
+ description: The key of the secret to select from. Must
|
||||||
|
+ be a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
- description: Specify whether the Secret or its key must be defined
|
||||||
|
+ description: Specify whether the Secret or its key must
|
||||||
|
+ be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
@@ -315,13 +402,16 @@
|
||||||
|
description: Secret containing the client key file for the targets.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
- description: The key of the secret to select from. Must be a valid secret key.
|
||||||
|
+ description: The key of the secret to select from. Must be
|
||||||
|
+ a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
- description: Specify whether the Secret or its key must be defined
|
||||||
|
+ description: Specify whether the Secret or its key must be
|
||||||
|
+ defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
File diff suppressed because it is too large
@@ -0,0 +1,54 @@
--- charts-original/crds/crd-prometheusrules.yaml
+++ charts/crds/crd-prometheusrules.yaml
@@ -1,4 +1,4 @@
-# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
+# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml

---
apiVersion: apiextensions.k8s.io/v1
|
||||||
|
@@ -20,13 +20,18 @@
|
||||||
|
- name: v1
|
||||||
|
schema:
|
||||||
|
openAPIV3Schema:
|
||||||
|
- description: PrometheusRule defines recording and alerting rules for a Prometheus instance
|
||||||
|
+ description: PrometheusRule defines recording and alerting rules for a Prometheus
|
||||||
|
+ instance
|
||||||
|
properties:
|
||||||
|
apiVersion:
|
||||||
|
- description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||||
|
+ description: 'APIVersion defines the versioned schema of this representation
|
||||||
|
+ of an object. Servers should convert recognized schemas to the latest
|
||||||
|
+ internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||||
|
type: string
|
||||||
|
kind:
|
||||||
|
- description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||||
|
+ description: 'Kind is a string value representing the REST resource this
|
||||||
|
+ object represents. Servers may infer this from the endpoint the client
|
||||||
|
+ submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||||
|
type: string
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
@@ -36,7 +41,10 @@
|
||||||
|
groups:
|
||||||
|
description: Content of Prometheus rule file
|
||||||
|
items:
|
||||||
|
- description: 'RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
|
||||||
|
+ description: 'RuleGroup is a list of sequentially evaluated recording
|
||||||
|
+ and alerting rules. Note: PartialResponseStrategy is only used
|
||||||
|
+ by ThanosRuler and will be ignored by Prometheus instances. Valid
|
||||||
|
+ values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
|
||||||
|
properties:
|
||||||
|
interval:
|
||||||
|
type: string
|
||||||
|
@@ -46,7 +54,10 @@
|
||||||
|
type: string
|
||||||
|
rules:
|
||||||
|
items:
|
||||||
|
- description: Rule describes an alerting or recording rule.
|
||||||
|
+ description: 'Rule describes an alerting or recording rule
|
||||||
|
+ See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/)
|
||||||
|
+ or [recording](https://www.prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules)
|
||||||
|
+ rule'
|
||||||
|
properties:
|
||||||
|
alert:
|
||||||
|
type: string
@@ -0,0 +1,501 @@
--- charts-original/crds/crd-servicemonitors.yaml
+++ charts/crds/crd-servicemonitors.yaml
@@ -1,4 +1,4 @@
-# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.48.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
+# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml

---
apiVersion: apiextensions.k8s.io/v1
|
||||||
|
@@ -25,50 +25,65 @@
|
||||||
|
description: ServiceMonitor defines monitoring for a set of services.
|
||||||
|
properties:
|
||||||
|
apiVersion:
|
||||||
|
- description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||||
|
+ description: 'APIVersion defines the versioned schema of this representation
|
||||||
|
+ of an object. Servers should convert recognized schemas to the latest
|
||||||
|
+ internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||||
|
type: string
|
||||||
|
kind:
|
||||||
|
- description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||||
|
+ description: 'Kind is a string value representing the REST resource this
|
||||||
|
+ object represents. Servers may infer this from the endpoint the client
|
||||||
|
+ submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||||
|
type: string
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
spec:
|
||||||
|
- description: Specification of desired Service selection for target discovery by Prometheus.
|
||||||
|
+ description: Specification of desired Service selection for target discovery
|
||||||
|
+ by Prometheus.
|
||||||
|
properties:
|
||||||
|
endpoints:
|
||||||
|
description: A list of endpoints allowed as part of this ServiceMonitor.
|
||||||
|
items:
|
||||||
|
- description: Endpoint defines a scrapeable endpoint serving Prometheus metrics.
|
||||||
|
+ description: Endpoint defines a scrapeable endpoint serving Prometheus
|
||||||
|
+ metrics.
|
||||||
|
properties:
|
||||||
|
basicAuth:
|
||||||
|
- description: 'BasicAuth allow an endpoint to authenticate over basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
|
||||||
|
+ description: 'BasicAuth allow an endpoint to authenticate over
|
||||||
|
+ basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
|
||||||
|
properties:
|
||||||
|
password:
|
||||||
|
- description: The secret in the service monitor namespace that contains the password for authentication.
|
||||||
|
+ description: The secret in the service monitor namespace
|
||||||
|
+ that contains the password for authentication.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
- description: The key of the secret to select from. Must be a valid secret key.
|
||||||
|
+ description: The key of the secret to select from. Must
|
||||||
|
+ be a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
- description: Specify whether the Secret or its key must be defined
|
||||||
|
+ description: Specify whether the Secret or its key must
|
||||||
|
+ be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
username:
|
||||||
|
- description: The secret in the service monitor namespace that contains the username for authentication.
|
||||||
|
+ description: The secret in the service monitor namespace
|
||||||
|
+ that contains the username for authentication.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
- description: The key of the secret to select from. Must be a valid secret key.
|
||||||
|
+ description: The key of the secret to select from. Must
|
||||||
|
+ be a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
- description: Specify whether the Secret or its key must be defined
|
||||||
|
+ description: Specify whether the Secret or its key must
|
||||||
|
+ be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
@@ -78,57 +93,79 @@
|
||||||
|
description: File to read bearer token for scraping targets.
|
||||||
|
type: string
|
||||||
|
bearerTokenSecret:
|
||||||
|
- description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the service monitor and accessible by the Prometheus Operator.
|
||||||
|
+ description: Secret to mount to read bearer token for scraping
|
||||||
|
+ targets. The secret needs to be in the same namespace as the
|
||||||
|
+ service monitor and accessible by the Prometheus Operator.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
- description: The key of the secret to select from. Must be a valid secret key.
|
||||||
|
+ description: The key of the secret to select from. Must
|
||||||
|
+ be a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
- description: Specify whether the Secret or its key must be defined
|
||||||
|
+ description: Specify whether the Secret or its key must
|
||||||
|
+ be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
type: object
|
||||||
|
honorLabels:
|
||||||
|
- description: HonorLabels chooses the metric's labels on collisions with target labels.
|
||||||
|
+ description: HonorLabels chooses the metric's labels on collisions
|
||||||
|
+ with target labels.
|
||||||
|
type: boolean
|
||||||
|
honorTimestamps:
|
||||||
|
- description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data.
|
||||||
|
+ description: HonorTimestamps controls whether Prometheus respects
|
||||||
|
+ the timestamps present in scraped data.
|
||||||
|
type: boolean
|
||||||
|
interval:
|
||||||
|
description: Interval at which metrics should be scraped
|
||||||
|
type: string
|
||||||
|
metricRelabelings:
|
||||||
|
- description: MetricRelabelConfigs to apply to samples before ingestion.
|
||||||
|
+ description: MetricRelabelConfigs to apply to samples before
|
||||||
|
+ ingestion.
|
||||||
|
items:
|
||||||
|
- description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||||
|
+ description: 'RelabelConfig allows dynamic rewriting of the
|
||||||
|
+ label set, being applied to samples before ingestion. It
|
||||||
|
+ defines `<metric_relabel_configs>`-section of Prometheus
|
||||||
|
+ configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||||
|
properties:
|
||||||
|
action:
|
||||||
|
- description: Action to perform based on regex matching. Default is 'replace'
|
||||||
|
+ description: Action to perform based on regex matching.
|
||||||
|
+ Default is 'replace'
|
||||||
|
type: string
|
||||||
|
modulus:
|
||||||
|
- description: Modulus to take of the hash of the source label values.
|
||||||
|
+ description: Modulus to take of the hash of the source
|
||||||
|
+ label values.
|
||||||
|
format: int64
|
||||||
|
type: integer
|
||||||
|
regex:
|
||||||
|
- description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||||
|
+ description: Regular expression against which the extracted
|
||||||
|
+ value is matched. Default is '(.*)'
|
||||||
|
type: string
|
||||||
|
replacement:
|
||||||
|
- description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||||
|
+ description: Replacement value against which a regex replace
|
||||||
|
+ is performed if the regular expression matches. Regex
|
||||||
|
+ capture groups are available. Default is '$1'
|
||||||
|
type: string
|
||||||
|
separator:
|
||||||
|
- description: Separator placed between concatenated source label values. default is ';'.
|
||||||
|
+ description: Separator placed between concatenated source
|
||||||
|
+ label values. default is ';'.
|
||||||
|
type: string
|
||||||
|
sourceLabels:
|
||||||
|
- description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||||
|
+ description: The source labels select values from existing
|
||||||
|
+ labels. Their content is concatenated using the configured
|
||||||
|
+ separator and matched against the configured regular
|
||||||
|
+ expression for the replace, keep, and drop actions.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
targetLabel:
|
||||||
|
- description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||||
|
+ description: Label to which the resulting value is written
|
||||||
|
+ in a replace action. It is mandatory for replace actions.
|
||||||
|
+ Regex capture groups are available.
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
@@ -143,39 +180,58 @@
|
||||||
|
description: HTTP path to scrape for metrics.
|
||||||
|
type: string
|
||||||
|
port:
|
||||||
|
- description: Name of the service port this endpoint refers to. Mutually exclusive with targetPort.
|
||||||
|
+ description: Name of the service port this endpoint refers to.
|
||||||
|
+ Mutually exclusive with targetPort.
|
||||||
|
type: string
|
||||||
|
proxyUrl:
|
||||||
|
- description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint.
|
||||||
|
+ description: ProxyURL eg http://proxyserver:2195 Directs scrapes
|
||||||
|
+ to proxy through this endpoint.
|
||||||
|
type: string
|
||||||
|
relabelings:
|
||||||
|
- description: 'RelabelConfigs to apply to samples before scraping. Prometheus Operator automatically adds relabelings for a few standard Kubernetes fields and replaces original scrape job name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||||
|
+ description: 'RelabelConfigs to apply to samples before scraping.
|
||||||
|
+ Prometheus Operator automatically adds relabelings for a few
|
||||||
|
+ standard Kubernetes fields and replaces original scrape job
|
||||||
|
+ name with __tmp_prometheus_job_name. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||||
|
items:
|
||||||
|
- description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||||
|
+ description: 'RelabelConfig allows dynamic rewriting of the
|
||||||
|
+ label set, being applied to samples before ingestion. It
|
||||||
|
+ defines `<metric_relabel_configs>`-section of Prometheus
|
||||||
|
+ configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||||
|
properties:
|
||||||
|
action:
|
||||||
|
- description: Action to perform based on regex matching. Default is 'replace'
|
||||||
|
+ description: Action to perform based on regex matching.
|
||||||
|
+ Default is 'replace'
|
||||||
|
type: string
|
||||||
|
modulus:
|
||||||
|
- description: Modulus to take of the hash of the source label values.
|
||||||
|
+ description: Modulus to take of the hash of the source
|
||||||
|
+ label values.
|
||||||
|
format: int64
|
||||||
|
type: integer
|
||||||
|
regex:
|
||||||
|
- description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||||
|
+ description: Regular expression against which the extracted
|
||||||
|
+ value is matched. Default is '(.*)'
|
||||||
|
type: string
|
||||||
|
replacement:
|
||||||
|
- description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||||
|
+ description: Replacement value against which a regex replace
|
||||||
|
+ is performed if the regular expression matches. Regex
|
||||||
|
+ capture groups are available. Default is '$1'
|
||||||
|
type: string
|
||||||
|
separator:
|
||||||
|
- description: Separator placed between concatenated source label values. default is ';'.
|
||||||
|
+ description: Separator placed between concatenated source
|
||||||
|
+ label values. default is ';'.
|
||||||
|
type: string
|
||||||
|
sourceLabels:
|
||||||
|
- description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||||
|
+ description: The source labels select values from existing
|
||||||
|
+ labels. Their content is concatenated using the configured
|
||||||
|
+ separator and matched against the configured regular
|
||||||
|
+ expression for the replace, keep, and drop actions.
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
targetLabel:
|
||||||
|
- description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||||
|
+ description: Label to which the resulting value is written
|
||||||
|
+ in a replace action. It is mandatory for replace actions.
|
||||||
|
+ Regex capture groups are available.
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
@@ -189,25 +245,32 @@
|
||||||
|
anyOf:
|
||||||
|
- type: integer
|
||||||
|
- type: string
|
||||||
|
- description: Name or number of the target port of the Pod behind the Service, the port must be specified with container port property. Mutually exclusive with port.
|
||||||
|
+ description: Name or number of the target port of the Pod behind
|
||||||
|
+ the Service, the port must be specified with container port
|
||||||
|
+ property. Mutually exclusive with port.
|
||||||
|
x-kubernetes-int-or-string: true
|
||||||
|
tlsConfig:
|
||||||
|
description: TLS configuration to use when scraping the endpoint
|
||||||
|
properties:
|
||||||
|
ca:
|
||||||
|
- description: Struct containing the CA cert to use for the targets.
|
||||||
|
+ description: Struct containing the CA cert to use for the
|
||||||
|
+ targets.
|
||||||
|
properties:
|
||||||
|
configMap:
|
||||||
|
- description: ConfigMap containing data to use for the targets.
|
||||||
|
+ description: ConfigMap containing data to use for the
|
||||||
|
+ targets.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The key to select.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||||
|
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||||
|
+ TODO: Add other useful fields. apiVersion, kind,
|
||||||
|
+ uid?'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
- description: Specify whether the ConfigMap or its key must be defined
|
||||||
|
+ description: Specify whether the ConfigMap or its
|
||||||
|
+ key must be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
@@ -216,35 +279,45 @@
description: Secret containing data to use for the targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key
+ must be defined
type: boolean
required:
- key
type: object
type: object
caFile:
- description: Path to the CA cert in the Prometheus container to use for the targets.
+ description: Path to the CA cert in the Prometheus container
+ to use for the targets.
type: string
cert:
- description: Struct containing the client cert file for the targets.
+ description: Struct containing the client cert file for
+ the targets.
properties:
configMap:
- description: ConfigMap containing data to use for the targets.
+ description: ConfigMap containing data to use for the
+ targets.
properties:
key:
description: The key to select.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the ConfigMap or its key must be defined
+ description: Specify whether the ConfigMap or its
+ key must be defined
type: boolean
required:
- key
@@ -253,38 +326,48 @@
description: Secret containing data to use for the targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind,
+ uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key
+ must be defined
type: boolean
required:
- key
type: object
type: object
certFile:
- description: Path to the client cert file in the Prometheus container for the targets.
+ description: Path to the client cert file in the Prometheus
+ container for the targets.
type: string
insecureSkipVerify:
description: Disable target certificate validation.
type: boolean
keyFile:
- description: Path to the client key file in the Prometheus container for the targets.
+ description: Path to the client key file in the Prometheus
+ container for the targets.
type: string
keySecret:
- description: Secret containing the client key file for the targets.
+ description: Secret containing the client key file for the
+ targets.
properties:
key:
- description: The key of the secret to select from. Must be a valid secret key.
+ description: The key of the secret to select from. Must
+ be a valid secret key.
type: string
name:
- description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
- description: Specify whether the Secret or its key must be defined
+ description: Specify whether the Secret or its key must
+ be defined
type: boolean
required:
- key
@@ -296,13 +379,18 @@
type: object
type: array
jobLabel:
- description: The label to use to retrieve the job name from.
+ description: "Chooses the label of the Kubernetes `Endpoints`. Its
+ value will be used for the `job`-label's value of the created metrics.
+ \n Default & fallback value: the name of the respective Kubernetes
+ `Endpoint`."
type: string
namespaceSelector:
- description: Selector to select which namespaces the Endpoints objects are discovered from.
+ description: Selector to select which namespaces the Kubernetes Endpoints
+ objects are discovered from.
properties:
any:
- description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
+ description: Boolean describing whether all namespaces are selected
+ in contrast to a list restricting them.
type: boolean
matchNames:
description: List of namespace names.
@@ -311,30 +399,42 @@
type: array
type: object
podTargetLabels:
- description: PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
+ description: PodTargetLabels transfers labels on the Kubernetes `Pod`
+ onto the created metrics.
items:
type: string
type: array
sampleLimit:
- description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
+ description: SampleLimit defines per-scrape limit on number of scraped
+ samples that will be accepted.
format: int64
type: integer
selector:
description: Selector to select Endpoints objects.
properties:
matchExpressions:
- description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
+ description: matchExpressions is a list of label selector requirements.
+ The requirements are ANDed.
items:
- description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
+ description: A label selector requirement is a selector that
+ contains values, a key, and an operator that relates the key
+ and values.
properties:
key:
- description: key is the label key that the selector applies to.
+ description: key is the label key that the selector applies
+ to.
type: string
operator:
- description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
+ description: operator represents a key's relationship to
+ a set of values. Valid operators are In, NotIn, Exists
+ and DoesNotExist.
type: string
values:
- description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
+ description: values is an array of string values. If the
+ operator is In or NotIn, the values array must be non-empty.
+ If the operator is Exists or DoesNotExist, the values
+ array must be empty. This array is replaced during a strategic
+ merge patch.
items:
type: string
type: array
@@ -346,16 +446,23 @@
matchLabels:
additionalProperties:
type: string
- description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
+ description: matchLabels is a map of {key,value} pairs. A single
+ {key,value} in the matchLabels map is equivalent to an element
+ of matchExpressions, whose key field is "key", the operator
+ is "In", and the values array contains only "value". The requirements
+ are ANDed.
type: object
type: object
targetLabels:
- description: TargetLabels transfers labels on the Kubernetes Service onto the target.
+ description: TargetLabels transfers labels from the Kubernetes `Service`
+ onto the created metrics. All labels set in `selector.matchLabels`
+ are automatically transferred.
items:
type: string
type: array
targetLimit:
- description: TargetLimit defines a limit on the number of scraped targets that will be accepted.
+ description: TargetLimit defines a limit on the number of scraped
+ targets that will be accepted.
format: int64
type: integer
required:
File diff suppressed because it is too large
@@ -140,3 +140,17 @@
app.kubernetes.io/part-of: {{ template "kube-prometheus-stack.name" . }}
chart: {{ template "kube-prometheus-stack.chartref" . }}
release: {{ $.Release.Name | quote }}
@@ -98,7 +225,12 @@

{{/* Allow KubeVersion to be overridden. */}}
{{- define "kube-prometheus-stack.ingress.kubeVersion" -}}
- {{- default .Capabilities.KubeVersion.Version .Values.kubeVersionOverride -}}
+ {{- $kubeVersion := default .Capabilities.KubeVersion.Version .Values.kubeVersionOverride -}}
+ {{/* Special use case for Amazon EKS, Google GKE */}}
+ {{- if and (regexMatch "\\d+\\.\\d+\\.\\d+-(?:eks|gke).+" $kubeVersion) (not .Values.kubeVersionOverride) -}}
+ {{- $kubeVersion = regexFind "\\d+\\.\\d+\\.\\d+" $kubeVersion -}}
+ {{- end -}}
+ {{- $kubeVersion -}}
{{- end -}}

{{/* Get Ingress API Version */}}

@@ -0,0 +1,36 @@
--- charts-original/templates/exporters/kube-state-metrics/serviceMonitor.yaml
+++ charts/templates/exporters/kube-state-metrics/serviceMonitor.yaml
@@ -10,7 +10,7 @@
spec:
jobLabel: app.kubernetes.io/name
endpoints:
- - port: http
+ - port: metrics
{{- if .Values.kubeStateMetrics.serviceMonitor.interval }}
interval: {{ .Values.kubeStateMetrics.serviceMonitor.interval }}
{{- end }}
@@ -26,6 +26,24 @@
relabelings:
{{ toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4 }}
{{- end }}
+{{- if .Values.kubeStateMetrics.serviceMonitor.selfMonitor.enabled }}
+ - port: metrics
+ {{- if .Values.kubeStateMetrics.serviceMonitor.interval }}
+ interval: {{ .Values.kubeStateMetrics.serviceMonitor.interval }}
+ {{- end }}
+ {{- if .Values.kubeStateMetrics.serviceMonitor.proxyUrl }}
+ proxyUrl: {{ .Values.kubeStateMetrics.serviceMonitor.proxyUrl}}
+ {{- end }}
+ honorLabels: true
+{{- if .Values.kubeStateMetrics.serviceMonitor.metricRelabelings }}
+ metricRelabelings:
+{{ tpl (toYaml .Values.kubeStateMetrics.serviceMonitor.metricRelabelings | indent 4) . }}
+{{- end }}
+{{- if .Values.kubeStateMetrics.serviceMonitor.relabelings }}
+ relabelings:
+{{ toYaml .Values.kubeStateMetrics.serviceMonitor.relabelings | indent 4 }}
+{{- end }}
+{{- end }}
{{- if .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }}
namespaceSelector:
matchNames:

@@ -33,3 +33,11 @@
{{- if .Values.kubelet.serviceMonitor.interval }}
interval: {{ .Values.kubelet.serviceMonitor.interval }}
{{- end }}
@@ -168,6 +171,6 @@
- {{ .Values.kubelet.namespace }}
selector:
matchLabels:
- app.kubernetes.io/managed-by: prometheus-operator
+ app.kubernetes.io/name: kubelet
k8s-app: kubelet
{{- end}}

@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/apiserver.yaml
+++ charts/templates/grafana/dashboards-1.14/apiserver.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'apiserver' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'apiserver' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

|
@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/cluster-total.yaml
+++ charts/templates/grafana/dashboards-1.14/cluster-total.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'cluster-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'cluster-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

|
@@ -1,6 +1,10 @@
--- charts-original/templates/grafana/dashboards-1.14/controller-manager.yaml
+++ charts/templates/grafana/dashboards-1.14/controller-manager.yaml
@@ -4,11 +4,12 @@
@@ -1,14 +1,15 @@
{{- /*
-Generated from 'controller-manager' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'controller-manager' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
@@ -28,33 +32,45 @@
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -176,7 +181,7 @@
@@ -176,10 +181,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
- "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
@@ -282,7 +287,7 @@
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
"refId": "A"
}
],
@@ -282,10 +287,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
- "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
@@ -388,7 +393,7 @@
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
"refId": "A"
}
],
@@ -388,10 +393,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
- "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}",
"refId": "A"
}
],
@@ -494,28 +499,28 @@
"steppedLine": false,
"targets": [

|
||||||
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml
|
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml
|
||||||
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml
|
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
{{- /*
|
||||||
|
-Generated from 'k8s-resources-cluster' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
|
||||||
|
+Generated from 'k8s-resources-cluster' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
|
||||||
|
Do not change in-place! In order to change this file first read following link:
|
||||||
|
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
|
||||||
|
*/ -}}
|
||||||
@@ -8,7 +8,7 @@
|
@@ -8,7 +8,7 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
|
@ -9,3 +16,138 @@
|
||||||
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }}
|
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }}
|
||||||
annotations:
|
annotations:
|
||||||
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
|
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
|
||||||
|
@@ -247,7 +247,7 @@
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
|
||||||
|
+ "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -499,7 +499,7 @@
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"memory\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
|
||||||
|
+ "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -594,7 +594,7 @@
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{`{{`}}namespace{{`}}`}}",
|
||||||
|
@@ -885,7 +885,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -903,7 +903,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", resource=\"cpu\"}) by (namespace)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -912,7 +912,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"cpu\"}) by (namespace)",
|
||||||
|
+ "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -921,7 +921,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"cpu\"}) by (namespace)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -1321,7 +1321,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)",
|
||||||
|
+ "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -1330,7 +1330,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)",
|
||||||
|
+ "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -1339,7 +1339,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)",
|
||||||
|
+ "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -1348,7 +1348,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)",
|
||||||
|
+ "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -2149,7 +2149,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -2235,7 +2235,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -2333,7 +2333,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -2419,7 +2419,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
|
|
@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'k8s-resources-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'k8s-resources-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@@ -9,3 +16,147 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
|
@@ -78,7 +78,7 @@
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -162,7 +162,7 @@
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -446,7 +446,7 @@
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{`{{`}}pod{{`}}`}}",
|
||||||
|
@@ -697,7 +697,7 @@
|
||||||
|
],
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -706,7 +706,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
|
||||||
|
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -715,7 +715,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -724,7 +724,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
|
||||||
|
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -733,7 +733,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -1171,7 +1171,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
|
||||||
|
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -1180,7 +1180,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
|
||||||
|
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -1189,7 +1189,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
|
||||||
|
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -1198,7 +1198,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
|
||||||
|
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -1842,7 +1842,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -1928,7 +1928,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -2026,7 +2026,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -2112,7 +2112,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
|
|
@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-node.yaml
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-node.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'k8s-resources-node' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'k8s-resources-node' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@@ -9,3 +16,93 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-node" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
||||||
|
@@ -77,7 +77,7 @@
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{`{{`}}pod{{`}}`}}",
|
||||||
|
@@ -312,7 +312,7 @@
|
||||||
|
],
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -321,7 +321,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)",
|
||||||
|
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -330,7 +330,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -339,7 +339,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)",
|
||||||
|
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -348,7 +348,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"}) by (pod)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -749,7 +749,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)",
|
||||||
|
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -758,7 +758,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -767,7 +767,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)",
|
||||||
|
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -776,7 +776,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"}) by (pod)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
|
|
@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'k8s-resources-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'k8s-resources-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@@ -9,3 +16,129 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-pod" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
||||||
|
@@ -94,7 +94,7 @@
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{`{{`}}container{{`}}`}}",
|
||||||
|
@@ -450,7 +450,7 @@
|
||||||
|
],
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -459,7 +459,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)",
|
||||||
|
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -468,7 +468,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -477,7 +477,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)",
|
||||||
|
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -486,7 +486,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)",
|
||||||
|
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -922,7 +922,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)",
|
||||||
|
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -931,7 +931,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)",
|
||||||
|
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -940,7 +940,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)",
|
||||||
|
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -949,7 +949,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)",
|
||||||
|
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -1294,7 +1294,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -1381,7 +1381,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -1480,7 +1480,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -1567,7 +1567,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
|
|
@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'k8s-resources-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'k8s-resources-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@@ -9,3 +16,75 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
||||||
|
@@ -77,7 +77,7 @@
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||||
|
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{`{{`}}pod{{`}}`}}",
|
||||||
|
@@ -312,7 +312,7 @@
|
||||||
|
],
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||||
|
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -330,7 +330,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||||
|
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -348,7 +348,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||||
|
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -1520,7 +1520,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -1606,7 +1606,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -1704,7 +1704,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -1790,7 +1790,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
|
|
@ -1,5 +1,12 @@
|
||||||
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml
|
--- charts-original/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml
|
||||||
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml
|
+++ charts/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
{{- /*
|
||||||
|
-Generated from 'k8s-resources-workloads-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
|
||||||
|
+Generated from 'k8s-resources-workloads-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
|
||||||
|
Do not change in-place! In order to change this file first read following link:
|
||||||
|
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
|
||||||
|
*/ -}}
|
||||||
@@ -8,7 +8,7 @@
|
@@ -8,7 +8,7 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
|
@ -9,3 +16,75 @@
|
||||||
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }}
|
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }}
|
||||||
annotations:
|
annotations:
|
||||||
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
|
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
|
||||||
|
@@ -98,7 +98,7 @@
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||||
|
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{`{{`}}workload{{`}}`}} - {{`{{`}}workload_type{{`}}`}}",
|
||||||
|
@@ -396,7 +396,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||||
|
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -414,7 +414,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||||
|
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -432,7 +432,7 @@
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||||
|
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
@@ -1707,7 +1707,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -1793,7 +1793,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -1891,7 +1891,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
@@ -1977,7 +1977,7 @@
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
- "format": "Bps",
|
||||||
|
+ "format": "pps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/namespace-by-pod.yaml
+++ charts/templates/grafana/dashboards-1.14/namespace-by-pod.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'namespace-by-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'namespace-by-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/namespace-by-workload.yaml
+++ charts/templates/grafana/dashboards-1.14/namespace-by-workload.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'namespace-by-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'namespace-by-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml
+++ charts/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'node-cluster-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'node-cluster-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/node-rsrc-use.yaml
+++ charts/templates/grafana/dashboards-1.14/node-rsrc-use.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'node-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'node-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/nodes.yaml
+++ charts/templates/grafana/dashboards-1.14/nodes.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'nodes' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'nodes' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml
+++ charts/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'persistentvolumesusage' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'persistentvolumesusage' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/pod-total.yaml
+++ charts/templates/grafana/dashboards-1.14/pod-total.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'pod-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'pod-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap

@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml
+++ charts/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'prometheus-remote-write' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'prometheus-remote-write' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/prometheus.yaml
+++ charts/templates/grafana/dashboards-1.14/prometheus.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'prometheus' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'prometheus' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@@ -9,3 +16,18 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -574,6 +574,14 @@
"steppedLine": false,
"targets": [
{
+ "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total[1m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "exceeded body size limit: {{`{{`}}job{{`}}`}}",
+ "legendLink": null,
+ "step": 10
+ },
+ {
"expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))",
"format": "time_series",
"intervalFactor": 2,
@@ -1,6 +1,10 @@
--- charts-original/templates/grafana/dashboards-1.14/proxy.yaml
+++ charts/templates/grafana/dashboards-1.14/proxy.yaml
@@ -4,11 +4,12 @@
@@ -1,14 +1,15 @@
{{- /*
-Generated from 'proxy' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'proxy' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
@@ -1,6 +1,10 @@
--- charts-original/templates/grafana/dashboards-1.14/scheduler.yaml
+++ charts/templates/grafana/dashboards-1.14/scheduler.yaml
@@ -4,11 +4,12 @@
@@ -1,14 +1,15 @@
{{- /*
-Generated from 'scheduler' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'scheduler' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
@@ -28,72 +32,86 @@
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -176,28 +181,28 @@
@@ -176,31 +181,31 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} e2e",
- "legendFormat": "{{`{{`}}instance{{`}}`}} e2e",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e",
"refId": "A"
},
{
- "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} binding",
- "legendFormat": "{{`{{`}}instance{{`}}`}} binding",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding",
"refId": "B"
},
{
- "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm",
- "legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm",
"refId": "C"
},
{
- "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} volume",
- "legendFormat": "{{`{{`}}instance{{`}}`}} volume",
@@ -290,28 +295,28 @@
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume",
"refId": "D"
}
],
@@ -290,31 +295,31 @@
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} e2e",
- "legendFormat": "{{`{{`}}instance{{`}}`}} e2e",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} binding",
- "legendFormat": "{{`{{`}}instance{{`}}`}} binding",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding",
"refId": "B"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm",
- "legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm",
"refId": "C"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}} volume",
- "legendFormat": "{{`{{`}}instance{{`}}`}} volume",
+ "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume",
"refId": "D"
}
],
@@ -417,28 +422,28 @@
"steppedLine": false,
"targets": [
@@ -1,5 +1,12 @@
--- charts-original/templates/grafana/dashboards-1.14/workload-total.yaml
+++ charts/templates/grafana/dashboards-1.14/workload-total.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'workload-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/grafana-dashboardDefinitions.yaml
+Generated from 'workload-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -8,7 +8,7 @@
apiVersion: v1
kind: ConfigMap
@@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:

@@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:

@@ -1,5 +1,11 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: batch/v1
kind: Job
metadata:
@@ -32,9 +32,9 @@
containers:
- name: create

@@ -1,5 +1,11 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: batch/v1
kind: Job
metadata:
@@ -32,9 +32,9 @@
containers:
- name: patch

@@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/psp.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/psp.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create .Values.global.rbac.pspEnabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create .Values.global.rbac.pspEnabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:

@@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:

@@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:

@@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }}
apiVersion: v1
kind: ServiceAccount
metadata:

@@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled }}
apiVersion: admissionregistration.k8s.io/v1
kind: MutatingWebhookConfiguration
metadata:

@@ -0,0 +1,8 @@
--- charts-original/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml
+++ charts/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.prometheusOperator.admissionWebhooks.enabled }}
+{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled }}
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
metadata:
@@ -24,7 +24,19 @@
{{- end }}
- --config-reloader-cpu={{ .Values.prometheusOperator.configReloaderCpu }}
- --config-reloader-memory={{ .Values.prometheusOperator.configReloaderMemory }}
@@ -130,16 +130,16 @@
@@ -78,6 +78,11 @@
{{- if .Values.prometheusOperator.prometheusInstanceNamespaces }}
- --prometheus-instance-namespaces={{ .Values.prometheusOperator.prometheusInstanceNamespaces | join "," }}
{{- end }}
+ {{- if .Values.prometheusOperator.thanosImage.sha }}
+ - --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }}
+ {{- else }}
+ - --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}
+ {{- end }}
{{- if .Values.prometheusOperator.thanosRulerInstanceNamespaces }}
- --thanos-ruler-instance-namespaces={{ .Values.prometheusOperator.thanosRulerInstanceNamespaces | join "," }}
{{- end }}
@@ -130,16 +135,16 @@
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
{{- end }}
@@ -34,7 +34,26 @@
{{ toYaml .Values.prometheus.prometheusSpec.tolerations | indent 4 }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.topologySpreadConstraints }}
@@ -266,7 +268,7 @@
@@ -254,11 +256,17 @@
name: {{ .Values.prometheus.prometheusSpec.additionalScrapeConfigsSecret.name }}
key: {{ .Values.prometheus.prometheusSpec.additionalScrapeConfigsSecret.key }}
{{- end }}
-{{- if .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs }}
+{{- if or .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret }}
additionalAlertManagerConfigs:
+{{- if .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs }}
name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-am-confg
key: additional-alertmanager-configs.yaml
{{- end }}
+{{- if .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret }}
+ name: {{ .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret.name }}
+ key: {{ .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret.key }}
+{{- end }}
+{{- end }}
{{- if .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigs }}
additionalAlertRelabelConfigs:
name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-am-relabel-confg
@@ -266,7 +274,7 @@
{{- end }}
{{- if .Values.prometheus.prometheusSpec.containers }}
containers:
@@ -43,7 +62,7 @@
{{- end }}
{{- if .Values.prometheus.prometheusSpec.initContainers }}
initContainers:
@@ -282,6 +284,7 @@
@@ -282,6 +290,7 @@
{{- if .Values.prometheus.prometheusSpec.disableCompaction }}
disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }}
{{- end }}
@@ -51,7 +70,7 @@
portName: {{ .Values.prometheus.prometheusSpec.portName }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.volumes }}
@@ -326,3 +329,4 @@
@@ -326,3 +335,4 @@
{{- if .Values.prometheus.prometheusSpec.allowOverlappingBlocks }}
allowOverlappingBlocks: {{ .Values.prometheus.prometheusSpec.allowOverlappingBlocks }}
{{- end }}
@@ -0,0 +1,176 @@
--- charts-original/templates/prometheus/rules-1.14/alertmanager.rules.yaml
+++ charts/templates/prometheus/rules-1.14/alertmanager.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/alertmanager-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -26,41 +26,146 @@
groups:
- name: alertmanager.rules
rules:
- - alert: AlertmanagerConfigInconsistent
+ - alert: AlertmanagerFailedReload
+ annotations:
+ description: Configuration has failed to load for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerfailedreload
+ summary: Reloading an Alertmanager configuration has failed.
+ expr: |-
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]) == 0
+ for: 10m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: AlertmanagerMembersInconsistent
+ annotations:
+ description: Alertmanager {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} has only found {{`{{`}} $value {{`}}`}} members of the {{`{{`}}$labels.job{{`}}`}} cluster.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagermembersinconsistent
+ summary: A member of an Alertmanager cluster has not found all other cluster members.
+ expr: |-
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m])
+ < on (namespace,service) group_left
+ count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]))
+ for: 15m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: AlertmanagerFailedToSendAlerts
+ annotations:
+ description: Alertmanager {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} failed to send {{`{{`}} $value | humanizePercentage {{`}}`}} of notifications to {{`{{`}} $labels.integration {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerfailedtosendalerts
+ summary: An Alertmanager instance failed to send notifications.
+ expr: |-
+ (
+ rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m])
+ /
+ rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m])
+ )
+ > 0.01
+ for: 5m
+ labels:
+ severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: AlertmanagerClusterFailedToSendAlerts
annotations:
- message: 'The configuration of the instances of the Alertmanager cluster `{{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.service {{`}}`}}` are out of sync.
-
- {{`{{`}} range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query {{`}}`}}
-
- Configuration hash for pod {{`{{`}} .Labels.pod {{`}}`}} is "{{`{{`}} printf "%.f" .Value {{`}}`}}"
-
- {{`{{`}} end {{`}}`}}
-
- '
- expr: count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})) != 1
+ description: The minimum notification failure rate to {{`{{`}} $labels.integration {{`}}`}} sent from any instance in the {{`{{`}}$labels.job{{`}}`}} cluster is {{`{{`}} $value | humanizePercentage {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerclusterfailedtosendalerts
+ summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
+ expr: |-
+ min by (namespace,service, integration) (
+ rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration=~`.*`}[5m])
+ /
+ rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration=~`.*`}[5m])
+ )
+ > 0.01
for: 5m
labels:
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- - alert: AlertmanagerFailedReload
+ - alert: AlertmanagerClusterFailedToSendAlerts
annotations:
- message: Reloading Alertmanager's configuration has failed for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}.
- expr: alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 0
- for: 10m
+ description: The minimum notification failure rate to {{`{{`}} $labels.integration {{`}}`}} sent from any instance in the {{`{{`}}$labels.job{{`}}`}} cluster is {{`{{`}} $value | humanizePercentage {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerclusterfailedtosendalerts
+ summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
+ expr: |-
+ min by (namespace,service, integration) (
+ rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration!~`.*`}[5m])
+ /
+ rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration!~`.*`}[5m])
+ )
+ > 0.01
+ for: 5m
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- - alert: AlertmanagerMembersInconsistent
+ - alert: AlertmanagerConfigInconsistent
+ annotations:
+ description: Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have different configurations.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerconfiginconsistent
+ summary: Alertmanager instances within the same cluster have different configurations.
+ expr: |-
+ count by (namespace,service) (
+ count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})
+ )
+ != 1
+ for: 20m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: AlertmanagerClusterDown
+ annotations:
+ description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have been up for less than half of the last 5m.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerclusterdown
+ summary: Half or more of the Alertmanager instances within the same cluster are down.
+ expr: |-
+ (
+ count by (namespace,service) (
+ avg_over_time(up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]) < 0.5
+ )
+ /
+ count by (namespace,service) (
+ up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
+ )
+ )
+ >= 0.5
+ for: 5m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: AlertmanagerClusterCrashlooping
annotations:
- message: Alertmanager has not found all other members of the cluster.
+ description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have restarted at least 5 times in the last 10m.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerclustercrashlooping
+ summary: Half or more of the Alertmanager instances within the same cluster are crashlooping.
expr: |-
- alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
- != on (service) GROUP_LEFT()
- count by (service) (alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})
+ (
+ count by (namespace,service) (
+ changes(process_start_time_seconds{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[10m]) > 4
|
||||||
|
+ )
|
||||||
|
+ /
|
||||||
|
+ count by (namespace,service) (
|
||||||
|
+ up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
|
||||||
|
+ )
|
||||||
|
+ )
|
||||||
|
+ >= 0.5
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
|
@ -0,0 +1,38 @@
--- charts-original/templates/prometheus/rules-1.14/general.rules.yaml
+++ charts/templates/prometheus/rules-1.14/general.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -26,7 +26,9 @@
rules:
- alert: TargetDown
annotations:
- message: '{{`{{`}} printf "%.4g" $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.service {{`}}`}} targets in {{`{{`}} $labels.namespace {{`}}`}} namespace are down.'
+ description: '{{`{{`}} printf "%.4g" $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.service {{`}}`}} targets in {{`{{`}} $labels.namespace {{`}}`}} namespace are down.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-targetdown
+ summary: One or more targets are unreachable.
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
for: 10m
labels:
@@ -36,7 +38,7 @@
{{- end }}
- alert: Watchdog
annotations:
- message: 'This is an alert meant to ensure that the entire alerting pipeline is functional.
+ description: 'This is an alert meant to ensure that the entire alerting pipeline is functional.

This alert is always firing, therefore it should always be firing in Alertmanager

@@ -47,6 +49,8 @@
"DeadMansSnitch" integration in PagerDuty.

'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-watchdog
+ summary: An alert that should always be firing to certify that Alertmanager is working properly.
expr: vector(1)
labels:
severity: none
@ -1,15 +1,23 @@
--- charts-original/templates/prometheus/rules-1.14/k8s.rules.yaml
+++ charts/templates/prometheus/rules-1.14/k8s.rules.yaml
@@ -26,31 +26,31 @@
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -26,36 +26,42 @@
rules:
- expr: |-
sum by (cluster, namespace, pod, container) (
- rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m])
+ rate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}[5m])
+ irate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}[5m])
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
)
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
- record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
+ record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
- expr: |-
- container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
+ container_memory_working_set_bytes{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!=""}
@ -37,3 +45,68 @@
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info{node!=""})
)
record: node_namespace_pod_container:container_memory_swap
- expr: |-
+ kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster)
+ group_left() max by (namespace, pod) (
+ (kube_pod_status_phase{phase=~"Pending|Running"} == 1)
+ )
+ record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests
+ - expr: |-
sum by (namespace, cluster) (
sum by (namespace, pod, cluster) (
max by (namespace, pod, container, cluster) (
@@ -67,6 +73,12 @@
)
record: namespace_memory:kube_pod_container_resource_requests:sum
- expr: |-
+ kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster)
+ group_left() max by (namespace, pod) (
+ (kube_pod_status_phase{phase=~"Pending|Running"} == 1)
+ )
+ record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests
+ - expr: |-
sum by (namespace, cluster) (
sum by (namespace, pod, cluster) (
max by (namespace, pod, container, cluster) (
@@ -78,6 +90,40 @@
)
record: namespace_cpu:kube_pod_container_resource_requests:sum
- expr: |-
+ kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster)
+ group_left() max by (namespace, pod) (
+ (kube_pod_status_phase{phase=~"Pending|Running"} == 1)
+ )
+ record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits
+ - expr: |-
+ sum by (namespace, cluster) (
+ sum by (namespace, pod, cluster) (
+ max by (namespace, pod, container, cluster) (
+ kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"}
+ ) * on(namespace, pod, cluster) group_left() max by (namespace, pod) (
+ kube_pod_status_phase{phase=~"Pending|Running"} == 1
+ )
+ )
+ )
+ record: namespace_memory:kube_pod_container_resource_limits:sum
+ - expr: |-
+ kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster)
+ group_left() max by (namespace, pod) (
+ (kube_pod_status_phase{phase=~"Pending|Running"} == 1)
+ )
+ record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits
+ - expr: |-
+ sum by (namespace, cluster) (
+ sum by (namespace, pod, cluster) (
+ max by (namespace, pod, container, cluster) (
+ kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"}
+ ) * on(namespace, pod, cluster) group_left() max by (namespace, pod) (
+ kube_pod_status_phase{phase=~"Pending|Running"} == 1
+ )
+ )
+ )
+ record: namespace_cpu:kube_pod_container_resource_limits:sum
+ - expr: |-
max by (cluster, namespace, workload, pod) (
label_replace(
label_replace(
@ -0,0 +1,187 @@
|
||||||
|
--- charts-original/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml
|
||||||
|
+++ charts/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
{{- /*
|
||||||
|
-Generated from 'kube-apiserver-availability.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
|
||||||
|
+Generated from 'kube-apiserver-availability.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
|
||||||
|
Do not change in-place! In order to change this file first read following link:
|
||||||
|
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
|
||||||
|
*/ -}}
|
||||||
|
@@ -25,60 +25,70 @@
|
||||||
|
- interval: 3m
|
||||||
|
name: kube-apiserver-availability.rules
|
||||||
|
rules:
|
||||||
|
+ - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30
|
||||||
|
+ record: code_verb:apiserver_request_total:increase30d
|
||||||
|
+ - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
|
||||||
|
+ labels:
|
||||||
|
+ verb: read
|
||||||
|
+ record: code:apiserver_request_total:increase30d
|
||||||
|
+ - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
|
||||||
|
+ labels:
|
||||||
|
+ verb: write
|
||||||
|
+ record: code:apiserver_request_total:increase30d
|
||||||
|
- expr: |-
|
||||||
|
1 - (
|
||||||
|
(
|
||||||
|
# write too slow
|
||||||
|
- sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
||||||
|
+ sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
||||||
|
-
|
||||||
|
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
||||||
|
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
||||||
|
) +
|
||||||
|
(
|
||||||
|
# read too slow
|
||||||
|
- sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
|
||||||
|
+ sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
(
|
||||||
|
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d]))
|
||||||
|
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="1"}[30d]))
|
||||||
|
or
|
||||||
|
vector(0)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d]))
|
||||||
|
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="5"}[30d]))
|
||||||
|
+
|
||||||
|
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
|
||||||
|
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="40"}[30d]))
|
||||||
|
)
|
||||||
|
) +
|
||||||
|
# errors
|
||||||
|
- sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
|
||||||
|
+ sum by (cluster) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(code:apiserver_request_total:increase30d)
|
||||||
|
+ sum by (cluster) (code:apiserver_request_total:increase30d)
|
||||||
|
labels:
|
||||||
|
verb: all
|
||||||
|
record: apiserver_request:availability30d
|
||||||
|
- expr: |-
|
||||||
|
1 - (
|
||||||
|
- sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
|
||||||
|
+ sum by (cluster) (increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
(
|
||||||
|
- sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d]))
|
||||||
|
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="1"}[30d]))
|
||||||
|
or
|
||||||
|
vector(0)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d]))
|
||||||
|
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="5"}[30d]))
|
||||||
|
+
|
||||||
|
- sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
|
||||||
|
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="40"}[30d]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
- sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
|
||||||
|
+ sum by (cluster) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(code:apiserver_request_total:increase30d{verb="read"})
|
||||||
|
+ sum by (cluster) (code:apiserver_request_total:increase30d{verb="read"})
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: apiserver_request:availability30d
|
||||||
|
@@ -86,75 +96,33 @@
|
||||||
|
1 - (
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
||||||
|
+ sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
||||||
|
-
|
||||||
|
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
||||||
|
+ sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
- sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
|
||||||
|
+ sum by (cluster) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(code:apiserver_request_total:increase30d{verb="write"})
|
||||||
|
+ sum by (cluster) (code:apiserver_request_total:increase30d{verb="write"})
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: apiserver_request:availability30d
|
||||||
|
- - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30
|
||||||
|
- record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[1h]))
|
||||||
|
- record: code_verb:apiserver_request_total:increase1h
|
||||||
|
- - expr: sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
|
||||||
|
+ - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
- record: code:apiserver_request_total:increase30d
|
||||||
|
- - expr: sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
|
||||||
|
+ record: code_resource:apiserver_request_total:rate5m
|
||||||
|
+ - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
- record: code:apiserver_request_total:increase30d
|
||||||
|
+ record: code_resource:apiserver_request_total:rate5m
|
||||||
|
+ - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h]))
|
||||||
|
+ record: code_verb:apiserver_request_total:increase1h
|
||||||
|
+ - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h]))
|
||||||
|
+ record: code_verb:apiserver_request_total:increase1h
|
||||||
|
+ - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h]))
|
||||||
|
+ record: code_verb:apiserver_request_total:increase1h
|
||||||
|
+ - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
|
||||||
|
+ record: code_verb:apiserver_request_total:increase1h
|
||||||
|
{{- end }}
|
||||||
|
\ No newline at end of file
|
|
@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml
+++ charts/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kube-apiserver-slos' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kube-apiserver-slos' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@ -0,0 +1,401 @@
|
||||||
|
--- charts-original/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml
|
||||||
|
+++ charts/templates/prometheus/rules-1.14/kube-apiserver.rules.yaml
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
{{- /*
|
||||||
|
-Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
|
||||||
|
+Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
|
||||||
|
Do not change in-place! In order to change this file first read following link:
|
||||||
|
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
|
||||||
|
*/ -}}
|
||||||
|
@@ -28,26 +28,26 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
(
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d]))
|
||||||
|
or
|
||||||
|
vector(0)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d]))
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: apiserver_request:burnrate1d
|
||||||
|
@@ -55,26 +55,26 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
(
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h]))
|
||||||
|
or
|
||||||
|
vector(0)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h]))
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: apiserver_request:burnrate1h
|
||||||
|
@@ -82,26 +82,26 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
(
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h]))
|
||||||
|
or
|
||||||
|
vector(0)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h]))
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: apiserver_request:burnrate2h
|
||||||
|
@@ -109,26 +109,26 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
(
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m]))
|
||||||
|
or
|
||||||
|
vector(0)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m]))
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: apiserver_request:burnrate30m
|
||||||
|
@@ -136,26 +136,26 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
(
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d]))
|
||||||
|
or
|
||||||
|
vector(0)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d]))
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: apiserver_request:burnrate3d
|
||||||
|
@@ -163,26 +163,26 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
(
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m]))
|
||||||
|
or
|
||||||
|
vector(0)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m]))
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: apiserver_request:burnrate5m
|
||||||
|
@@ -190,26 +190,26 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
(
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h]))
|
||||||
|
or
|
||||||
|
vector(0)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h]))
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: apiserver_request:burnrate6h
|
||||||
|
@@ -217,15 +217,15 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
|
||||||
|
-
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: apiserver_request:burnrate1d
|
||||||
|
@@ -233,15 +233,15 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
|
||||||
|
-
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: apiserver_request:burnrate1h
|
||||||
|
@@ -249,15 +249,15 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
|
||||||
|
-
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: apiserver_request:burnrate2h
|
||||||
|
@@ -265,15 +265,15 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
|
||||||
|
-
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: apiserver_request:burnrate30m
|
||||||
|
@@ -281,15 +281,15 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
|
||||||
|
-
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: apiserver_request:burnrate3d
|
||||||
|
@@ -297,15 +297,15 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
|
||||||
|
-
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: apiserver_request:burnrate5m
|
||||||
|
@@ -313,32 +313,32 @@
|
||||||
|
(
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
|
||||||
|
-
|
||||||
|
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
|
||||||
|
+ sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: apiserver_request:burnrate6h
|
||||||
|
- - expr: sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||||
|
+ - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: code_resource:apiserver_request_total:rate5m
|
||||||
|
- - expr: sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
|
||||||
|
+ - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: code_resource:apiserver_request_total:rate5m
|
||||||
|
- - expr: histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
|
||||||
|
+ - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
|
||||||
|
labels:
|
||||||
|
quantile: '0.99'
|
||||||
|
verb: read
|
||||||
|
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
|
||||||
|
- - expr: histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
|
||||||
|
+ - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
|
||||||
|
labels:
|
||||||
|
quantile: '0.99'
|
||||||
|
verb: write
|
|
@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml
+++ charts/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kube-prometheus-general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kube-prometheus-general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml
+++ charts/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@ -1,6 +1,10 @@
--- charts-original/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml
+++ charts/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml
@@ -4,7 +4,8 @@
@@ -1,10 +1,11 @@
{{- /*
-Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
@ -0,0 +1,43 @@
--- charts-original/templates/prometheus/rules-1.14/kube-state-metrics.yaml
+++ charts/templates/prometheus/rules-1.14/kube-state-metrics.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kube-state-metrics' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'kube-state-metrics' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-state-metrics-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -56,4 +56,32 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
+ - alert: KubeStateMetricsShardingMismatch
+ annotations:
+ description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricsshardingmismatch
+ summary: kube-state-metrics sharding is misconfigured.
+ expr: stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) != 0
+ for: 15m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: KubeStateMetricsShardsMissing
+ annotations:
+ description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricsshardsmissing
+ summary: kube-state-metrics shards are missing.
+ expr: |-
+ 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) - 1
+ -
+ sum( 2 ^ max by (shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) )
+ != 0
+ for: 15m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
{{- end }}
\ No newline at end of file
@ -1,6 +1,10 @@
--- charts-original/templates/prometheus/rules-1.14/kubelet.rules.yaml
+++ charts/templates/prometheus/rules-1.14/kubelet.rules.yaml
@@ -4,7 +4,8 @@
@@ -1,10 +1,11 @@
{{- /*
-Generated from 'kubelet.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubelet.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
@@ -0,0 +1,79 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-apps.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-apps.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -30,7 +30,10 @@
description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 10 minutes.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping
summary: Pod is crash looping.
- expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m]) * 60 * 5 > 0
+ expr: |-
+ increase(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m]) > 0
+ and
+ kube_pod_container_status_waiting{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} == 1
for: 15m
labels:
severity: warning
@@ -79,7 +82,7 @@
expr: |-
(
kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
- !=
+ >
kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
) and (
changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[10m])
@@ -257,23 +260,23 @@
{{- end }}
- alert: KubeHpaReplicasMismatch
annotations:
- description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.hpa {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.
+ description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubehpareplicasmismatch
summary: HPA has not matched descired number of replicas.
expr: |-
- (kube_hpa_status_desired_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
!=
- kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
+ kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
and
- (kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
>
- kube_hpa_spec_min_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
+ kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
and
- (kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
<
- kube_hpa_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
+ kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"})
and
- changes(kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) == 0
+ changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) == 0
for: 15m
labels:
severity: warning
@@ -282,13 +285,13 @@
{{- end }}
- alert: KubeHpaMaxedOut
annotations:
- description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.hpa {{`}}`}} has been running at max replicas for longer than 15 minutes.
+ description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubehpamaxedout
summary: HPA is running at max replicas
expr: |-
- kube_hpa_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
==
- kube_hpa_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+ kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
for: 15m
labels:
severity: warning
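
The KubePodCrashLooping hunk above replaces the rate-based test with two conditions: the restart counter increased over the last 10 minutes and the container is currently in a waiting state, which cuts noise from one-off restarts. A rough Python sketch of that decision, using hypothetical sample values (not the chart's own code):

# Sketch of the reworked crash-loop condition (assumed sample values).
restarts_10m_ago = 7
restarts_now = 9
container_waiting = True   # e.g. the container sits in CrashLoopBackOff

restarts_increased = (restarts_now - restarts_10m_ago) > 0  # increase(...[10m]) > 0

# The old rule fired on any positive restart rate; the new one also requires
# the container to still be stuck waiting, and the state must hold for 15m.
alert = restarts_increased and container_waiting
print(alert)  # True
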
@@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-resources.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-resources.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -1,6 +1,13 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-storage.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-storage.yaml
@@ -31,9 +31,9 @@
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -31,10 +31,12 @@
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefillingup
summary: PersistentVolume is filling up.
expr: |-
@@ -10,9 +17,12 @@
- kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
+ kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}
< 0.03
+ and
+ kubelet_volume_stats_used_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
for: 1m
labels:
@@ -48,12 +48,12 @@
severity: critical
@@ -48,12 +50,14 @@
summary: PersistentVolume is filling up.
expr: |-
(
@@ -24,6 +34,8 @@
) < 0.15
and
- predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
+ kubelet_volume_stats_used_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0
+ and
+ predict_linear(kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
for: 1h
labels:
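
In the KubePersistentVolumeFillingUp hunks above, predict_linear extrapolates the available-bytes series four days ahead, and the added used_bytes > 0 term filters out volumes that report zero used bytes. A small Python sketch of the same linear extrapolation, with made-up samples (an illustration, not the chart's implementation):

# Linear extrapolation in the spirit of predict_linear(series[6h], 4*24*3600).
# Assumed samples: (seconds, bytes available) over a 6h window.
samples = [(0, 50e9), (10800, 35e9), (21600, 20e9)]  # losing ~5 GB per hour

n = len(samples)
mean_t = sum(t for t, _ in samples) / n
mean_v = sum(v for _, v in samples) / n
slope = sum((t - mean_t) * (v - mean_v) for t, v in samples) / \
        sum((t - mean_t) ** 2 for t, _ in samples)
intercept = mean_v - slope * mean_t

horizon = 4 * 24 * 3600  # 4 days, as in the rule
predicted = intercept + slope * (samples[-1][0] + horizon)
print(predicted < 0)     # True -> volume predicted to run out within 4 days
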
@@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kubernetes-system-apiserver' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-system-apiserver' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -1,6 +1,10 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml
@@ -4,7 +4,8 @@
@@ -1,10 +1,11 @@
{{- /*
-Generated from 'kubernetes-system-controller-manager' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-system-controller-manager' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
@@ -1,5 +1,12 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kubernetes-system-kubelet' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-system-kubelet' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -96,7 +96,7 @@
description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletpodstartuplatencyhigh
@@ -1,6 +1,10 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml
@@ -4,7 +4,8 @@
@@ -1,10 +1,11 @@
{{- /*
-Generated from 'kubernetes-system-scheduler' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-system-scheduler' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
@@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/kubernetes-system.yaml
+++ charts/templates/prometheus/rules-1.14/kubernetes-system.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -0,0 +1,67 @@
--- charts-original/templates/prometheus/rules-1.14/node-exporter.rules.yaml
+++ charts/templates/prometheus/rules-1.14/node-exporter.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'node-exporter.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/node-exporter-prometheusRule.yaml
+Generated from 'node-exporter.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/node-exporter-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -33,9 +33,9 @@
record: instance:node_num_cpu:sum
- expr: |-
1 - avg without (cpu, mode) (
- rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m])
+ rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[5m])
)
- record: instance:node_cpu_utilisation:rate1m
+ record: instance:node_cpu_utilisation:rate5m
- expr: |-
(
node_load1{job="node-exporter"}
@@ -50,30 +50,30 @@
node_memory_MemTotal_bytes{job="node-exporter"}
)
record: instance:node_memory_utilisation:ratio
- - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
- record: instance:node_vmstat_pgmajfault:rate1m
- - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
- record: instance_device:node_disk_io_time_seconds:rate1m
- - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
- record: instance_device:node_disk_io_time_weighted_seconds:rate1m
+ - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m])
+ record: instance:node_vmstat_pgmajfault:rate5m
+ - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m])
+ record: instance_device:node_disk_io_time_seconds:rate5m
+ - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m])
+ record: instance_device:node_disk_io_time_weighted_seconds:rate5m
- expr: |-
sum without (device) (
- rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m])
+ rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m])
)
- record: instance:node_network_receive_bytes_excluding_lo:rate1m
+ record: instance:node_network_receive_bytes_excluding_lo:rate5m
- expr: |-
sum without (device) (
- rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m])
+ rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m])
)
- record: instance:node_network_transmit_bytes_excluding_lo:rate1m
+ record: instance:node_network_transmit_bytes_excluding_lo:rate5m
- expr: |-
sum without (device) (
- rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m])
+ rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m])
)
- record: instance:node_network_receive_drop_excluding_lo:rate1m
+ record: instance:node_network_receive_drop_excluding_lo:rate5m
- expr: |-
sum without (device) (
- rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
+ rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m])
)
- record: instance:node_network_transmit_drop_excluding_lo:rate1m
+ record: instance:node_network_transmit_drop_excluding_lo:rate5m
{{- end }}
\ No newline at end of file
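
The node-exporter.rules hunks above move every recording rule from a 1m to a 5m rate window and rename the recorded series from ...rate1m to ...rate5m to match. With a typical 30s scrape interval (assumed here for illustration) a 1m window holds at most two samples, so a single missed scrape breaks rate(); 5m leaves plenty of headroom. A quick back-of-the-envelope check:

# How many samples each range window can hold at a given scrape interval.
scrape_interval = 30          # seconds (assumed value, not read from the chart)

for window in (60, 300):      # the old [1m] and new [5m] windows
    max_samples = window // scrape_interval
    # rate() needs at least 2 points inside the window to return anything.
    print(f"{window}s window: up to {max_samples} samples, "
          f"tolerates {max_samples - 2} missed scrapes")
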
@@ -0,0 +1,178 @@
--- charts-original/templates/prometheus/rules-1.14/node-exporter.yaml
+++ charts/templates/prometheus/rules-1.14/node-exporter.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'node-exporter' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/node-exporter-prometheusRule.yaml
+Generated from 'node-exporter' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/node-exporter-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -27,6 +27,7 @@
- alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: |-
(
@@ -45,6 +46,7 @@
- alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: |-
(
@@ -63,6 +65,7 @@
- alert: NodeFilesystemAlmostOutOfSpace
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutofspace
summary: Filesystem has less than 5% space left.
expr: |-
(
@@ -79,6 +82,7 @@
- alert: NodeFilesystemAlmostOutOfSpace
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutofspace
summary: Filesystem has less than 3% space left.
expr: |-
(
@@ -95,6 +99,7 @@
- alert: NodeFilesystemFilesFillingUp
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
expr: |-
(
@@ -113,6 +118,7 @@
- alert: NodeFilesystemFilesFillingUp
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up fast.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
expr: |-
(
@@ -131,6 +137,7 @@
- alert: NodeFilesystemAlmostOutOfFiles
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutoffiles
summary: Filesystem has less than 5% inodes left.
expr: |-
(
@@ -147,6 +154,7 @@
- alert: NodeFilesystemAlmostOutOfFiles
annotations:
description: Filesystem on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefilesystemalmostoutoffiles
summary: Filesystem has less than 3% inodes left.
expr: |-
(
@@ -163,6 +171,7 @@
- alert: NodeNetworkReceiveErrs
annotations:
description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} receive errors in the last two minutes.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworkreceiveerrs
summary: Network interface is reporting many receive errors.
expr: rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
for: 1h
@@ -174,6 +183,7 @@
- alert: NodeNetworkTransmitErrs
annotations:
description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} transmit errors in the last two minutes.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworktransmiterrs
summary: Network interface is reporting many transmit errors.
expr: rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
for: 1h
@@ -185,6 +195,7 @@
- alert: NodeHighNumberConntrackEntriesUsed
annotations:
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodehighnumberconntrackentriesused
summary: Number of conntrack are getting close to the limit.
expr: (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
labels:
@@ -195,6 +206,7 @@
- alert: NodeTextFileCollectorScrapeError
annotations:
description: Node Exporter text file collector failed to scrape.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodetextfilecollectorscrapeerror
summary: Node Exporter text file collector failed to scrape.
expr: node_textfile_scrape_error{job="node-exporter"} == 1
labels:
@@ -204,7 +216,8 @@
{{- end }}
- alert: NodeClockSkewDetected
annotations:
- message: Clock on {{`{{`}} $labels.instance {{`}}`}} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
+ description: Clock on {{`{{`}} $labels.instance {{`}}`}} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeclockskewdetected
summary: Clock skew detected.
expr: |-
(
@@ -226,7 +239,8 @@
{{- end }}
- alert: NodeClockNotSynchronising
annotations:
- message: Clock on {{`{{`}} $labels.instance {{`}}`}} is not synchronising. Ensure NTP is configured on this host.
+ description: Clock on {{`{{`}} $labels.instance {{`}}`}} is not synchronising. Ensure NTP is configured on this host.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeclocknotsynchronising
summary: Clock not synchronising.
expr: |-
min_over_time(node_timex_sync_status[5m]) == 0
@@ -241,6 +255,7 @@
- alert: NodeRAIDDegraded
annotations:
description: RAID array '{{`{{`}} $labels.device {{`}}`}}' on {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-noderaiddegraded
summary: RAID Array is degraded
expr: node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
for: 15m
@@ -252,11 +267,42 @@
- alert: NodeRAIDDiskFailure
annotations:
description: At least one device in RAID array on {{`{{`}} $labels.instance {{`}}`}} failed. Array '{{`{{`}} $labels.device {{`}}`}}' needs attention and possibly a disk swap.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-noderaiddiskfailure
summary: Failed device in RAID array
- expr: node_md_disks{state="fail"} > 0
+ expr: node_md_disks{state="failed"} > 0
labels:
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
+ - alert: NodeFileDescriptorLimit
+ annotations:
+ description: File descriptors limit at {{`{{`}} $labels.instance {{`}}`}} is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefiledescriptorlimit
+ summary: Kernel is predicted to exhaust file descriptors limit soon.
+ expr: |-
+ (
+ node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70
+ )
+ for: 15m
+ labels:
+ severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: NodeFileDescriptorLimit
+ annotations:
+ description: File descriptors limit at {{`{{`}} $labels.instance {{`}}`}} is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodefiledescriptorlimit
+ summary: Kernel is predicted to exhaust file descriptors limit soon.
+ expr: |-
+ (
+ node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90
+ )
+ for: 15m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
{{- end }}
\ No newline at end of file
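
The two new NodeFileDescriptorLimit alerts above use the same percentage expression, warning at 70% and going critical at 90% of the kernel-wide file descriptor limit. Roughly the same check in Python, reading the values node_filefd_allocated/node_filefd_maximum come from on Linux (an illustration only, not part of the chart):

# Mirrors node_filefd_allocated / node_filefd_maximum * 100 against both thresholds.
# /proc/sys/fs/file-nr holds three numbers: allocated, unused, maximum (Linux only).
with open("/proc/sys/fs/file-nr") as f:
    allocated, _unused, maximum = (int(x) for x in f.read().split())

usage_pct = allocated * 100 / maximum
for threshold, severity in ((70, "warning"), (90, "critical")):
    if usage_pct > threshold:
        print(f"NodeFileDescriptorLimit would fire at {severity}: {usage_pct:.2f}%")
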
@@ -0,0 +1,20 @@
--- charts-original/templates/prometheus/rules-1.14/node-network.yaml
+++ charts/templates/prometheus/rules-1.14/node-network.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'node-network' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'node-network' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kube-prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -26,7 +26,9 @@
rules:
- alert: NodeNetworkInterfaceFlapping
annotations:
- message: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing it's up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}"
+ description: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing it's up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodenetworkinterfaceflapping
+ summary: Network interface is often changin it's status
expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
for: 2m
labels:
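
In the NodeNetworkInterfaceFlapping rule above, changes(node_network_up[2m]) counts how many times the interface's up/down value flipped in the last two minutes; more than two flips, sustained for the rule's 2m hold, fires the alert. A tiny Python equivalent of that counting, with made-up samples:

# Counts value flips the way changes(node_network_up{...}[2m]) would.
# Assumed samples of the interface's up (1) / down (0) state over the window.
up_samples = [1, 0, 1, 1, 0, 1, 1, 1]

flips = sum(1 for prev, cur in zip(up_samples, up_samples[1:]) if cur != prev)
print(flips, flips > 2)  # 4 flips -> True, the alert condition is met
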
@@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/node.rules.yaml
+++ charts/templates/prometheus/rules-1.14/node.rules.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/kubernetes-prometheusRule.yaml
+Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -0,0 +1,9 @@
--- charts-original/templates/prometheus/rules-1.14/prometheus-operator.yaml
+++ charts/templates/prometheus/rules-1.14/prometheus-operator.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-operator-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -0,0 +1,231 @@
--- charts-original/templates/prometheus/rules-1.14/prometheus.yaml
+++ charts/templates/prometheus/rules-1.14/prometheus.yaml
@@ -1,5 +1,5 @@
{{- /*
-Generated from 'prometheus' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/master/manifests/prometheus-rules.yaml
+Generated from 'prometheus' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/prometheus-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
@@ -29,6 +29,7 @@
- alert: PrometheusBadConfig
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to reload its configuration.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusbadconfig
summary: Failed Prometheus configuration reload.
expr: |-
# Without max_over_time, failed scrapes could create false negatives, see
@@ -43,6 +44,7 @@
- alert: PrometheusNotificationQueueRunningFull
annotations:
description: Alert notification queue of Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is running full.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusnotificationqueuerunningfull
summary: Prometheus alert notification queue predicted to run full in less than 30m.
expr: |-
# Without min_over_time, failed scrapes could create false negatives, see
@@ -61,6 +63,7 @@
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
annotations:
description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}}.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheuserrorsendingalertstosomealertmanagers
summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
expr: |-
(
@@ -76,27 +79,10 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
- - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
- annotations:
- description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% minimum errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to any Alertmanager.'
- summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
- expr: |-
- min without(alertmanager) (
- rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
- /
- rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
- )
- * 100
- > 3
- for: 15m
- labels:
- severity: critical
-{{- if .Values.defaultRules.additionalRuleLabels }}
-{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
-{{- end }}
- alert: PrometheusNotConnectedToAlertmanagers
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not connected to any Alertmanagers.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusnotconnectedtoalertmanagers
summary: Prometheus is not connected to any Alertmanagers.
expr: |-
# Without max_over_time, failed scrapes could create false negatives, see
@@ -111,6 +97,7 @@
- alert: PrometheusTSDBReloadsFailing
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} reload failures over the last 3h.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheustsdbreloadsfailing
summary: Prometheus has issues reloading blocks from disk.
expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0
for: 4h
@@ -122,6 +109,7 @@
- alert: PrometheusTSDBCompactionsFailing
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last 3h.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheustsdbcompactionsfailing
summary: Prometheus has issues compacting blocks.
expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0
for: 4h
@@ -133,8 +121,18 @@
- alert: PrometheusNotIngestingSamples
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not ingesting samples.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusnotingestingsamples
summary: Prometheus is not ingesting samples.
- expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
+ expr: |-
+ (
+ rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
+ and
+ (
+ sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}) > 0
+ or
+ sum without(rule_group) (prometheus_rule_group_rules{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}) > 0
+ )
+ )
for: 10m
labels:
severity: warning
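
The PrometheusNotIngestingSamples change above tightens the bare "appended-samples rate <= 0" test so that it only counts when the server actually has work to do: at least one scrape target (metadata cache entries > 0) or at least one loaded rule. A compact Python rendering of that condition, with hypothetical values:

# Sketch of the tightened PrometheusNotIngestingSamples condition.
samples_appended_rate = 0.0   # rate(prometheus_tsdb_head_samples_appended_total[5m])
scrape_targets = 12           # sum(prometheus_target_metadata_cache_entries) (assumed)
configured_rules = 0          # sum(prometheus_rule_group_rules) (assumed)

has_work_to_do = scrape_targets > 0 or configured_rules > 0
alert = samples_appended_rate <= 0 and has_work_to_do   # held for 10m by the rule
print(alert)  # True: targets exist but nothing is being appended
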
@@ -144,6 +142,7 @@
- alert: PrometheusDuplicateTimestamps
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with different values but duplicated timestamp.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusduplicatetimestamps
summary: Prometheus is dropping samples with duplicate timestamps.
expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 10m
@@ -155,6 +154,7 @@
- alert: PrometheusOutOfOrderTimestamps
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with timestamps arriving out of order.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusoutofordertimestamps
summary: Prometheus drops samples with out-of-order timestamps.
expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 10m
@@ -166,15 +166,16 @@
- alert: PrometheusRemoteStorageFailures
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} failed to send {{`{{`}} printf "%.1f" $value {{`}}`}}% of the samples to {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusremotestoragefailures
summary: Prometheus fails to send samples to remote storage.
expr: |-
(
- rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ (rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]))
/
(
- rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ (rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]))
+
- rate(prometheus_remote_storage_succeeded_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
+ (rate(prometheus_remote_storage_succeeded_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]))
)
)
* 100
@@ -188,13 +189,14 @@
- alert: PrometheusRemoteWriteBehind
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write is {{`{{`}} printf "%.1f" $value {{`}}`}}s behind for {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusremotewritebehind
summary: Prometheus remote write is behind.
expr: |-
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
- - on(job, instance) group_right
+ - ignoring(remote_name, url) group_right
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])
)
> 120
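
The PrometheusRemoteWriteBehind hunk above compares the newest timestamp Prometheus has ingested with the newest timestamp each remote-write queue has shipped, and fires when the gap exceeds 120 seconds; the matching now ignores the per-queue remote_name and url labels instead of joining on(job, instance), so every queue is compared against the same ingested high-water mark. The lag computation itself is just a subtraction (the queue names, URLs and timestamps below are invented examples):

# Remote-write lag as computed by the rule, with assumed sample timestamps.
highest_seen_ts = 1_700_000_600      # newest sample timestamp ingested (seconds)
highest_sent_ts = {                  # newest timestamp shipped, per queue (assumed)
    ("receiver-a", "http://receiver-a.example/api/v1/write"): 1_700_000_590,
    ("receiver-b", "http://receiver-b.example/api/v1/write"): 1_700_000_420,
}

for (remote_name, url), sent in highest_sent_ts.items():
    lag = highest_seen_ts - sent
    if lag > 120:                    # the rule's threshold
        print(f"{remote_name} ({url}) is {lag}s behind")
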
@@ -207,6 +209,7 @@
- alert: PrometheusRemoteWriteDesiredShards
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write desired shards calculation wants to run {{`{{`}} $value {{`}}`}} shards for queue {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}, which is more than the max of {{`{{`}} printf `prometheus_remote_storage_shards_max{instance="%s",job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}` $labels.instance | query | first | value {{`}}`}}.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusremotewritedesiredshards
summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
expr: |-
# Without max_over_time, failed scrapes could create false negatives, see
@@ -225,6 +228,7 @@
- alert: PrometheusRuleFailures
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to evaluate {{`{{`}} printf "%.0f" $value {{`}}`}} rules in the last 5m.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusrulefailures
summary: Prometheus is failing rule evaluations.
expr: increase(prometheus_rule_evaluation_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 15m
@@ -236,6 +240,7 @@
- alert: PrometheusMissingRuleEvaluations
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has missed {{`{{`}} printf "%.0f" $value {{`}}`}} rule group evaluations in the last 5m.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusmissingruleevaluations
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: increase(prometheus_rule_group_iterations_missed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 15m
@@ -247,6 +252,7 @@
- alert: PrometheusTargetLimitHit
annotations:
description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because the number of targets exceeded the configured target_limit.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheustargetlimithit
summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 15m
@@ -255,4 +261,47 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
+ - alert: PrometheusLabelLimitHit
+ annotations:
+ description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit.
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheuslabellimithit
+ summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit.
+ expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: PrometheusTargetSyncFailure
+ annotations:
+ description: '{{`{{`}} printf "%.0f" $value {{`}}`}} targets in Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} have failed to sync because invalid configuration was supplied.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheustargetsyncfailure
+ summary: Prometheus has failed to sync targets.
+ expr: increase(prometheus_target_sync_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[30m]) > 0
+ for: 5m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+ - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
+ annotations:
+ description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% minimum errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to any Alertmanager.'
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheuserrorsendingalertstoanyalertmanager
+ summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
+ expr: |-
+ min without (alertmanager) (
+ rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}",alertmanager!~``}[5m])
+ /
+ rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}",alertmanager!~``}[5m])
+ )
+ * 100
+ > 3
+ for: 15m
+ labels:
+ severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
{{- end }}
\ No newline at end of file
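
The re-added PrometheusErrorSendingAlertsToAnyAlertmanager rule above moves to the end of the group and its selectors gain an alertmanager!~`` matcher, which drops series whose alertmanager label is empty; the alert still fires when the smallest per-Alertmanager error ratio exceeds 3%, i.e. when even the healthiest Alertmanager is failing. The min-over-ratios logic in Python, with illustrative numbers:

# min without(alertmanager)(errors/sent) * 100 > 3, with assumed per-instance rates.
error_rate = {"am-0": 0.8, "am-1": 1.2, "am-2": 0.5}    # errors/s per Alertmanager
sent_rate = {"am-0": 10.0, "am-1": 10.0, "am-2": 10.0}  # sent/s per Alertmanager

ratios = {am: error_rate[am] / sent_rate[am] * 100 for am in error_rate}
worst_case_min = min(ratios.values())  # every Alertmanager is at least this bad

print(ratios)              # {'am-0': 8.0, 'am-1': 12.0, 'am-2': 5.0}
print(worst_case_min > 3)  # True -> even the best Alertmanager sees >3% errors
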
@@ -756,7 +756,18 @@
## If your kube proxy is not deployed as a pod, specify IPs it can be found on
##
@@ -1266,6 +1857,13 @@
@@ -1258,6 +1849,10 @@
# replacement: $1
# action: replace
+ # Enable self metrics configuration for Service Monitor
+ selfMonitor:
+ enabled: false
+
## Configuration for kube-state-metrics subchart
##
kube-state-metrics:
@@ -1266,6 +1861,13 @@
create: true
podSecurityPolicy:
enabled: true
@ -770,7 +781,7 @@
|
||||||
|
|
||||||
## Deploy node exporter as a daemonset to all nodes
|
## Deploy node exporter as a daemonset to all nodes
|
||||||
##
|
##
|
||||||
@@ -1319,6 +1917,16 @@
|
@@ -1319,6 +1921,16 @@
|
||||||
extraArgs:
|
extraArgs:
|
||||||
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
|
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
|
||||||
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
|
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
|
||||||
|
@ -787,7 +798,7 @@
|
||||||
|
|
||||||
## Manages Prometheus and Alertmanager components
|
## Manages Prometheus and Alertmanager components
|
||||||
##
|
##
|
||||||
@@ -1331,8 +1939,8 @@
|
@@ -1331,8 +1943,8 @@
|
||||||
enabled: true
|
enabled: true
|
||||||
# Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants
|
# Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants
|
||||||
tlsMinVersion: VersionTLS13
|
tlsMinVersion: VersionTLS13
|
||||||
|
@ -798,7 +809,7 @@
|
||||||
|
|
||||||
## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted
|
## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted
|
||||||
## rules from making their way into prometheus and potentially preventing the container from starting
|
## rules from making their way into prometheus and potentially preventing the container from starting
|
||||||
@@ -1349,7 +1957,7 @@
|
@@ -1349,7 +1961,7 @@
|
||||||
patch:
|
patch:
|
||||||
enabled: true
|
enabled: true
|
||||||
image:
|
image:
|
||||||
|
@ -807,7 +818,7 @@
|
||||||
tag: v1.5.2
|
tag: v1.5.2
|
||||||
sha: ""
|
sha: ""
|
||||||
pullPolicy: IfNotPresent
|
pullPolicy: IfNotPresent
|
||||||
@@ -1498,13 +2106,13 @@
|
@@ -1498,13 +2110,13 @@
|
||||||
|
|
||||||
## Resource limits & requests
|
## Resource limits & requests
|
||||||
##
|
##
|
||||||
@@ -828,25 +839,43 @@
 
   # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico),
   # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working
-@@ -1557,7 +2165,7 @@
+@@ -1557,8 +2169,8 @@
   ## Prometheus-operator image
   ##
   image:
 -    repository: quay.io/prometheus-operator/prometheus-operator
+-    tag: v0.48.0
 +    repository: rancher/mirrored-prometheus-operator-prometheus-operator
-     tag: v0.48.0
++    tag: v0.49.0
     sha: ""
     pullPolicy: IfNotPresent
-@@ -1573,7 +2181,7 @@
+@@ -1573,8 +2185,8 @@
   ## Prometheus-config-reloader image to use for config and rule reloading
   ##
   prometheusConfigReloaderImage:
 -    repository: quay.io/prometheus-operator/prometheus-config-reloader
+-    tag: v0.48.0
 +    repository: rancher/mirrored-prometheus-operator-prometheus-config-reloader
-     tag: v0.48.0
++    tag: v0.49.0
     sha: ""
 
-@@ -1659,7 +2267,7 @@
+  ## Set the prometheus config reloader side-car CPU limit
+@@ -1585,6 +2197,13 @@
+  ##
+  configReloaderMemory: 50Mi
+
++  ## Thanos side-car image when configured
++  ##
++  thanosImage:
++    repository: quay.io/thanos/thanos
++    tag: v0.17.2
++    sha: ""
++
+  ## Set a Field Selector to filter watched secrets
+  ##
+  secretFieldSelector: ""
+@@ -1659,7 +2278,7 @@
     port: 9090
 
   ## To be used with a proxy extraContainer port
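Read together, this hunk pins the operator and config-reloader to the Rancher mirrors at v0.49.0 and adds a Thanos sidecar image. A sketch of the resulting overrides, assuming these keys live under prometheusOperator as they do in upstream kube-prometheus-stack:

    prometheusOperator:
      image:
        repository: rancher/mirrored-prometheus-operator-prometheus-operator
        tag: v0.49.0
        sha: ""
      prometheusConfigReloaderImage:
        repository: rancher/mirrored-prometheus-operator-prometheus-config-reloader
        tag: v0.49.0
        sha: ""
      thanosImage:
        repository: quay.io/thanos/thanos
        tag: v0.17.2
        sha: ""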
@@ -855,16 +884,18 @@
 
   ## List of IP addresses at which the Prometheus server service is available
   ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
-@@ -1916,7 +2524,7 @@
+@@ -1916,8 +2535,8 @@
   ## Image of Prometheus.
   ##
   image:
 -    repository: quay.io/prometheus/prometheus
+-    tag: v2.27.1
 +    repository: rancher/mirrored-prometheus-prometheus
-     tag: v2.27.1
++    tag: v2.28.1
     sha: ""
 
-@@ -1979,6 +2587,11 @@
+  ## Tolerations for use with node taints
+@@ -1979,6 +2598,11 @@
   ##
   externalUrl: ""
 
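On the Prometheus side this hunk swaps the image for the Rancher mirror at v2.28.1, while externalUrl stays empty. A minimal sketch, assuming the usual prometheus.prometheusSpec layout:

    prometheus:
      prometheusSpec:
        image:
          repository: rancher/mirrored-prometheus-prometheus
          tag: v2.28.1
          sha: ""
        # URL Prometheus advertises when it runs behind a reverse proxy or ingress.
        externalUrl: ""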
@@ -876,7 +907,7 @@
   ## Define which Nodes the Pods are scheduled on.
   ## ref: https://kubernetes.io/docs/user-guide/node-selection/
   ##
-@@ -2011,7 +2624,7 @@
+@@ -2011,7 +2635,7 @@
   ## prometheus resource to be created with selectors based on values in the helm deployment,
   ## which will also match the PrometheusRule resources created
   ##
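The node-selection comment refers to an ordinary Kubernetes nodeSelector; as an illustration only (key placement and label are assumptions, not part of this diff):

    prometheus:
      prometheusSpec:
        nodeSelector:
          kubernetes.io/os: linux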
@@ -885,7 +916,7 @@
 
   ## PrometheusRules to be selected for target discovery.
   ## If {}, select all PrometheusRules
-@@ -2036,7 +2649,7 @@
+@@ -2036,7 +2660,7 @@
   ## prometheus resource to be created with selectors based on values in the helm deployment,
   ## which will also match the servicemonitors created
   ##
@@ -894,7 +925,7 @@
 
   ## ServiceMonitors to be selected for target discovery.
   ## If {}, select all ServiceMonitors
-@@ -2059,7 +2672,7 @@
+@@ -2059,7 +2683,7 @@
   ## prometheus resource to be created with selectors based on values in the helm deployment,
   ## which will also match the podmonitors created
   ##
@@ -903,7 +934,7 @@
 
   ## PodMonitors to be selected for target discovery.
   ## If {}, select all PodMonitors
-@@ -2190,9 +2803,13 @@
+@@ -2190,9 +2814,13 @@
 
   ## Resource limits & requests
   ##
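The last three hunks all touch the same selector pattern for PrometheusRules, ServiceMonitors and PodMonitors. A hedged sketch of how those selectors are typically set, with an illustrative label:

    prometheus:
      prometheusSpec:
        # If a selector is {} and the matching *NilUsesHelmValues flag is false,
        # all resources of that kind are selected, not just the chart's own.
        ruleSelectorNilUsesHelmValues: false
        ruleSelector: {}
        serviceMonitorSelectorNilUsesHelmValues: false
        serviceMonitorSelector:
          matchLabels:
            team: platform   # illustrative label
        podMonitorSelectorNilUsesHelmValues: false
        podMonitorSelector: {}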
@@ -920,7 +951,7 @@
 
   ## Prometheus StorageSpec for persistent data
   ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/storage.md
-@@ -2215,7 +2832,13 @@
+@@ -2215,7 +2843,13 @@
   # medium: Memory
 
   # Additional volumes on the output StatefulSet definition.
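The StorageSpec comment links to the operator's storage guide; a minimal sketch of a persistent volume claim template for Prometheus (storage class and size are placeholders):

    prometheus:
      prometheusSpec:
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: standard   # placeholder, pick a class that exists in the cluster
              accessModes: ["ReadWriteOnce"]
              resources:
                requests:
                  storage: 50Gi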
@@ -935,7 +966,22 @@
 
   # Additional VolumeMounts on the output StatefulSet definition.
   volumeMounts: []
-@@ -2322,9 +2945,34 @@
+@@ -2288,6 +2922,14 @@
+  # services:
+  #   - metrics-prometheus-alertmanager
+
++  ## If additional alertmanager configurations are already deployed in a single secret, or you want to manage
++  ## them separately from the helm deployment, you can use this section.
++  ## Expected values are the secret name and key
++  ## Cannot be used with additionalAlertManagerConfigs
++  additionalAlertManagerConfigsSecret: {}
++  # name:
++  # key:
++
+  ## AdditionalAlertRelabelConfigs allows specifying Prometheus alert relabel configurations. Alert relabel configurations specified are appended
+  ## to the configurations generated by the Prometheus Operator. Alert relabel configurations specified must have the form as specified in the
+  ## official Prometheus documentation: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alert_relabel_configs.
+@@ -2322,9 +2964,34 @@
   ##
   thanos: {}
 
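The new additionalAlertManagerConfigsSecret block lets Prometheus pick up Alertmanager configuration kept in an existing Secret instead of inline values; per the comment it cannot be combined with additionalAlertManagerConfigs. A sketch with a hypothetical secret name and key:

    prometheus:
      prometheusSpec:
        additionalAlertManagerConfigsSecret:
          name: extra-alertmanager-configs   # hypothetical pre-existing Secret
          key: alertmanager-configs.yaml     # key inside that Secret holding the configs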
@@ -971,7 +1017,7 @@
 
   ## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes
   ## (permissions, dir tree) on mounted volumes before starting prometheus
-@@ -2332,7 +2980,7 @@
+@@ -2332,7 +2999,7 @@
 
   ## PortName to use for Prometheus.
   ##
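The initContainers comment describes the usual pattern of fixing ownership or directory layout on a mounted volume before Prometheus starts; a hedged sketch (image, UID/GID and mount path are illustrative):

    prometheus:
      prometheusSpec:
        initContainers:
          - name: init-chown-data            # illustrative name
            image: busybox:1.35              # illustrative image
            command: ["chown", "-R", "1000:2000", "/prometheus"]
            volumeMounts:
              - name: prometheus-data        # must match a volume that exists on the StatefulSet
                mountPath: /prometheus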