# Mirror of https://git.rancher.io/charts (100 lines, 5.5 KiB, YAML)
{{- /*
Generated from 'kubernetes-system-apiserver' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetes-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders a single PrometheusRule containing the 'kubernetes-system-apiserver'
alert group. Values read by this template:
  kubeTargetVersionOverride          - optional; replaces the detected cluster GitVersion
  defaultRules.create                - must be truthy for the rule to render
  defaultRules.rules.kubernetesSystem - must be truthy for the rule to render
  defaultRules.labels                - optional extra metadata labels
  defaultRules.annotations           - optional metadata annotations
  defaultRules.additionalRuleLabels  - optional extra labels appended to every alert
  defaultRules.runbookUrl            - prefix prepended to each runbook anchor
  kubeApiServer.enabled              - gates the KubeAPIDown alert

Indentation note: the 'indent 4' and 'indent 8' pipelines below must stay in
sync with the nesting depth of 'metadata.labels' / 'metadata.annotations'
(4 spaces) and of each rule's 'labels' map (8 spaces).
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system-apiserver" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kubernetes-system-apiserver
    rules:
{{- /* Warning tier: 1st-percentile client certificate lifetime below 604800 s (7 days). */}}
    - alert: KubeClientCertificateExpiration
      annotations:
        description: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration
        summary: Client certificate is about to expire.
      expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- /* Critical tier: same alert name and query, threshold lowered to 86400 s (24 hours). */}}
    - alert: KubeClientCertificateExpiration
      annotations:
        description: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration
        summary: Client certificate is about to expire.
      expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- /* More than 4 unavailability increments per (name, namespace) within 10m. */}}
    - alert: AggregatedAPIErrors
      annotations:
        description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-aggregatedapierrors
        summary: An aggregated API has reported errors.
      expr: sum by(name, namespace)(increase(aggregator_unavailable_apiservice_total[10m])) > 4
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- /* AggregatedAPIDown is emitted only when the target version is >=1.18.0-0. */}}
{{- if semverCompare ">=1.18.0-0" $kubeTargetVersion }}
    - alert: AggregatedAPIDown
      annotations:
        description: An aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-aggregatedapidown
        summary: An aggregated API is down.
      expr: (1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85
      for: 5m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* KubeAPIDown is emitted only when .Values.kubeApiServer.enabled is truthy. */}}
{{- if .Values.kubeApiServer.enabled }}
    - alert: KubeAPIDown
      annotations:
        description: KubeAPI has disappeared from Prometheus target discovery.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapidown
        summary: Target disappeared from Prometheus target discovery.
      expr: absent(up{job="apiserver"} == 1)
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- /* Terminated requests exceed 20% of (served + terminated) requests over 10m. */}}
    - alert: KubeAPITerminatedRequests
      annotations:
        description: The apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapiterminatedrequests
        summary: The apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.
      expr: sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
      for: 5m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}