mirror of https://git.rancher.io/charts
{{- /*
Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheus }}
{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
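{{- /*
Rendering note: this manifest is emitted only when the target Kubernetes version (detected, or
set via kubeTargetVersionOverride) is >=1.10.0 and <1.14.0, and both defaultRules.create and
defaultRules.rules.prometheus are true. For illustration only, with a hypothetical release named
"kps", $prometheusJob would typically render to "kps-kube-prometheus-stack-prometheus" and
$namespace to the namespace the chart is installed into.
*/ -}}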
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
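{{- /*
Illustration only (hypothetical user-supplied values, not part of the chart defaults): the extra
metadata above comes from defaultRules.labels / defaultRules.annotations in values.yaml, e.g.

  defaultRules:
    labels:
      team: sre
    annotations:
      note: managed-by-platform
*/}}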
spec:
  groups:
  - name: prometheus.rules
    rules:
    - alert: PrometheusConfigReloadFailed
      annotations:
        description: Reloading Prometheus' configuration has failed for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}}
        summary: Reloading Prometheus' configuration failed
      expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 0
      for: 10m
      labels:
        severity: warning
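    {{- /*
    The job and namespace matchers in the expression above, and in every expression below, scope
    each alert to the Prometheus instance deployed by this chart; the backtick-escaped label
    references in the annotations are passed through unrendered so Prometheus expands them at
    alert time.
    */}}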
    - alert: PrometheusNotificationQueueRunningFull
      annotations:
        description: Prometheus' alert notification queue is running full for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}
        summary: Prometheus' alert notification queue is running full
      expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}
      for: 10m
      labels:
        severity: warning
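    {{- /*
    The rule above uses predict_linear to extrapolate the notification queue length 30 minutes
    ahead (60 * 30 seconds) from the last 5 minutes of samples, and fires if that forecast
    exceeds the queue capacity.
    */}}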
    - alert: PrometheusErrorSendingAlerts
      annotations:
        description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}}
        summary: Errors while sending alerts from Prometheus
      expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.01
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusErrorSendingAlerts
      annotations:
        description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}}
        summary: Errors while sending alerts from Prometheus
      expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.03
      for: 10m
      labels:
        severity: critical
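    {{- /*
    The two PrometheusErrorSendingAlerts rules above evaluate the same error ratio at two
    thresholds: more than 1% of notifications failing raises a warning, more than 3% is critical.
    */}}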
    - alert: PrometheusNotConnectedToAlertmanagers
      annotations:
        description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} is not connected to any Alertmanagers
        summary: Prometheus is not connected to any Alertmanagers
      expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} < 1
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusTSDBReloadsFailing
      annotations:
        description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} reload failures over the last four hours.'
        summary: Prometheus has issues reloading data blocks from disk
      expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
      for: 12h
      labels:
        severity: warning
    - alert: PrometheusTSDBCompactionsFailing
      annotations:
        description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last four hours.'
        summary: Prometheus has issues compacting sample blocks
      expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
      for: 12h
      labels:
        severity: warning
    - alert: PrometheusTSDBWALCorruptions
      annotations:
        description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} has a corrupted write-ahead log (WAL).'
        summary: Prometheus write-ahead log is corrupted
      expr: prometheus_tsdb_wal_corruptions_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} > 0
      for: 4h
      labels:
        severity: warning
    - alert: PrometheusNotIngestingSamples
      annotations:
        description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} isn't ingesting samples.
        summary: Prometheus isn't ingesting samples
      expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusTargetScrapesDuplicate
      annotations:
        description: '{{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has many samples rejected due to duplicate timestamps but different values'
        summary: Prometheus has many samples rejected
      expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: 10m
      labels:
        severity: warning
{{- end }}
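{{- /*
Sketch (hypothetical values.yaml overrides, not part of the template above): to keep the chart's
default rules but skip rendering this prometheus.rules group, something like the following would
typically be used; setting defaultRules.create to false would typically disable the other
generated rule files as well, since they share the same guard pattern.

  defaultRules:
    create: true
    rules:
      prometheus: false
*/}}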