rancher-charts/charts/rancher-monitoring/templates/prometheus/rules/prometheus.rules.yaml

{{- /*
Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheus }}
{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
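{{- /*
Note: the semver gate above renders these legacy rules only on Kubernetes 1.10
through 1.13, and only when defaultRules.create and defaultRules.rules.prometheus
are enabled. $prometheusJob and $namespace scope every expression below to the
Prometheus instance deployed by this chart release.
*/ -}}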
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: prometheus.rules
    rules:
    - alert: PrometheusConfigReloadFailed
      annotations:
        description: Reloading Prometheus' configuration has failed for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}}
        summary: Reloading Prometheus' configuration failed
      expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 0
      for: 10m
      labels:
        severity: warning
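    {{- /* predict_linear() below extrapolates the 5m queue-length trend 30 minutes
           (60 * 30 seconds) ahead and fires if the projected length would exceed
           the notification queue capacity. */}}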
    - alert: PrometheusNotificationQueueRunningFull
      annotations:
        description: Prometheus' alert notification queue is running full for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}
        summary: Prometheus' alert notification queue is running full
      expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}
      for: 10m
      labels:
        severity: warning
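    {{- /* The next two rules share one expression, the ratio of notification send
           errors to notifications sent: above 1% is a warning, above 3% is critical. */}}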
    - alert: PrometheusErrorSendingAlerts
      annotations:
        description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}}
        summary: Errors while sending alerts from Prometheus
      expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.01
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusErrorSendingAlerts
      annotations:
        description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}}
        summary: Errors while sending alerts from Prometheus
      expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.03
      for: 10m
      labels:
        severity: critical
    - alert: PrometheusNotConnectedToAlertmanagers
      annotations:
        description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} is not connected to any Alertmanagers
        summary: Prometheus is not connected to any Alertmanagers
      expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} < 1
      for: 10m
      labels:
        severity: warning
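    {{- /* Both TSDB failure rules below use increase() over a 2h window together
           with "for: 12h": failures must keep appearing for a full 12 hours before
           the alert fires. */}}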
    - alert: PrometheusTSDBReloadsFailing
      annotations:
        description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} reload failures over the last two hours.'
        summary: Prometheus has issues reloading data blocks from disk
      expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
      for: 12h
      labels:
        severity: warning
    - alert: PrometheusTSDBCompactionsFailing
      annotations:
        description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last two hours.'
        summary: Prometheus has issues compacting sample blocks
      expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
      for: 12h
      labels:
        severity: warning
    - alert: PrometheusTSDBWALCorruptions
      annotations:
        description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} has a corrupted write-ahead log (WAL).'
        summary: Prometheus write-ahead log is corrupted
      expr: prometheus_tsdb_wal_corruptions_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} > 0
      for: 4h
      labels:
        severity: warning
    - alert: PrometheusNotIngestingSamples
      annotations:
        description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} isn't ingesting samples.
        summary: Prometheus isn't ingesting samples
      expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
      for: 10m
      labels:
        severity: warning
    - alert: PrometheusTargetScrapesDuplicate
      annotations:
        description: '{{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has many samples rejected due to duplicate timestamps but different values'
        summary: Prometheus has many samples rejected
      expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: 10m
      labels:
        severity: warning
{{- end }}