rancher-charts/charts/rancher-monitoring/104.1.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-resources.yaml

282 lines
17 KiB
YAML
Raw Normal View History

{{- /*
Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesResources }}
{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-resources" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
labels:
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
groups:
- name: kubernetes-resources
rules:
{{- if not (.Values.defaultRules.disabled.KubeCPUOvercommit | default false) }}
- alert: KubeCPUOvercommit
annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }}
{{- end }}
description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted CPU resource requests for Pods by {{`{{`}} $value {{`}}`}} CPU shares and cannot tolerate node failure.
runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecpuovercommit
summary: Cluster has overcommitted CPU resource requests.
expr: |-
sum(namespace_cpu:kube_pod_container_resource_requests:sum{job="{{ $kubeStateMetricsJob }}",}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - (sum(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - max(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) > 0
and
(sum(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - max(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) > 0
for: {{ dig "KubeCPUOvercommit" "for" "10m" .Values.customRules }}
{{- with .Values.defaultRules.keepFiringFor }}
keep_firing_for: "{{ . }}"
{{- end }}
labels:
severity: {{ dig "KubeCPUOvercommit" "severity" "warning" .Values.customRules }}
{{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- with .Values.defaultRules.additionalRuleLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.KubeMemoryOvercommit | default false) }}
- alert: KubeMemoryOvercommit
annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }}
{{- end }}
description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted memory resource requests for Pods by {{`{{`}} $value | humanize {{`}}`}} bytes and cannot tolerate node failure.
runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubememoryovercommit
summary: Cluster has overcommitted memory resource requests.
expr: |-
sum(namespace_memory:kube_pod_container_resource_requests:sum{}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - (sum(kube_node_status_allocatable{resource="memory", job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - max(kube_node_status_allocatable{resource="memory", job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) > 0
and
(sum(kube_node_status_allocatable{resource="memory", job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - max(kube_node_status_allocatable{resource="memory", job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) > 0
for: {{ dig "KubeMemoryOvercommit" "for" "10m" .Values.customRules }}
{{- with .Values.defaultRules.keepFiringFor }}
keep_firing_for: "{{ . }}"
{{- end }}
labels:
severity: {{ dig "KubeMemoryOvercommit" "severity" "warning" .Values.customRules }}
{{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- with .Values.defaultRules.additionalRuleLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.KubeCPUQuotaOvercommit | default false) }}
- alert: KubeCPUQuotaOvercommit
annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }}
{{- end }}
description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted CPU resource requests for Namespaces.
runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecpuquotaovercommit
summary: Cluster has overcommitted CPU resource requests.
expr: |-
sum(min without(resource) (kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard", resource=~"(cpu|requests.cpu)"})) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)
/
sum(kube_node_status_allocatable{resource="cpu", job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)
> 1.5
for: {{ dig "KubeCPUQuotaOvercommit" "for" "5m" .Values.customRules }}
{{- with .Values.defaultRules.keepFiringFor }}
keep_firing_for: "{{ . }}"
{{- end }}
labels:
severity: {{ dig "KubeCPUQuotaOvercommit" "severity" "warning" .Values.customRules }}
{{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- with .Values.defaultRules.additionalRuleLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.KubeMemoryQuotaOvercommit | default false) }}
- alert: KubeMemoryQuotaOvercommit
annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }}
{{- end }}
description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted memory resource requests for Namespaces.
runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubememoryquotaovercommit
summary: Cluster has overcommitted memory resource requests.
expr: |-
sum(min without(resource) (kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard", resource=~"(memory|requests.memory)"})) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)
/
sum(kube_node_status_allocatable{resource="memory", job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)
> 1.5
for: {{ dig "KubeMemoryQuotaOvercommit" "for" "5m" .Values.customRules }}
{{- with .Values.defaultRules.keepFiringFor }}
keep_firing_for: "{{ . }}"
{{- end }}
labels:
severity: {{ dig "KubeMemoryQuotaOvercommit" "severity" "warning" .Values.customRules }}
{{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- with .Values.defaultRules.additionalRuleLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.KubeQuotaAlmostFull | default false) }}
- alert: KubeQuotaAlmostFull
annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }}
{{- end }}
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotaalmostfull
summary: Namespace quota is going to be full.
expr: |-
kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="used"}
/ ignoring(instance, job, type)
(kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard"} > 0)
> 0.9 < 1
for: {{ dig "KubeQuotaAlmostFull" "for" "15m" .Values.customRules }}
{{- with .Values.defaultRules.keepFiringFor }}
keep_firing_for: "{{ . }}"
{{- end }}
labels:
severity: {{ dig "KubeQuotaAlmostFull" "severity" "info" .Values.customRules }}
{{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- with .Values.defaultRules.additionalRuleLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.KubeQuotaFullyUsed | default false) }}
- alert: KubeQuotaFullyUsed
annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }}
{{- end }}
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotafullyused
summary: Namespace quota is fully used.
expr: |-
kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="used"}
/ ignoring(instance, job, type)
(kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard"} > 0)
== 1
for: {{ dig "KubeQuotaFullyUsed" "for" "15m" .Values.customRules }}
{{- with .Values.defaultRules.keepFiringFor }}
keep_firing_for: "{{ . }}"
{{- end }}
labels:
severity: {{ dig "KubeQuotaFullyUsed" "severity" "info" .Values.customRules }}
{{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- with .Values.defaultRules.additionalRuleLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.KubeQuotaExceeded | default false) }}
- alert: KubeQuotaExceeded
annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }}
{{- end }}
description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.
runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotaexceeded
summary: Namespace quota has exceeded the limits.
expr: |-
kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="used"}
/ ignoring(instance, job, type)
(kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard"} > 0)
> 1
for: {{ dig "KubeQuotaExceeded" "for" "15m" .Values.customRules }}
{{- with .Values.defaultRules.keepFiringFor }}
keep_firing_for: "{{ . }}"
{{- end }}
labels:
severity: {{ dig "KubeQuotaExceeded" "severity" "warning" .Values.customRules }}
{{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- with .Values.defaultRules.additionalRuleLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
{{- if not (.Values.defaultRules.disabled.CPUThrottlingHigh | default false) }}
- alert: CPUThrottlingHigh
annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }}
{{- end }}
description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}}.'
runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/cputhrottlinghigh
summary: Processes experience elevated CPU throttling.
expr: |-
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, container, pod, namespace)
/
sum(increase(container_cpu_cfs_periods_total{}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, container, pod, namespace)
> ( 25 / 100 )
for: {{ dig "CPUThrottlingHigh" "for" "15m" .Values.customRules }}
{{- with .Values.defaultRules.keepFiringFor }}
keep_firing_for: "{{ . }}"
{{- end }}
labels:
severity: {{ dig "CPUThrottlingHigh" "severity" "info" .Values.customRules }}
{{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- with .Values.defaultRules.additionalRuleLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}