{{- /*
Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- /*
Purpose: renders one PrometheusRule, named "<fullname>-kubernetes-apps", holding
the "kubernetes-apps" alert group.

Conventions shared by every alert below:
  - Each alert is wrapped in its own gate and is omitted when
    .Values.defaultRules.disabled.<AlertName> is true.
  - annotations: .Values.defaultRules.additionalRuleAnnotations is emitted first,
    then .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps, then
    the fixed description / runbook_url / summary keys.
  - The backtick-quoted brace actions in descriptions print literal double braces,
    so the rendered text still contains $labels / $value placeholders
    (presumably for Prometheus' own alert templating - confirm if in doubt).
  - "for" and "severity" are looked up with dig under
    .Values.customRules.<AlertName>, falling back to the literal default shown.
  - keep_firing_for is emitted only when .Values.defaultRules.keepFiringFor is set.
  - labels: after severity, .Values.defaultRules.additionalRuleLabels and then
    .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps are appended.
  - Each entry of .Values.defaultRules.additionalAggregationLabels is printed,
    followed by a comma, in front of the fixed label list of by(...) / on(...).
*/ -}}
{{- /* Target version: kubeTargetVersionOverride when non-empty, otherwise the cluster's GitVersion. */}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- /* Emit the manifest only for versions >=1.14.0-0 and <9.9.9-9, and only when both defaultRules.create and defaultRules.rules.kubernetesApps are truthy. */}}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesApps }}
{{- /* Value used for the job="..." matcher in every expression below. */}}
{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }}
{{- /* Value used for the namespace=~"..." regex matcher in every expression below. */}}
{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- /* Optional extra labels on the PrometheusRule object itself. */}}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- /* Optional annotations on the PrometheusRule object itself. */}}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kubernetes-apps
    rules:
{{- /* KubePodCrashLooping: the CrashLoopBackOff waiting-reason series reached >= 1 at some point in the last 5m. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubePodCrashLooping | default false) }}
    - alert: KubePodCrashLooping
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff").'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodcrashlooping
        summary: Pod is crash looping.
      expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}[5m]) >= 1
      for: {{ dig "KubePodCrashLooping" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubePodCrashLooping" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubePodNotReady: pods in phase Pending, Unknown or Failed, joined to kube_pod_owner with owner_kind="Job" excluded. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubePodNotReady | default false) }}
    - alert: KubePodNotReady
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodnotready
        summary: Pod has been in a non-ready state for more than 15 minutes.
      expr: |-
        sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) (
          max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) (
            kube_pod_status_phase{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown|Failed"}
          ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left(owner_kind) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) (
            1, max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"})
          )
        ) > 0
      for: {{ dig "KubePodNotReady" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubePodNotReady" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeDeploymentGenerationMismatch: observed generation differs from metadata generation. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeDeploymentGenerationMismatch | default false) }}
    - alert: KubeDeploymentGenerationMismatch
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentgenerationmismatch
        summary: Deployment generation mismatch due to possible roll-back
      expr: |-
        kube_deployment_status_observed_generation{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          !=
        kube_deployment_metadata_generation{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
      for: {{ dig "KubeDeploymentGenerationMismatch" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeDeploymentGenerationMismatch" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeDeploymentReplicasMismatch: spec replicas exceed available replicas while the updated-replica count did not change over 10m. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeDeploymentReplicasMismatch | default false) }}
    - alert: KubeDeploymentReplicasMismatch
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentreplicasmismatch
        summary: Deployment has not matched the expected number of replicas.
      expr: |-
        (
          kube_deployment_spec_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
            >
          kube_deployment_status_replicas_available{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
        ) and (
          changes(kube_deployment_status_replicas_updated{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}[10m])
            ==
          0
        )
      for: {{ dig "KubeDeploymentReplicasMismatch" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeDeploymentReplicasMismatch" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeDeploymentRolloutStuck: the condition="Progressing", status="false" series is non-zero. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeDeploymentRolloutStuck | default false) }}
    - alert: KubeDeploymentRolloutStuck
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentrolloutstuck
        summary: Deployment rollout is not progressing.
      expr: |-
        kube_deployment_status_condition{condition="Progressing", status="false",job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
        != 0
      for: {{ dig "KubeDeploymentRolloutStuck" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeDeploymentRolloutStuck" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeStatefulSetReplicasMismatch: ready replicas differ from status replicas while the updated-replica count did not change over 10m. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeStatefulSetReplicasMismatch | default false) }}
    - alert: KubeStatefulSetReplicasMismatch
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetreplicasmismatch
        summary: StatefulSet has not matched the expected number of replicas.
      expr: |-
        (
          kube_statefulset_status_replicas_ready{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
            !=
          kube_statefulset_status_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
        ) and (
          changes(kube_statefulset_status_replicas_updated{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}[10m])
            ==
          0
        )
      for: {{ dig "KubeStatefulSetReplicasMismatch" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeStatefulSetReplicasMismatch" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeStatefulSetGenerationMismatch: observed generation differs from metadata generation. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeStatefulSetGenerationMismatch | default false) }}
    - alert: KubeStatefulSetGenerationMismatch
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetgenerationmismatch
        summary: StatefulSet generation mismatch due to possible roll-back
      expr: |-
        kube_statefulset_status_observed_generation{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          !=
        kube_statefulset_metadata_generation{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
      for: {{ dig "KubeStatefulSetGenerationMismatch" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeStatefulSetGenerationMismatch" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeStatefulSetUpdateNotRolledOut: a current-revision series has no matching update-revision series, replicas differ from updated replicas, and the updated count did not change over 5m. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeStatefulSetUpdateNotRolledOut | default false) }}
    - alert: KubeStatefulSetUpdateNotRolledOut
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetupdatenotrolledout
        summary: StatefulSet update has not been rolled out.
      expr: |-
        (
          max without (revision) (
            kube_statefulset_status_current_revision{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
              unless
            kube_statefulset_status_update_revision{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          )
            *
          (
            kube_statefulset_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
              !=
            kube_statefulset_status_replicas_updated{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          )
        ) and (
          changes(kube_statefulset_status_replicas_updated{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}[5m])
            ==
          0
        )
      for: {{ dig "KubeStatefulSetUpdateNotRolledOut" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeStatefulSetUpdateNotRolledOut" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeDaemonSetRolloutStuck: any of current/updated/available differs from desired, or misscheduled is non-zero, while the updated count did not change over 5m. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeDaemonSetRolloutStuck | default false) }}
    - alert: KubeDaemonSetRolloutStuck
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetrolloutstuck
        summary: DaemonSet rollout is stuck.
      expr: |-
        (
          (
            kube_daemonset_status_current_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
              !=
            kube_daemonset_status_desired_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          ) or (
            kube_daemonset_status_number_misscheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
              !=
            0
          ) or (
            kube_daemonset_status_updated_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
              !=
            kube_daemonset_status_desired_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          ) or (
            kube_daemonset_status_number_available{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
              !=
            kube_daemonset_status_desired_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          )
        ) and (
          changes(kube_daemonset_status_updated_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}[5m])
            ==
          0
        )
      for: {{ dig "KubeDaemonSetRolloutStuck" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeDaemonSetRolloutStuck" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeContainerWaiting: summed waiting-reason series per namespace/pod/container/cluster is > 0. Defaults: for 1h, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeContainerWaiting | default false) }}
    - alert: KubeContainerWaiting
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecontainerwaiting
        summary: Pod container waiting longer than 1 hour
      expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}) > 0
      for: {{ dig "KubeContainerWaiting" "for" "1h" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeContainerWaiting" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeDaemonSetNotScheduled: desired minus current scheduled count is > 0. Defaults: for 10m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeDaemonSetNotScheduled | default false) }}
    - alert: KubeDaemonSetNotScheduled
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetnotscheduled
        summary: DaemonSet pods are not scheduled.
      expr: |-
        kube_daemonset_status_desired_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          -
        kube_daemonset_status_current_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} > 0
      for: {{ dig "KubeDaemonSetNotScheduled" "for" "10m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeDaemonSetNotScheduled" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeDaemonSetMisScheduled: misscheduled count is > 0. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeDaemonSetMisScheduled | default false) }}
    - alert: KubeDaemonSetMisScheduled
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetmisscheduled
        summary: DaemonSet pods are misscheduled.
      expr: kube_daemonset_status_number_misscheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} > 0
      for: {{ dig "KubeDaemonSetMisScheduled" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeDaemonSetMisScheduled" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeJobNotCompleted: a job with active > 0 started more than 43200 seconds ago. Unlike the other alerts, this rule has no "for" and no keep_firing_for block; only severity is read from customRules (default warning). */}}
{{- if not (.Values.defaultRules.disabled.KubeJobNotCompleted | default false) }}
    - alert: KubeJobNotCompleted
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobnotcompleted
        summary: Job did not complete in time
      expr: |-
        time() - max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, job_name, cluster) (kube_job_status_start_time{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          and
        kube_job_status_active{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} > 0) > 43200
      labels:
        severity: {{ dig "KubeJobNotCompleted" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeJobFailed: kube_job_failed is > 0. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeJobFailed | default false) }}
    - alert: KubeJobFailed
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobfailed
        summary: Job failed to complete.
      expr: kube_job_failed{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} > 0
      for: {{ dig "KubeJobFailed" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeJobFailed" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeHpaReplicasMismatch: desired differs from current replicas, current is strictly between min and max, and current did not change over 15m. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeHpaReplicasMismatch | default false) }}
    - alert: KubeHpaReplicasMismatch
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpareplicasmismatch
        summary: HPA has not matched desired number of replicas.
      expr: |-
        (kube_horizontalpodautoscaler_status_desired_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          !=
        kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"})
          and
        (kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          >
        kube_horizontalpodautoscaler_spec_min_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"})
          and
        (kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          <
        kube_horizontalpodautoscaler_spec_max_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"})
          and
        changes(kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}[15m]) == 0
      for: {{ dig "KubeHpaReplicasMismatch" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeHpaReplicasMismatch" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- /* KubeHpaMaxedOut: current replicas equal the configured max replicas. Defaults: for 15m, severity warning. */}}
{{- if not (.Values.defaultRules.disabled.KubeHpaMaxedOut | default false) }}
    - alert: KubeHpaMaxedOut
      annotations:
{{- if .Values.defaultRules.additionalRuleAnnotations }}
{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }}
{{- end }}
{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }}
{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }}
{{- end }}
        description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpamaxedout
        summary: HPA is running at max replicas
      expr: |-
        kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
          ==
        kube_horizontalpodautoscaler_spec_max_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}
      for: {{ dig "KubeHpaMaxedOut" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "KubeHpaMaxedOut" "severity" "warning" .Values.customRules }}
      {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
        {{- with .Values.defaultRules.additionalRuleLabels }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }}
          {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
{{- end }}
{{- end }}