mirror of https://git.rancher.io/charts
(dev-v2.6-archive) Manual changes to Monitoring for rebase
(partially cherry picked from commit a93a040b70)
@ -0,0 +1,63 @@
{{- /*
Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders a PrometheusRule holding the default Alertmanager alerts.
Emitted only when the target Kubernetes version is >=1.10 and <1.14 and both
.Values.defaultRules.create and .Values.defaultRules.rules.alertmanager are set.
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }}
{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }}
{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # Object names are capped at 63 characters and must not end with "-".
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: alertmanager.rules
    rules:
    # The {{`{{`}} ... {{`}}`}} pairs in the messages emit literal braces so that
    # Prometheus, not Helm, expands $labels / $value at alert time.
    - alert: AlertmanagerConfigInconsistent
      annotations:
        message: The configuration of the instances of the Alertmanager cluster `{{`{{`}}$labels.service{{`}}`}}` are out of sync.
      expr: count_values("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="{{ $operatorJob }}",namespace="{{ $namespace }}",controller="alertmanager"}) by (name, job, namespace, controller), "service", "$1", "name", "(.*)") != 1
      for: 5m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: AlertmanagerFailedReload
      annotations:
        message: Reloading Alertmanager's configuration has failed for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}.
      expr: alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 0
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: AlertmanagerMembersInconsistent
      annotations:
        message: Alertmanager has not found all other members of the cluster.
      expr: |-
        alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
          != on (service) GROUP_LEFT()
        count by (service) (alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})
      for: 5m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
@ -0,0 +1,179 @@
{{- /*
Generated from 'etcd' group from https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/etcd3_alert.rules.yml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders a PrometheusRule holding the default etcd alerts.
Emitted only when the target Kubernetes version is >=1.10 and <1.14 and
.Values.defaultRules.create, .Values.kubeEtcd.enabled and
.Values.defaultRules.rules.etcd are all set.
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeEtcd.enabled .Values.defaultRules.rules.etcd }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # Object names are capped at 63 characters and must not end with "-".
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "etcd" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: etcd
    rules:
    # The {{`{{`}} ... {{`}}`}} pairs in the messages emit literal braces so that
    # Prometheus, not Helm, expands $labels / $value at alert time.
    - alert: etcdInsufficientMembers
      annotations:
        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).'
      expr: sum(up{job=~".*etcd.*"} == bool 1) by (job) < ((count(up{job=~".*etcd.*"}) by (job) + 1) / 2)
      for: 3m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: etcdNoLeader
      annotations:
        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member {{`{{`}} $labels.instance {{`}}`}} has no leader.'
      expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
      for: 1m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: etcdHighNumberOfLeaderChanges
      annotations:
        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": instance {{`{{`}} $labels.instance {{`}}`}} has seen {{`{{`}} $value {{`}}`}} leader changes within the last hour.'
      expr: rate(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}[15m]) > 3
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    # Warning tier: more than 1% of gRPC requests failing for 10 minutes.
    - alert: etcdHighNumberOfFailedGRPCRequests
      annotations:
        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
      expr: |-
        100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
          /
        sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
          > 1
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    # Critical tier: more than 5% of gRPC requests failing for 5 minutes.
    - alert: etcdHighNumberOfFailedGRPCRequests
      annotations:
        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
      expr: |-
        100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
          /
        sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
          > 5
      for: 5m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: etcdGRPCRequestsSlow
      annotations:
        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": gRPC requests to {{`{{`}} $labels.grpc_method {{`}}`}} are taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
      expr: |-
        histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le))
        > 0.15
      for: 10m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: etcdMemberCommunicationSlow
      annotations:
        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member communication with {{`{{`}} $labels.To {{`}}`}} is taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
      expr: |-
        histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
        > 0.15
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: etcdHighNumberOfFailedProposals
      annotations:
        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last hour on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
      expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    # Message typo fixed ("fync" -> "fsync"); this deviates from the generated upstream text.
    - alert: etcdHighFsyncDurations
      annotations:
        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
      expr: |-
        histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
        > 0.5
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: etcdHighCommitDurations
      annotations:
        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile commit durations {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
      expr: |-
        histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
        > 0.25
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    # NOTE(review): the two HTTP-failure alerts compare a 0..1 ratio against
    # 0.01 / 0.05 while the message prints $value followed by "%" - confirm intent.
    - alert: etcdHighNumberOfFailedHTTPRequests
      annotations:
        message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}'
      expr: |-
        sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
        BY (method) > 0.01
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: etcdHighNumberOfFailedHTTPRequests
      annotations:
        message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
      expr: |-
        sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
        BY (method) > 0.05
      for: 10m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: etcdHTTPRequestsSlow
      annotations:
        message: etcd instance {{`{{`}} $labels.instance {{`}}`}} HTTP requests to {{`{{`}} $labels.method {{`}}`}} are slow.
      expr: |-
        histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
        > 0.15
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
@ -0,0 +1,56 @@
{{- /*
Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders a PrometheusRule holding the generic TargetDown and Watchdog alerts.
Emitted only when the target Kubernetes version is >=1.10 and <1.14 and both
.Values.defaultRules.create and .Values.defaultRules.rules.general are set.
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.general }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # Object names are capped at 63 characters and must not end with "-".
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "general.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: general.rules
    rules:
    - alert: TargetDown
      annotations:
        message: '{{`{{`}} $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}} targets are down.'
      expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    # vector(1) always returns a sample, so this alert fires permanently by design.
    - alert: Watchdog
      annotations:
        # Literal block scalar: the multi-line text needs no closing quote.
        message: |
          This is an alert meant to ensure that the entire alerting pipeline is functional.
          This alert is always firing, therefore it should always be firing in Alertmanager
          and always fire against a receiver. There are integrations with various notification
          mechanisms that send a notification when this alert is not firing. For example the
          "DeadMansSnitch" integration in PagerDuty.
      expr: vector(1)
      labels:
        severity: none
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
@ -0,0 +1,83 @@
{{- /*
Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders a PrometheusRule holding namespace- and workload-level recording rules.
Emitted only when the target Kubernetes version is >=1.10 and <1.14 and both
.Values.defaultRules.create and .Values.defaultRules.rules.k8s are set.

NOTE(review): the aggregation openers, label_replace() calls, left-hand metric
selectors and closing parentheses below were restored to make the PromQL
well-formed; verify them against the upstream rule group before shipping.
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8s }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # Object names are capped at 63 characters and must not end with "-".
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: k8s.rules
    rules:
    - expr: sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace)
      record: namespace:container_cpu_usage_seconds_total:sum_rate
    - expr: sum(container_memory_usage_bytes{job="kubelet", image!="", container_name!=""}) by (namespace)
      record: namespace:container_memory_usage_bytes:sum
    - expr: |-
        sum by (namespace, pod_name, container_name) (
          rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])
        )
      record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate
    # Resource requests are counted only for pods in the Pending or Running phase.
    - expr: |-
        sum by(namespace) (
            kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}
          * on (endpoint, instance, job, namespace, pod, service)
            group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)
        )
      record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum
    - expr: |-
        sum by (namespace) (
            kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"}
          * on (endpoint, instance, job, namespace, pod, service)
            group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)
        )
      record: namespace_name:kube_pod_container_resource_requests_cpu_cores:sum
    # Pods owned by a ReplicaSet are mapped to the ReplicaSet's owner
    # (joined through kube_replicaset_owner) and exposed as "workload".
    - expr: |-
        sum(
          label_replace(
            label_replace(
              kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
              "replicaset", "$1", "owner_name", "(.*)"
            ) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job="kube-state-metrics"},
            "workload", "$1", "owner_name", "(.*)"
          )
        ) by (namespace, workload, pod)
      labels:
        workload_type: deployment
      record: mixin_pod_workload
    - expr: |-
        sum(
          label_replace(
            kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
            "workload", "$1", "owner_name", "(.*)"
          )
        ) by (namespace, workload, pod)
      labels:
        workload_type: daemonset
      record: mixin_pod_workload
    - expr: |-
        sum(
          label_replace(
            kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
            "workload", "$1", "owner_name", "(.*)"
          )
        ) by (namespace, workload, pod)
      labels:
        workload_type: statefulset
      record: mixin_pod_workload
{{- end }}
@ -0,0 +1,39 @@
{{- /*
Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders a PrometheusRule recording API server request-latency quantiles.
Emitted only when the target Kubernetes version is >=1.10 and <1.14 and
.Values.defaultRules.create, .Values.kubeApiServer.enabled and
.Values.defaultRules.rules.kubeApiserver are all set.
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserver }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # Object names are capped at 63 characters and must not end with "-".
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kube-apiserver.rules
    rules:
    # All three rules share one record name and are told apart by the
    # "quantile" label. The value is quoted so it stays a string label.
    - expr: histogram_quantile(0.99, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
      labels:
        quantile: '0.99'
      record: cluster_quantile:apiserver_request_latencies:histogram_quantile
    - expr: histogram_quantile(0.9, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
      labels:
        quantile: '0.9'
      record: cluster_quantile:apiserver_request_latencies:histogram_quantile
    - expr: histogram_quantile(0.5, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
      labels:
        quantile: '0.5'
      record: cluster_quantile:apiserver_request_latencies:histogram_quantile
{{- end }}
@ -0,0 +1,47 @@
{{- /*
Generated from 'kube-prometheus-node-alerting.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders a PrometheusRule holding the node disk-fill prediction alerts.
Emitted only when the target Kubernetes version is >=1.10 and <1.14 and both
.Values.defaultRules.create and
.Values.defaultRules.rules.kubePrometheusNodeAlerting are set.
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeAlerting }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # Object names are capped at 63 characters and must not end with "-".
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-node-alerting.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kube-prometheus-node-alerting.rules
    rules:
    # Both expressions are quoted because the series names end in ":",
    # which would otherwise be ambiguous inside a plain YAML scalar.
    # Warning tier: 6h trend predicts a full disk within 24 hours.
    - alert: NodeDiskRunningFull
      annotations:
        message: Device {{`{{`}} $labels.device {{`}}`}} of node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} will be full within the next 24 hours.
      expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)'
      for: 30m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    # Critical tier: 30m trend predicts a full disk within 2 hours.
    - alert: NodeDiskRunningFull
      annotations:
        message: Device {{`{{`}} $labels.device {{`}}`}} of node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} will be full within the next 2 hours.
      expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)'
      for: 10m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
@ -0,0 +1,41 @@
{{- /*
Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders a PrometheusRule holding per-instance and cluster-wide node recording rules.
Emitted only when the target Kubernetes version is >=1.10 and <1.14 and both
.Values.defaultRules.create and
.Values.defaultRules.rules.kubePrometheusNodeRecording are set.
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeRecording }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # Object names are capped at 63 characters and must not end with "-".
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-node-recording.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kube-prometheus-node-recording.rules
    rules:
    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)
      record: instance:node_cpu:rate:sum
    - expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance)
      record: instance:node_filesystem_usage:sum
    - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
      record: instance:node_network_receive_bytes:rate:sum
    - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
      record: instance:node_network_transmit_bytes:rate:sum
    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
      record: instance:node_cpu:ratio
    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
      record: cluster:node_cpu:sum_rate5m
    # Deviation from the generated upstream text: the numerator used to be
    # cluster:node_cpu_seconds_total:rate5m, which no rule in this group records,
    # so the ratio never produced data. It now uses the series recorded just above.
    - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
      record: cluster:node_cpu:ratio
{{- end }}
@ -0,0 +1,63 @@
{{- /*
Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders a PrometheusRule recording scheduler latency quantiles (end-to-end,
scheduling algorithm, binding) at the 0.99, 0.9 and 0.5 quantiles.
Emitted only when the target Kubernetes version is >=1.10 and <1.14 and
.Values.defaultRules.create, .Values.kubeScheduler.enabled and
.Values.defaultRules.rules.kubeScheduler are all set.
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeScheduler }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # Object names are capped at 63 characters and must not end with "-".
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-scheduler.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kube-scheduler.rules
    rules:
    # Rules sharing a record name are told apart by the "quantile" label.
    # The value is quoted so it stays a string label.
    - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
      labels:
        quantile: '0.99'
      record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
    - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
      labels:
        quantile: '0.99'
      record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
    - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
      labels:
        quantile: '0.99'
      record: cluster_quantile:scheduler_binding_latency:histogram_quantile
    - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
      labels:
        quantile: '0.9'
      record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
    - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
      labels:
        quantile: '0.9'
      record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
    - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
      labels:
        quantile: '0.9'
      record: cluster_quantile:scheduler_binding_latency:histogram_quantile
    - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
      labels:
        quantile: '0.5'
      record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
    - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
      labels:
        quantile: '0.5'
      record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
    - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
      labels:
        quantile: '0.5'
      record: cluster_quantile:scheduler_binding_latency:histogram_quantile
{{- end }}
@ -0,0 +1,159 @@
{{- /*
Generated from 'kubernetes-absent' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders a PrometheusRule holding "<component>Down" alerts that fire when a
scrape job has no target reporting up == 1 for 15 minutes.
Emitted only when the target Kubernetes version is >=1.10 and <1.14 and both
.Values.defaultRules.create and .Values.defaultRules.rules.kubernetesAbsent are
set. Each alert except PrometheusDown is additionally gated on the matching
component's "enabled" value.
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesAbsent }}
{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }}
{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }}
{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # Object names are capped at 63 characters and must not end with "-".
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-absent" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kubernetes-absent
    rules:
    # runbook_url is the configured base URL with a per-alert anchor appended.
{{- if .Values.alertmanager.enabled }}
    - alert: AlertmanagerDown
      annotations:
        message: Alertmanager has disappeared from Prometheus target discovery.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerdown
      expr: absent(up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 1)
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeDns.enabled }}
    - alert: CoreDNSDown
      annotations:
        message: CoreDNS has disappeared from Prometheus target discovery.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-corednsdown
      expr: absent(up{job="kube-dns"} == 1)
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeApiServer.enabled }}
    - alert: KubeAPIDown
      annotations:
        message: KubeAPI has disappeared from Prometheus target discovery.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapidown
      expr: absent(up{job="apiserver"} == 1)
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeControllerManager.enabled }}
    - alert: KubeControllerManagerDown
      annotations:
        message: KubeControllerManager has disappeared from Prometheus target discovery.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecontrollermanagerdown
      expr: absent(up{job="kube-controller-manager"} == 1)
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeScheduler.enabled }}
    - alert: KubeSchedulerDown
      annotations:
        message: KubeScheduler has disappeared from Prometheus target discovery.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeschedulerdown
      expr: absent(up{job="kube-scheduler"} == 1)
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.kubeStateMetrics.enabled }}
    - alert: KubeStateMetricsDown
      annotations:
        message: KubeStateMetrics has disappeared from Prometheus target discovery.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricsdown
      expr: absent(up{job="kube-state-metrics"} == 1)
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.prometheusOperator.kubeletService.enabled }}
    - alert: KubeletDown
      annotations:
        message: Kubelet has disappeared from Prometheus target discovery.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletdown
      expr: absent(up{job="kubelet"} == 1)
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- if .Values.nodeExporter.enabled }}
    - alert: NodeExporterDown
      annotations:
        message: NodeExporter has disappeared from Prometheus target discovery.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeexporterdown
      expr: absent(up{job="node-exporter"} == 1)
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
    - alert: PrometheusDown
      annotations:
        message: Prometheus has disappeared from Prometheus target discovery.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusdown
      expr: absent(up{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 1)
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- if .Values.prometheusOperator.enabled }}
    - alert: PrometheusOperatorDown
      annotations:
        message: PrometheusOperator has disappeared from Prometheus target discovery.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusoperatordown
      expr: absent(up{job="{{ $operatorJob }}",namespace="{{ $namespace }}"} == 1)
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
{{- end }}
@ -0,0 +1,200 @@
{{- /*
Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders the 'kubernetes-apps' PrometheusRule. The manifest is emitted only when
the target Kubernetes version is >=1.10.0 and <1.14.0 and both
defaultRules.create and defaultRules.rules.kubernetesApps are set.
Every selector is restricted to namespaces matching
defaultRules.appNamespacesTarget. Double-brace placeholders inside alert
messages are written as backtick-quoted literals so that they pass through Helm
unrendered. defaultRules.additionalRuleLabels, when set, is appended to each
rule's labels map (hence the 8-space indent).
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesApps }}
{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kubernetes-apps
    rules:
    - alert: KubePodCrashLooping
      annotations:
        message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 5 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping
      expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) * 60 * 5 > 0
      for: 1h
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubePodNotReady
      annotations:
        message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than an hour.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodnotready
      expr: sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown"}) > 0
      for: 1h
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeDeploymentGenerationMismatch
      annotations:
        message: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentgenerationmismatch
      expr: |-
        kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          !=
        kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeDeploymentReplicasMismatch
      annotations:
        message: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than an hour.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentreplicasmismatch
      expr: |-
        kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          !=
        kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
      for: 1h
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeStatefulSetReplicasMismatch
      annotations:
        message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetreplicasmismatch
      expr: |-
        kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          !=
        kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeStatefulSetGenerationMismatch
      annotations:
        message: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetgenerationmismatch
      expr: |-
        kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          !=
        kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeStatefulSetUpdateNotRolledOut
      annotations:
        message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetupdatenotrolledout
      expr: |-
        max without (revision) (
          kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
            unless
          kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
        )
          *
        (
          kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
            !=
          kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
        )
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeDaemonSetRolloutStuck
      annotations:
        message: Only {{`{{`}} $value {{`}}`}}% of the desired Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are scheduled and ready.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetrolloutstuck
      expr: |-
        kube_daemonset_status_number_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          /
        kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} * 100 < 100
      for: 15m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeDaemonSetNotScheduled
      annotations:
        message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetnotscheduled
      expr: |-
        kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
          -
        kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeDaemonSetMisScheduled
      annotations:
        message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetmisscheduled
      expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeCronJobRunning
      annotations:
        message: CronJob {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.cronjob {{`}}`}} is taking more than 1h to complete.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecronjobrunning
      expr: time() - kube_cronjob_next_schedule_time{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 3600
      for: 1h
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeJobCompletion
      annotations:
        message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than one hour to complete.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobcompletion
      expr: kube_job_spec_completions{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - kube_job_status_succeeded{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
      for: 1h
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeJobFailed
      annotations:
        message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobfailed
      expr: kube_job_status_failed{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
      for: 1h
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
@ -0,0 +1,121 @@
{{- /*
Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders the 'kubernetes-resources' PrometheusRule. The manifest is emitted only
when the target Kubernetes version is >=1.10.0 and <1.14.0 and both
defaultRules.create and defaultRules.rules.kubernetesResources are set.
Double-brace placeholders inside alert messages are written as backtick-quoted
literals so that they pass through Helm unrendered.
defaultRules.additionalRuleLabels, when set, is appended to each rule's labels
map (hence the 8-space indent).
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesResources }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-resources" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kubernetes-resources
    rules:
    - alert: KubeCPUOvercommit
      annotations:
        message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit
      expr: |-
        sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum)
          /
        sum(node:node_num_cpu:sum)
          >
        (count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum)
      for: 5m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeMemOvercommit
      annotations:
        message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit
      expr: |-
        sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum)
          /
        sum(node_memory_MemTotal_bytes)
          >
        (count(node:node_num_cpu:sum)-1)
          /
        count(node:node_num_cpu:sum)
      for: 5m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeCPUOvercommit
      annotations:
        message: Cluster has overcommitted CPU resource requests for Namespaces.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit
      expr: |-
        sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
          /
        sum(node:node_num_cpu:sum)
          > 1.5
      for: 5m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeMemOvercommit
      annotations:
        message: Cluster has overcommitted memory resource requests for Namespaces.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit
      expr: |-
        sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
          /
        sum(node_memory_MemTotal_bytes{job="node-exporter"})
          > 1.5
      for: 5m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeQuotaExceeded
      annotations:
        message: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} printf "%0.0f" $value {{`}}`}}% of its {{`{{`}} $labels.resource {{`}}`}} quota.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubequotaexceeded
      expr: |-
        100 * kube_resourcequota{job="kube-state-metrics", type="used"}
          / ignoring(instance, job, type)
        (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
          > 90
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: CPUThrottlingHigh
      annotations:
        message: '{{`{{`}} printf "%0.0f" $value {{`}}`}}% throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container_name {{`}}`}} in pod {{`{{`}} $labels.pod_name {{`}}`}}.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-cputhrottlinghigh
      expr: |-
        100 * sum(increase(container_cpu_cfs_throttled_periods_total{container_name!="", }[5m])) by (container_name, pod_name, namespace)
          /
        sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container_name, pod_name, namespace)
          > 25
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
@ -0,0 +1,72 @@
{{- /*
Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders the 'kubernetes-storage' PrometheusRule. The manifest is emitted only
when the target Kubernetes version is >=1.10.0 and <1.14.0 and both
defaultRules.create and defaultRules.rules.kubernetesStorage are set.
The kubelet volume-stats selectors are restricted to namespaces matching
defaultRules.appNamespacesTarget; the persistent-volume phase rule is not.
Double-brace placeholders inside alert messages are written as backtick-quoted
literals so that they pass through Helm unrendered.
defaultRules.additionalRuleLabels, when set, is appended to each rule's labels
map (hence the 8-space indent).
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesStorage }}
{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-storage" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kubernetes-storage
    rules:
    - alert: KubePersistentVolumeUsageCritical
      annotations:
        message: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} printf "%0.2f" $value {{`}}`}}% free.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeusagecritical
      expr: |-
        100 * kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
          /
        kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
          < 3
      for: 1m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubePersistentVolumeFullInFourDays
      annotations:
        message: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} printf "%0.2f" $value {{`}}`}}% is available.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefullinfourdays
      expr: |-
        100 * (
          kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
            /
          kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
        ) < 15
        and
        predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}[6h], 4 * 24 * 3600) < 0
      for: 5m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubePersistentVolumeErrors
      annotations:
        message: The persistent volume {{`{{`}} $labels.persistentvolume {{`}}`}} has status {{`{{`}} $labels.phase {{`}}`}}.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeerrors
      expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
      for: 5m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
@ -0,0 +1,184 @@
{{- /*
Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders the 'kubernetes-system' PrometheusRule. The manifest is emitted only
when the target Kubernetes version is >=1.10.0 and <1.14.0 and both
defaultRules.create and defaultRules.rules.kubernetesSystem are set.
Several alert names appear more than once with different thresholds and
severities (warning vs. critical). The two KubeClientCertificateExpiration
rules carry no 'for' clause.
Double-brace placeholders inside alert messages are written as backtick-quoted
literals so that they pass through Helm unrendered.
defaultRules.additionalRuleLabels, when set, is appended to each rule's labels
map (hence the 8-space indent).
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: kubernetes-system
    rules:
    - alert: KubeNodeNotReady
      annotations:
        message: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than an hour.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubenodenotready
      expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
      for: 1h
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeVersionMismatch
      annotations:
        message: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeversionmismatch
      expr: count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
      for: 1h
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeClientErrors
      annotations:
        message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} printf "%0.0f" $value {{`}}`}}% errors.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors
      expr: |-
        (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
          /
        sum(rate(rest_client_requests_total[5m])) by (instance, job))
        * 100 > 1
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeClientErrors
      annotations:
        message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} printf "%0.0f" $value {{`}}`}} errors / second.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors
      expr: sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeletTooManyPods
      annotations:
        message: Kubelet {{`{{`}} $labels.instance {{`}}`}} is running {{`{{`}} $value {{`}}`}} Pods, close to the limit of 110.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubelettoomanypods
      expr: kubelet_running_pod_count{job="kubelet"} > 110 * 0.9
      for: 15m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeAPILatencyHigh
      annotations:
        message: The API server has a 99th percentile latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh
      expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeAPILatencyHigh
      annotations:
        message: The API server has a 99th percentile latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh
      expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
      for: 10m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeAPIErrorsHigh
      annotations:
        message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
      expr: |-
        sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
          /
        sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3
      for: 10m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeAPIErrorsHigh
      annotations:
        message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
      expr: |-
        sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
          /
        sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeAPIErrorsHigh
      annotations:
        message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
      expr: |-
        sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
          /
        sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10
      for: 10m
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeAPIErrorsHigh
      annotations:
        message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh
      expr: |-
        sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
          /
        sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5
      for: 10m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeClientCertificateExpiration
      annotations:
        message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration
      expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: KubeClientCertificateExpiration
      annotations:
        message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration
      expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
      labels:
        severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
@ -0,0 +1,57 @@
{{- /*
Generated from 'node-network' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders the 'node-network' PrometheusRule. The manifest is emitted only when
the target Kubernetes version is >=1.10.0 and <1.14.0 and both
defaultRules.create and defaultRules.rules.network are set.
All three rules select node-exporter series and skip devices matching "veth.+".
Double-brace placeholders inside alert messages are written as backtick-quoted
literals so that they pass through Helm unrendered.
defaultRules.additionalRuleLabels, when set, is appended to each rule's labels
map (hence the 8-space indent).
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.network }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-network" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: node-network
    rules:
    - alert: NetworkReceiveErrors
      annotations:
        message: Network interface "{{`{{`}} $labels.device {{`}}`}}" showing receive errors on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}"
      expr: rate(node_network_receive_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0
      for: 2m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: NetworkTransmitErrors
      annotations:
        message: Network interface "{{`{{`}} $labels.device {{`}}`}}" showing transmit errors on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}"
      expr: rate(node_network_transmit_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0
      for: 2m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
    - alert: NodeNetworkInterfaceFlapping
      annotations:
        message: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing it's up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}"
      expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
      for: 2m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
@ -0,0 +1,37 @@
{{- /*
Generated from 'node-time' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack

Renders the 'node-time' PrometheusRule. The manifest is emitted only when the
target Kubernetes version is >=1.10.0 and <1.14.0 and both
defaultRules.create and defaultRules.rules.time are set.
Double-brace placeholders inside the alert message are written as
backtick-quoted literals so that they pass through Helm unrendered.
defaultRules.additionalRuleLabels, when set, is appended to the rule's labels
map (hence the 8-space indent).
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.time }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-time" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: node-time
    rules:
    - alert: ClockSkewDetected
      annotations:
        message: Clock skew detected on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}. Ensure NTP is configured correctly on this host.
      expr: abs(node_timex_offset_seconds{job="node-exporter"}) > 0.03
      for: 2m
      labels:
        severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
@ -0,0 +1,202 @@
{{- /*
Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.node }}
# PrometheusRule carrying the 'node.rules' group (recording rules only).
# Rendered only when the target Kubernetes version (kubeTargetVersionOverride,
# falling back to the cluster's GitVersion) is >=1.10.0-0 and <1.14.0-0, and
# both defaultRules.create and defaultRules.rules.node are set.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
# Name is "<fullname>-node.rules", cut to 63 characters with a trailing "-" removed.
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
- name: node.rules
# NOTE(review): several multi-line expr bodies below look incomplete in this
# copy; confirm against the generator output before editing any expression.
#
# Node count and pod-to-node mapping, both derived from kube_pod_info.
- expr: sum(min(kube_pod_info) by (node))
record: ':kube_pod_info_node_count:'
- expr: max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod)
record: 'node_namespace_pod:kube_pod_info:'
# CPU: core count, 1m utilisation and load1 saturation. Records whose name
# starts with ':' aggregate without grouping; 'node:' records group by node.
- expr: |-
count by (node) (sum by (node, cpu) (
* on (namespace, pod) group_left(node)
record: node:node_num_cpu:sum
- expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]))
record: :node_cpu_utilisation:avg1m
- expr: |-
1 - avg by (node) (
* on (namespace, pod) group_left(node)
record: node:node_cpu_utilisation:avg1m
- expr: |-
record: node:cluster_cpu_utilisation:ratio
- expr: |-
record: ':node_cpu_saturation_load1:'
- expr: |-
sum by (node) (
* on (namespace, pod) group_left(node)
record: 'node:node_cpu_saturation_load1:'
# Memory: utilisation, free+cached+buffers bytes, total bytes and swap I/O rate.
- expr: |-
1 -
sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
record: ':node_memory_utilisation:'
- expr: sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
record: :node_memory_MemFreeCachedBuffers_bytes:sum
- expr: sum(node_memory_MemTotal_bytes{job="node-exporter"})
record: :node_memory_MemTotal_bytes:sum
- expr: |-
sum by (node) (
(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
* on (namespace, pod) group_left(node)
record: node:node_memory_bytes_available:sum
- expr: |-
sum by (node) (
* on (namespace, pod) group_left(node)
record: node:node_memory_bytes_total:sum
- expr: |-
(node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
record: node:node_memory_utilisation:ratio
- expr: |-
(node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
record: node:cluster_memory_utilisation:ratio
- expr: |-
1e3 * sum(
+ rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
record: :node_memory_swap_io_bytes:sum_rate
- expr: |-
1 -
sum by (node) (
(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
* on (namespace, pod) group_left(node)
sum by (node) (
* on (namespace, pod) group_left(node)
record: 'node:node_memory_utilisation:'
- expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum)
record: 'node:node_memory_utilisation_2:'
- expr: |-
1e3 * sum by (node) (
+ rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
* on (namespace, pod) group_left(node)
record: node:node_memory_swap_io_bytes:sum_rate
# Disk: I/O time (utilisation) and weighted I/O time (saturation) over
# devices matching nvme/rbd/sd/vd/xvd/dm- prefixes.
- expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
record: :node_disk_utilisation:avg_irate
- expr: |-
avg by (node) (
* on (namespace, pod) group_left(node)
record: node:node_disk_utilisation:avg_irate
- expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
record: :node_disk_saturation:avg_irate
- expr: |-
avg by (node) (
* on (namespace, pod) group_left(node)
record: node:node_disk_saturation:avg_irate
# Filesystem: used and available fraction of size for ext2/3/4, btrfs, xfs
# and zfs filesystems, taking the max per (instance, namespace, pod, device).
- expr: |-
max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
- node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
/ node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
record: 'node:node_filesystem_usage:'
- expr: max by (instance, namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
record: 'node:node_filesystem_avail:'
# Network: byte and drop irates over 1m, excluding veth devices.
- expr: |-
sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) +
record: :node_net_utilisation:sum_irate
- expr: |-
sum by (node) (
(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) +
* on (namespace, pod) group_left(node)
record: node:node_net_utilisation:sum_irate
- expr: |-
sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) +
record: :node_net_saturation:sum_irate
- expr: |-
sum by (node) (
(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) +
* on (namespace, pod) group_left(node)
record: node:node_net_saturation:sum_irate
# Inodes: total and free inode counts of the "/" mountpoint, matched to
# nodes on a host_ip label rewritten from the exporter's instance label.
- expr: |-
kube_pod_info{job="kube-state-metrics", host_ip!=""}
) by (node, host_ip)
* on (host_ip) group_right (node)
(max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
) by (node)
record: 'node:node_inodes_total:'
- expr: |-
kube_pod_info{job="kube-state-metrics", host_ip!=""}
) by (node, host_ip)
* on (host_ip) group_right (node)
(max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
) by (node)
record: 'node:node_inodes_free:'
{{- end }}
@ -0,0 +1,49 @@
{{- /*
Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheusOperator }}
{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
# PrometheusRule carrying the 'prometheus-operator' alert group.
# Rendered only when the target Kubernetes version (kubeTargetVersionOverride,
# falling back to the cluster's GitVersion) is >=1.10.0-0 and <1.14.0-0, and
# both defaultRules.create and defaultRules.rules.prometheusOperator are set.
# Both alerts select on job "<fullname>-operator" in the chart's namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
# Name is "<fullname>-prometheus-operator", cut to 63 characters with a trailing "-" removed.
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus-operator" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
- name: prometheus-operator
# Warning alert: 5m rate of prometheus_operator_reconcile_errors_total stayed
# above 0.1 for 10m.
- alert: PrometheusOperatorReconcileErrors
message: Errors while reconciling {{`{{`}} $labels.controller {{`}}`}} in {{`{{`}} $labels.namespace {{`}}`}} Namespace.
expr: rate(prometheus_operator_reconcile_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1
for: 10m
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# Warning alert: 5m rate of prometheus_operator_node_address_lookup_errors_total
# stayed above 0.1 for 10m.
# NOTE(review): the message says "reconciling Prometheus" although the metric
# counts node address lookup errors - confirm the wording is intended.
- alert: PrometheusOperatorNodeLookupErrors
message: Errors while reconciling Prometheus in {{`{{`}} $labels.namespace {{`}}`}} Namespace.
expr: rate(prometheus_operator_node_address_lookup_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1
for: 10m
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
@ -0,0 +1,139 @@
{{- /*
Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheus }}
{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
# PrometheusRule carrying the 'prometheus.rules' alert group.
# Rendered only when the target Kubernetes version (kubeTargetVersionOverride,
# falling back to the cluster's GitVersion) is >=1.10.0-0 and <1.14.0-0, and
# both defaultRules.create and defaultRules.rules.prometheus are set.
# Every alert selects on job "<fullname>-prometheus" in the chart's namespace
# and appends defaultRules.additionalRuleLabels after its severity label.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
# Name is "<fullname>-prometheus.rules", cut to 63 characters with a trailing "-" removed.
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }}
namespace: {{ template "kube-prometheus-stack.namespace" . }}
app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
- name: prometheus.rules
# Warning: prometheus_config_last_reload_successful has been 0 for 10m.
- alert: PrometheusConfigReloadFailed
description: Reloading Prometheus' configuration has failed for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}}
summary: Reloading Prometheus' configuration failed
expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 0
for: 10m
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# Warning: the notification queue length, linearly extrapolated from the last
# 5m to 30 minutes ahead, exceeds the queue capacity for 10m.
- alert: PrometheusNotificationQueueRunningFull
description: Prometheus' alert notification queue is running full for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}
summary: Prometheus' alert notification queue is running full
expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}
for: 10m
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# Warning tier: notification errors exceed 1% of sent notifications (5m rates)
# for 10m. The description uses the lowercase "alertmanager" label because
# label names are case-sensitive.
- alert: PrometheusErrorSendingAlerts
description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}}
summary: Errors while sending alerts from Prometheus
expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.01
for: 10m
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# Critical tier of the same alert: error ratio above 3% for 10m.
- alert: PrometheusErrorSendingAlerts
description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}}
summary: Errors while sending alerts from Prometheus
expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.03
for: 10m
severity: critical
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# Warning: fewer than one Alertmanager discovered for 10m.
- alert: PrometheusNotConnectedToAlertmanagers
description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} is not connected to any Alertmanagers
summary: Prometheus is not connected to any Alertmanagers
expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} < 1
for: 10m
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# Warning: TSDB reload failures increased within a 2h window, sustained for
# 12h. The description states the same 2h window the expression evaluates.
- alert: PrometheusTSDBReloadsFailing
description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} reload failures over the last two hours.'
summary: Prometheus has issues reloading data blocks from disk
expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
for: 12h
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# Warning: TSDB compaction failures increased within a 2h window, sustained
# for 12h. The description states the same 2h window the expression evaluates.
- alert: PrometheusTSDBCompactionsFailing
description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last two hours.'
summary: Prometheus has issues compacting sample blocks
expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0
for: 12h
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# Warning: the WAL corruption counter has been above 0 for 4h.
- alert: PrometheusTSDBWALCorruptions
description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} has a corrupted write-ahead log (WAL).'
summary: Prometheus write-ahead log is corrupted
expr: prometheus_tsdb_wal_corruptions_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} > 0
for: 4h
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# Warning: the 5m rate of appended head samples has been <= 0 for 10m.
- alert: PrometheusNotIngestingSamples
description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} isn't ingesting samples.
summary: Prometheus isn't ingesting samples
expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0
for: 10m
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
# Warning: samples rejected for duplicate timestamps increased within 5m,
# sustained for 10m.
- alert: PrometheusTargetScrapesDuplicate
description: '{{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has many samples rejected due to duplicate timestamps but different values'
summary: Prometheus has many samples rejected
expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0
for: 10m
severity: warning
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
@ -1415,7 +1415,7 @@
"multi": false,
"name": "ingress",
"options": [],
"query": "label_values(nginx_ingress_controller_requests{namespace=~\"$namespace\",controller_class=~\"$controller_class\",controller=~\"$controller\"}, ingress) ",
"query": "label_values(nginx_ingress_controller_requests{namespace=~\"$namespace\",controller_class=~\"$controller_class\",controller_pod=~\"$controller\"}, ingress) ",
"refresh": 1,
"regex": "",
"sort": 2,
@ -1460,4 +1460,4 @@
"title": "NGINX / Ingress Controller",
"uid": "nginx",
"version": 1
@ -481,7 +481,7 @@
"steppedLine": false,
"targets": [
"expr": "sum by (path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress = \"$ingress\",\n status =~ \"[4-5].*\"\n}[1m])) / sum by (path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress = \"$ingress\",\n}[1m]))",
"expr": "sum by (path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n status =~ \"[4-5].*\"\n}[1m])) / sum by (path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n}[1m]))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{ path }}",
@ -573,7 +573,7 @@
"steppedLine": false,
"targets": [
"expr": "sum by (path) (rate(nginx_ingress_controller_response_duration_seconds_sum{ingress = \"$ingress\"}[1m]))",
"expr": "sum by (path) (rate(nginx_ingress_controller_response_duration_seconds_sum{ingress =~ \"$ingress\"}[1m]))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{ path }}",
@ -764,7 +764,7 @@
"refId": "D"
"expr": " sum (rate(nginx_ingress_controller_response_size_bucket{\n namespace =~ \"$namespace\",\n ingress =~ \"$ingress\",\n }[1m])) by (le)\n",
"expr": " sum (rate(nginx_ingress_controller_response_size_bucket{\n ingress =~ \"$ingress\",\n }[1m])) by (le)\n",
"hide": true,
"legendFormat": "{{le}}",
"refId": "A"
@ -978,4 +978,4 @@
"title": "NGINX / Request Handling Performance",
"uid": "4GFbkOsZk",
"version": 1
@ -470,7 +470,7 @@
"tableColumn": "",
"targets": [
"expr": "sum(kube_node_status_allocatable_cpu_cores{})",
"expr": "sum(kube_node_status_allocatable_cpu_cores{}) OR sum(kube_node_status_allocatable{resource=\"cpu\",unit=\"core\"})",
"interval": "10s",
"intervalFactor": 1,
"refId": "A",
@ -654,7 +654,7 @@
"tableColumn": "",
"targets": [
"expr": "sum (kube_node_status_allocatable_memory_bytes{})",
"expr": "sum(kube_node_status_allocatable_memory_bytes{}) OR sum(kube_node_status_allocatable{resource=\"memory\", unit=\"byte\"})",
"interval": "10s",
"intervalFactor": 1,
"refId": "A",
File diff suppressed because it is too large
Load Diff
@ -1,959 +0,0 @@
{{- /*
Generated from 'k8s-cluster-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1
kind: ConfigMap
namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
{{- if $.Values.grafana.sidecar.dashboards.label }}
{{ $.Values.grafana.sidecar.dashboards.label }}: "1"
{{- end }}
app: {{ template "kube-prometheus-stack.name" $ }}-grafana
{{ include "kube-prometheus-stack.labels" $ | indent 4 }}
k8s-cluster-rsrc-use.json: |-
"annotations": {
"list": [
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"links": [
"refresh": "10s",
"rows": [
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "node:cluster_cpu_utilisation:ratio{cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}node{{`}}`}}",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "CPU Utilisation",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "percentunit",
"label": null,
"logBase": 1,
"max": 1,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\"} / scalar(sum(min(kube_pod_info{cluster=\"$cluster\"}) by (node)))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}node{{`}}`}}",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "CPU Saturation (Load1)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "percentunit",
"label": null,
"logBase": 1,
"max": 1,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "CPU",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "node:cluster_memory_utilisation:ratio{cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}node{{`}}`}}",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Memory Utilisation",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "percentunit",
"label": null,
"logBase": 1,
"max": 1,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}node{{`}}`}}",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Memory Saturation (Swap I/O)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Memory",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 5,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}node{{`}}`}}",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Disk IO Utilisation",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "percentunit",
"label": null,
"logBase": 1,
"max": 1,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}node{{`}}`}}",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Disk IO Saturation",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "percentunit",
"label": null,
"logBase": 1,
"max": 1,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Disk",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}node{{`}}`}}",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Net Utilisation (Transmitted)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}node{{`}}`}}",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Net Saturation (Dropped)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Network",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 9,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"} - node_filesystem_avail_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)) by (pod,namespace)\n/ scalar(sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)))\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\"}\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}node{{`}}`}}",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Disk Capacity",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "percentunit",
"label": null,
"logBase": 1,
"max": 1,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Storage",
"titleSize": "h6"
"schemaVersion": 14,
"style": "dark",
"tags": [
"templating": {
"list": [
"current": {
"text": "Prometheus",
"value": "Prometheus"
"hide": 0,
"label": null,
"name": "datasource",
"options": [
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [
"query": "label_values(:kube_pod_info_node_count:, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"time": {
"from": "now-1h",
"to": "now"
"timepicker": {
"refresh_intervals": [
"time_options": [
"timezone": "",
"title": "Kubernetes / USE Method / Cluster",
"uid": "a6e7d1362e1ddbb79db21d5bb40d7137",
"version": 0
{{- end }}
@ -1,986 +0,0 @@
{{- /*
Generated from 'k8s-node-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1
kind: ConfigMap
namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-node-rsrc-use" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
{{- if $.Values.grafana.sidecar.dashboards.label }}
{{ $.Values.grafana.sidecar.dashboards.label }}: "1"
{{- end }}
app: {{ template "kube-prometheus-stack.name" $ }}-grafana
{{ include "kube-prometheus-stack.labels" $ | indent 4 }}
k8s-node-rsrc-use.json: |-
"annotations": {
"list": [
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"links": [
"refresh": "10s",
"rows": [
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "node:node_cpu_utilisation:avg1m{cluster=\"$cluster\", node=\"$node\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Utilisation",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "CPU Utilisation",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\", node=\"$node\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Saturation",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "CPU Saturation (Load1)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "CPU",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "node:node_memory_utilisation:{cluster=\"$cluster\", node=\"$node\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Memory",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Memory Utilisation",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\", node=\"$node\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Swap IO",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Memory Saturation (Swap I/O)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Memory",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 5,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\", node=\"$node\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Utilisation",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Disk IO Utilisation",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\", node=\"$node\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Saturation",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Disk IO Saturation",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Disk",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\", node=\"$node\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Utilisation",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Net Utilisation (Transmitted)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\", node=\"$node\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Saturation",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Net Saturation (Dropped)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Net",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 9,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "node:node_filesystem_usage:{cluster=\"$cluster\"}\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\", node=\"$node\"}\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}device{{`}}`}}",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Disk Utilisation",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Disk",
"titleSize": "h6"
"schemaVersion": 14,
"style": "dark",
"tags": [
"templating": {
"list": [
"current": {
"text": "Prometheus",
"value": "Prometheus"
"hide": 0,
"label": null,
"name": "datasource",
"options": [
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [
"query": "label_values(:kube_pod_info_node_count:, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "node",
"multi": false,
"name": "node",
"options": [
"query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"time": {
"from": "now-1h",
"to": "now"
"timepicker": {
"refresh_intervals": [
"time_options": [
"timezone": "",
"title": "Kubernetes / USE Method / Node",
"uid": "4ac4f123aae0ff6dbaf4f4f66120033b",
"version": 0
{{- end }}
File diff suppressed because it is too large
Load Diff
@ -1,963 +0,0 @@
{{- /*
Generated from 'k8s-resources-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1
kind: ConfigMap
namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
{{- if $.Values.grafana.sidecar.dashboards.label }}
{{ $.Values.grafana.sidecar.dashboards.label }}: "1"
{{- end }}
app: {{ template "kube-prometheus-stack.name" $ }}-grafana
{{ include "kube-prometheus-stack.labels" $ | indent 4 }}
k8s-resources-namespace.json: |-
"annotations": {
"list": [
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"links": [
"refresh": "10s",
"rows": [
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod_name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}pod_name{{`}}`}}",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "CPU Usage",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "CPU Usage",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"styles": [
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
"alias": "CPU Usage",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "CPU Requests",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "CPU Requests %",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
"thresholds": [
"type": "number",
"unit": "percentunit"
"alias": "CPU Limits",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "CPU Limits %",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
"thresholds": [
"type": "number",
"unit": "percentunit"
"alias": "Pod",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"pattern": "/.*/",
"thresholds": [
"type": "string",
"unit": "short"
"targets": [
"expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 10
"expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B",
"step": 10
"expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C",
"step": 10
"expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D",
"step": 10
"expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "CPU Quota",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"transform": "table",
"type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "CPU Quota",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"}) by (pod_name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}pod_name{{`}}`}}",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Memory Usage (w/o cache)",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Memory Usage",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"styles": [
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
"alias": "Memory Usage",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
"thresholds": [
"type": "number",
"unit": "bytes"
"alias": "Memory Requests",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
"thresholds": [
"type": "number",
"unit": "bytes"
"alias": "Memory Requests %",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
"thresholds": [
"type": "number",
"unit": "percentunit"
"alias": "Memory Limits",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
"thresholds": [
"type": "number",
"unit": "bytes"
"alias": "Memory Limits %",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
"thresholds": [
"type": "number",
"unit": "percentunit"
"alias": "Memory Usage (RSS)",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
"thresholds": [
"type": "number",
"unit": "bytes"
"alias": "Memory Usage (Cache)",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #G",
"thresholds": [
"type": "number",
"unit": "bytes"
"alias": "Memory Usage (Swap",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #H",
"thresholds": [
"type": "number",
"unit": "bytes"
"alias": "Pod",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"pattern": "/.*/",
"thresholds": [
"type": "string",
"unit": "short"
"targets": [
"expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 10
"expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B",
"step": 10
"expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C",
"step": 10
"expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D",
"step": 10
"expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
"expr": "sum(label_replace(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F",
"step": 10
"expr": "sum(label_replace(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "G",
"step": 10
"expr": "sum(label_replace(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "H",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Memory Quota",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"transform": "table",
"type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Memory Quota",
"titleSize": "h6"
"schemaVersion": 14,
"style": "dark",
"tags": [
"templating": {
"list": [
"current": {
"text": "Prometheus",
"value": "Prometheus"
"hide": 0,
"label": null,
"name": "datasource",
"options": [
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [
"query": "label_values(:kube_pod_info_node_count:, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"time": {
"from": "now-1h",
"to": "now"
"timepicker": {
"refresh_intervals": [
"time_options": [
"timezone": "",
"title": "Kubernetes / Compute Resources / Namespace (Pods)",
"uid": "85a562078cdf77779eaa1add43ccec1e",
"version": 0
{{- end }}
File diff suppressed because it is too large
Load Diff
@ -1,936 +0,0 @@
{{- /*
Generated from 'k8s-resources-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1
kind: ConfigMap
namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
{{- if $.Values.grafana.sidecar.dashboards.label }}
{{ $.Values.grafana.sidecar.dashboards.label }}: "1"
{{- end }}
app: {{ template "kube-prometheus-stack.name" $ }}-grafana
{{ include "kube-prometheus-stack.labels" $ | indent 4 }}
k8s-resources-workload.json: |-
"annotations": {
"list": [
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"links": [
"refresh": "10s",
"rows": [
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}pod{{`}}`}}",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "CPU Usage",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "CPU Usage",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"styles": [
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
"alias": "CPU Usage",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "CPU Requests",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "CPU Requests %",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
"thresholds": [
"type": "number",
"unit": "percentunit"
"alias": "CPU Limits",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "CPU Limits %",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
"thresholds": [
"type": "number",
"unit": "percentunit"
"alias": "Pod",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"pattern": "/.*/",
"thresholds": [
"type": "string",
"unit": "short"
"targets": [
"expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 10
"expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B",
"step": 10
"expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C",
"step": 10
"expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D",
"step": 10
"expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "CPU Quota",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"transform": "table",
"type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "CPU Quota",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}pod{{`}}`}}",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Memory Usage",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Memory Usage",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"styles": [
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
"alias": "Memory Usage",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
"thresholds": [
"type": "number",
"unit": "bytes"
"alias": "Memory Requests",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
"thresholds": [
"type": "number",
"unit": "bytes"
"alias": "Memory Requests %",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
"thresholds": [
"type": "number",
"unit": "percentunit"
"alias": "Memory Limits",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
"thresholds": [
"type": "number",
"unit": "bytes"
"alias": "Memory Limits %",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
"thresholds": [
"type": "number",
"unit": "percentunit"
"alias": "Pod",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"pattern": "/.*/",
"thresholds": [
"type": "string",
"unit": "short"
"targets": [
"expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 10
"expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B",
"step": 10
"expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C",
"step": 10
"expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D",
"step": 10
"expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Memory Quota",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"transform": "table",
"type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Memory Quota",
"titleSize": "h6"
"schemaVersion": 14,
"style": "dark",
"tags": [
"templating": {
"list": [
"current": {
"text": "Prometheus",
"value": "Prometheus"
"hide": 0,
"label": null,
"name": "datasource",
"options": [
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [
"query": "label_values(:kube_pod_info_node_count:, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "workload",
"multi": false,
"name": "workload",
"options": [
"query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "type",
"multi": false,
"name": "type",
"options": [
"query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"time": {
"from": "now-1h",
"to": "now"
"timepicker": {
"refresh_intervals": [
"time_options": [
"timezone": "",
"title": "Kubernetes / Compute Resources / Workload",
"uid": "a164a7f0339f99e89cea5cb47e9be617",
"version": 0
{{- end }}
@ -1,972 +0,0 @@
{{- /*
Generated from 'k8s-resources-workloads-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1
kind: ConfigMap
namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
{{- if $.Values.grafana.sidecar.dashboards.label }}
{{ $.Values.grafana.sidecar.dashboards.label }}: "1"
{{- end }}
app: {{ template "kube-prometheus-stack.name" $ }}-grafana
{{ include "kube-prometheus-stack.labels" $ | indent 4 }}
k8s-resources-workloads-namespace.json: |-
"annotations": {
"list": [
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"links": [
"refresh": "10s",
"rows": [
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}workload{{`}}`}} - {{`{{`}}workload_type{{`}}`}}",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "CPU Usage",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "CPU Usage",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"styles": [
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
"alias": "Running Pods",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "CPU Usage",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "CPU Requests",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "CPU Requests %",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
"thresholds": [
"type": "number",
"unit": "percentunit"
"alias": "CPU Limits",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "CPU Limits %",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
"thresholds": [
"type": "number",
"unit": "percentunit"
"alias": "Workload",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
"pattern": "workload",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "Workload Type",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "workload_type",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"pattern": "/.*/",
"thresholds": [
"type": "string",
"unit": "short"
"targets": [
"expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 10
"expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B",
"step": 10
"expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C",
"step": 10
"expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D",
"step": 10
"expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
"expr": "sum(\n label_replace(\n namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "CPU Quota",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"transform": "table",
"type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "CPU Quota",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 10,
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 0,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}workload{{`}}`}} - {{`{{`}}workload_type{{`}}`}}",
"legendLink": null,
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Memory Usage",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Memory Usage",
"titleSize": "h6"
"collapse": false,
"height": "250px",
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"styles": [
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
"alias": "Running Pods",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "Memory Usage",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
"thresholds": [
"type": "number",
"unit": "bytes"
"alias": "Memory Requests",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
"thresholds": [
"type": "number",
"unit": "bytes"
"alias": "Memory Requests %",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
"thresholds": [
"type": "number",
"unit": "percentunit"
"alias": "Memory Limits",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
"thresholds": [
"type": "number",
"unit": "bytes"
"alias": "Memory Limits %",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
"thresholds": [
"type": "number",
"unit": "percentunit"
"alias": "Workload",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
"pattern": "workload",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "Workload Type",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "workload_type",
"thresholds": [
"type": "number",
"unit": "short"
"alias": "",
"colorMode": null,
"colors": [
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"pattern": "/.*/",
"thresholds": [
"type": "string",
"unit": "short"
"targets": [
"expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 10
"expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "B",
"step": 10
"expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "C",
"step": 10
"expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "D",
"step": 10
"expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "E",
"step": 10
"expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"},\n \"pod\", \"$1\", \"pod_name\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "F",
"step": 10
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Memory Quota",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"transform": "table",
"type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Memory Quota",
"titleSize": "h6"
"schemaVersion": 14,
"style": "dark",
"tags": [
"templating": {
"list": [
"current": {
"text": "Prometheus",
"value": "Prometheus"
"hide": 0,
"label": null,
"name": "datasource",
"options": [
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [
"query": "label_values(:kube_pod_info_node_count:, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"time": {
"from": "now-1h",
"to": "now"
"timepicker": {
"refresh_intervals": [
"time_options": [
"timezone": "",
"title": "Kubernetes / Compute Resources / Namespace (Workloads)",
"uid": "a87fb0d919ec0ea5f6543124e16c42a5",
"version": 0
{{- end }}
File diff suppressed because it is too large
Load Diff
@ -1,573 +0,0 @@
{{- /*
Generated from 'persistentvolumesusage' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1
kind: ConfigMap
namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "persistentvolumesusage" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
{{- if $.Values.grafana.sidecar.dashboards.label }}
{{ $.Values.grafana.sidecar.dashboards.label }}: "1"
{{- end }}
app: {{ template "kube-prometheus-stack.name" $ }}-grafana
{{ include "kube-prometheus-stack.labels" $ | indent 4 }}
persistentvolumesusage.json: |-
"__inputs": [
"__requires": [
"annotations": {
"list": [
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
"refresh": "",
"rows": [
"collapse": false,
"collapsed": false,
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"id": 2,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"total": false,
"values": true
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
"spaceLength": 10,
"span": 9,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Used Space",
"refId": "A"
"expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Free Space",
"refId": "B"
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Volume Space Usage",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
"datasource": "$datasource",
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
"gridPos": {
"id": 3,
"interval": null,
"links": [
"mappingType": 1,
"mappingTypes": [
"name": "value to text",
"value": 1
"name": "range to text",
"value": 2
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
"from": "null",
"text": "N/A",
"to": "null"
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
"tableColumn": "",
"targets": [
"expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
"thresholds": "80, 90",
"title": "Volume Space Usage",
"tooltip": {
"shared": false
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
"op": "=",
"text": "N/A",
"value": "null"
"valueName": "current"
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
"collapse": false,
"collapsed": false,
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"id": 4,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"total": false,
"values": true
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
"spaceLength": 10,
"span": 9,
"stack": true,
"steppedLine": false,
"targets": [
"expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Used inodes",
"refId": "A"
"expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": " Free inodes",
"refId": "B"
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Volume inodes Usage",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "none",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "none",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
"datasource": "$datasource",
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
"gridPos": {
"id": 5,
"interval": null,
"links": [
"mappingType": 1,
"mappingTypes": [
"name": "value to text",
"value": 1
"name": "range to text",
"value": 2
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
"from": "null",
"text": "N/A",
"to": "null"
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
"tableColumn": "",
"targets": [
"expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
"thresholds": "80, 90",
"title": "Volume inodes Usage",
"tooltip": {
"shared": false
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
"op": "=",
"text": "N/A",
"value": "null"
"valueName": "current"
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
"schemaVersion": 14,
"style": "dark",
"tags": [
"templating": {
"list": [
"current": {
"text": "Prometheus",
"value": "Prometheus"
"hide": 0,
"label": null,
"name": "datasource",
"options": [
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
"allValue": null,
"current": {
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [
"query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "Namespace",
"multi": false,
"name": "namespace",
"options": [
"query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "PersistentVolumeClaim",
"multi": false,
"name": "volume",
"options": [
"query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\"}, persistentvolumeclaim)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"time": {
"from": "now-7d",
"to": "now"
"timepicker": {
"refresh_intervals": [
"time_options": [
"timezone": "",
"title": "Kubernetes / Persistent Volumes",
"uid": "919b92a8e8041bd567af9edab12c840c",
"version": 0
{{- end }}
@ -1,680 +0,0 @@
{{- /*
Generated from 'pods' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1
kind: ConfigMap
namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pods" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
{{- if $.Values.grafana.sidecar.dashboards.label }}
{{ $.Values.grafana.sidecar.dashboards.label }}: "1"
{{- end }}
app: {{ template "kube-prometheus-stack.name" $ }}-grafana
{{ include "kube-prometheus-stack.labels" $ | indent 4 }}
pods.json: |-
"__inputs": [
"__requires": [
"annotations": {
"list": [
"builtIn": 1,
"datasource": "$datasource",
"enable": true,
"expr": "time() == BOOL timestamp(rate(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[2m]) > 0)",
"hide": false,
"iconColor": "rgba(215, 44, 44, 1)",
"name": "Restarts",
"showIn": 0,
"tags": [
"type": "rows"
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
"refresh": "",
"rows": [
"collapse": false,
"collapsed": false,
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"id": 2,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "sum by(container_name) (container_memory_usage_bytes{job=\"{{ include "exporter.kubelet.jobName" . }}\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Current: {{`{{`}} container_name {{`}}`}}",
"refId": "A"
"expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Requested: {{`{{`}} container {{`}}`}}",
"refId": "B"
"expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Limit: {{`{{`}} container {{`}}`}}",
"refId": "C"
"expr": "sum by(container_name) (container_memory_cache{job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", pod_name=~\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Cache: {{`{{`}} container_name {{`}}`}}",
"refId": "D"
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Memory Usage",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
"collapse": false,
"collapsed": false,
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"id": 3,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"}[1m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Current: {{`{{`}} container_name {{`}}`}}",
"refId": "A"
"expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Requested: {{`{{`}} container {{`}}`}}",
"refId": "B"
"expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Limit: {{`{{`}} container {{`}}`}}",
"refId": "C"
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "CPU Usage",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
"collapse": false,
"collapsed": false,
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"id": 4,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "RX: {{`{{`}} pod_name {{`}}`}}",
"refId": "A"
"expr": "sort_desc(sum by (pod_name) (rate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "TX: {{`{{`}} pod_name {{`}}`}}",
"refId": "B"
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Network I/O",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
"collapse": false,
"collapsed": false,
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"id": 5,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "max by (container) (kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Restarts: {{`{{`}} container {{`}}`}}",
"refId": "A"
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Total Restarts Per Container",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
"schemaVersion": 14,
"style": "dark",
"tags": [
"templating": {
"list": [
"current": {
"text": "Prometheus",
"value": "Prometheus"
"hide": 0,
"label": null,
"name": "datasource",
"options": [
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
"allValue": null,
"current": {
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [
"query": "label_values(kube_pod_info, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "Namespace",
"multi": false,
"name": "namespace",
"options": [
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "Pod",
"multi": false,
"name": "pod",
"options": [
"query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, pod)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "Container",
"multi": false,
"name": "container",
"options": [
"query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, container)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"time": {
"from": "now-1h",
"to": "now"
"timepicker": {
"refresh_intervals": [
"time_options": [
"timezone": "",
"title": "Kubernetes / Pods",
"uid": "ab4f13a9892a76a4d21ce8c2445bf4ea",
"version": 0
{{- end }}
@ -1,926 +0,0 @@
{{- /*
Generated from 'statefulset' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/grafana-dashboardDefinitions.yaml
Do not change in-place! In order to change this file first read following link:
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}
apiVersion: v1
kind: ConfigMap
namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "statefulset" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
{{- if $.Values.grafana.sidecar.dashboards.label }}
{{ $.Values.grafana.sidecar.dashboards.label }}: "1"
{{- end }}
app: {{ template "kube-prometheus-stack.name" $ }}-grafana
{{ include "kube-prometheus-stack.labels" $ | indent 4 }}
statefulset.json: |-
"__inputs": [
"__requires": [
"annotations": {
"list": [
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
"refresh": "",
"rows": [
"collapse": false,
"collapsed": false,
"panels": [
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(237, 129, 40, 0.89)",
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
"gridPos": {
"id": 2,
"interval": null,
"links": [
"mappingType": 1,
"mappingTypes": [
"name": "value to text",
"value": 1
"name": "range to text",
"value": 2
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "cores",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
"from": "null",
"text": "N/A",
"to": "null"
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"lineColor": "rgb(31, 120, 193)",
"show": true
"tableColumn": "",
"targets": [
"expr": "sum(rate(container_cpu_usage_seconds_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
"thresholds": "",
"title": "CPU",
"tooltip": {
"shared": false
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
"op": "=",
"text": "0",
"value": "null"
"valueName": "current"
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(237, 129, 40, 0.89)",
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
"gridPos": {
"id": 3,
"interval": null,
"links": [
"mappingType": 1,
"mappingTypes": [
"name": "value to text",
"value": 1
"name": "range to text",
"value": 2
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "GB",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
"from": "null",
"text": "N/A",
"to": "null"
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"lineColor": "rgb(31, 120, 193)",
"show": true
"tableColumn": "",
"targets": [
"expr": "sum(container_memory_usage_bytes{job=\"{{ include "exporter.kubelet.jobName" . }}\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}) / 1024^3",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
"thresholds": "",
"title": "Memory",
"tooltip": {
"shared": false
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
"op": "=",
"text": "0",
"value": "null"
"valueName": "current"
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(237, 129, 40, 0.89)",
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
"gridPos": {
"id": 4,
"interval": null,
"links": [
"mappingType": 1,
"mappingTypes": [
"name": "value to text",
"value": 1
"name": "range to text",
"value": 2
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "Bps",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
"from": "null",
"text": "N/A",
"to": "null"
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"lineColor": "rgb(31, 120, 193)",
"show": true
"tableColumn": "",
"targets": [
"expr": "sum(rate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod_name=~\"$statefulset.*\"}[3m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
"thresholds": "",
"title": "Network",
"tooltip": {
"shared": false
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
"op": "=",
"text": "0",
"value": "null"
"valueName": "current"
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
"collapse": false,
"collapsed": false,
"height": "100px",
"panels": [
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(237, 129, 40, 0.89)",
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
"gridPos": {
"id": 5,
"interval": null,
"links": [
"mappingType": 1,
"mappingTypes": [
"name": "value to text",
"value": 1
"name": "range to text",
"value": 2
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
"from": "null",
"text": "N/A",
"to": "null"
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
"tableColumn": "",
"targets": [
"expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
"thresholds": "",
"title": "Desired Replicas",
"tooltip": {
"shared": false
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
"op": "=",
"text": "0",
"value": "null"
"valueName": "current"
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(237, 129, 40, 0.89)",
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
"gridPos": {
"id": 6,
"interval": null,
"links": [
"mappingType": 1,
"mappingTypes": [
"name": "value to text",
"value": 1
"name": "range to text",
"value": 2
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
"from": "null",
"text": "N/A",
"to": "null"
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
"tableColumn": "",
"targets": [
"expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
"thresholds": "",
"title": "Replicas of current version",
"tooltip": {
"shared": false
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
"op": "=",
"text": "0",
"value": "null"
"valueName": "current"
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(237, 129, 40, 0.89)",
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
"gridPos": {
"id": 7,
"interval": null,
"links": [
"mappingType": 1,
"mappingTypes": [
"name": "value to text",
"value": 1
"name": "range to text",
"value": 2
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
"from": "null",
"text": "N/A",
"to": "null"
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
"tableColumn": "",
"targets": [
"expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
"thresholds": "",
"title": "Observed Generation",
"tooltip": {
"shared": false
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
"op": "=",
"text": "0",
"value": "null"
"valueName": "current"
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(237, 129, 40, 0.89)",
"datasource": "$datasource",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
"gridPos": {
"id": 8,
"interval": null,
"links": [
"mappingType": 1,
"mappingTypes": [
"name": "value to text",
"value": 1
"name": "range to text",
"value": 2
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
"from": "null",
"text": "N/A",
"to": "null"
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
"tableColumn": "",
"targets": [
"expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
"thresholds": "",
"title": "Metadata Generation",
"tooltip": {
"shared": false
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
"op": "=",
"text": "0",
"value": "null"
"valueName": "current"
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
"collapse": false,
"collapsed": false,
"panels": [
"aliasColors": {
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"id": 9,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
"lines": true,
"linewidth": 1,
"links": [
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
"expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "replicas specified",
"refId": "A"
"expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "replicas created",
"refId": "B"
"expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "ready",
"refId": "C"
"expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "replicas of current version",
"refId": "D"
"expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "updated",
"refId": "E"
"thresholds": [
"timeFrom": null,
"timeShift": null,
"title": "Replicas",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"yaxes": [
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
"schemaVersion": 14,
"style": "dark",
"tags": [
"templating": {
"list": [
"current": {
"text": "Prometheus",
"value": "Prometheus"
"hide": 0,
"label": null,
"name": "datasource",
"options": [
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
"allValue": null,
"current": {
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [
"query": "label_values(kube_statefulset_metadata_generation, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "Namespace",
"multi": false,
"name": "namespace",
"options": [
"query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\"}, namespace)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"allValue": null,
"current": {
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "Name",
"multi": false,
"name": "statefulset",
"options": [
"query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", namespace=\"$namespace\"}, statefulset)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
"tagsQuery": "",
"type": "query",
"useTags": false
"time": {
"from": "now-1h",
"to": "now"
"timepicker": {
"refresh_intervals": [
"time_options": [
"timezone": "",
"title": "Kubernetes / StatefulSets",
"uid": "a31c1f46e6f727cb37c0d731a7245005",
"version": 0
{{- end }}
@ -24,6 +24,9 @@ spec:
{{- if .Values.ingressNginx.serviceMonitor.interval}}
interval: {{ .Values.ingressNginx.serviceMonitor.interval }}
{{- end }}
{{- if .Values.ingressNginx.serviceMonitor.proxyUrl }}
proxyUrl: {{ .Values.ingressNginx.serviceMonitor.proxyUrl}}
{{- end }}
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
{{- if .Values.ingressNginx.serviceMonitor.metricRelabelings }}
@ -131,3 +131,12 @@
{{/* vim: set filetype=mustache: */}}
{{/* Expand the name of the chart. This is suffixed with -alertmanager, which means subtract 13 from longest 63 available */}}
{{- define "kube-prometheus-stack.name" -}}
@@ -48,7 +175,7 @@
{{- define "kube-prometheus-stack.labels" }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/instance: {{ .Release.Name }}
-app.kubernetes.io/version: "{{ .Chart.Version }}"
+app.kubernetes.io/version: "{{ replace "+" "_" .Chart.Version }}"
app.kubernetes.io/part-of: {{ template "kube-prometheus-stack.name" . }}
chart: {{ template "kube-prometheus-stack.chartref" . }}
release: {{ $.Release.Name | quote }}
@ -9,10 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "apiserver" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1744,4 +1744,4 @@
"uid": "09ec8aa1e996d6ffcd6817bbaff4db1b",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,19 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "cluster-total" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1825,7 +1825,7 @@
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
- "label": null,
+ "label": "cluster",
"multi": false,
"name": "cluster",
"options": [
@@ -1879,4 +1879,4 @@
"uid": "ff635a025bcfea7bc3dd4f508990a3e9",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -19,7 +19,6 @@
"title": "etcd",
"version": 215
-{{- end }}
+{{- end }}
{{- end }}
\ No newline at end of file
+{{- end }}
+{{- end }}
@ -1,14 +1,11 @@
--- charts-original/templates/grafana/dashboards-1.14/k8s-coredns.yaml
+++ charts/templates/grafana/dashboards-1.14/k8s-coredns.yaml
@@ -4,10 +4,8 @@
@@ -4,7 +4,7 @@
apiVersion: v1
kind: ConfigMap
- namespace: {{ template "kube-prometheus-stack.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-coredns" | trunc 63 | trimSuffix "-" }}
- annotations:
-{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
{{- if $.Values.grafana.sidecar.dashboards.label }}
{{ $.Values.grafana.sidecar.dashboards.label }}: "1"
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@ -9,10 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -3021,4 +3021,4 @@
"uid": "efa86fd1d0c121a26444b636a3f509a8",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,10 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -2741,4 +2741,4 @@
"uid": "85a562078cdf77779eaa1add43ccec1e",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,10 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-node" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -975,4 +975,4 @@
"uid": "200ac8fdbfbb74b39aff88118e4d1c2c",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,10 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-pod" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -2424,4 +2424,4 @@
"uid": "6581e46e4e5c7ba40a07646395ef7b23",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,10 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1983,4 +1983,4 @@
"uid": "a164a7f0339f99e89cea5cb47e9be617",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,10 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -2148,4 +2148,4 @@
"uid": "a87fb0d919ec0ea5f6543124e16c42a5",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -284,7 +284,6 @@
"uid": "3138fa155d5915769fbded898ac09fd9",
"version": 0
-{{- end }}
+{{- end }}
{{- end }}
\ No newline at end of file
+{{- end }}
+{{- end }}
@ -9,19 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-pod" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1295,7 +1295,7 @@
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
- "label": null,
+ "label": "cluster",
"multi": false,
"name": "cluster",
"options": [
@@ -1461,4 +1461,4 @@
"uid": "8b7a8b326d7a6f1f04244066368c67af",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,19 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-workload" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1535,7 +1535,7 @@
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
- "label": null,
+ "label": "cluster",
"multi": false,
"name": "cluster",
"options": [
@@ -1733,4 +1733,4 @@
"uid": "bbb2a765a623ae38130206c7d94a160f",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,10 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -961,4 +961,4 @@
"uid": "",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,10 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-rsrc-use" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -988,4 +988,4 @@
"uid": "",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,10 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -988,4 +988,4 @@
"title": "Nodes",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -79,10 +79,3 @@
"refresh": 2,
"regex": "",
"sort": 1,
@@ -574,4 +574,4 @@
"uid": "919b92a8e8041bd567af9edab12c840c",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,19 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pod-total" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1027,7 +1027,7 @@
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
- "label": null,
+ "label": "cluster",
"multi": false,
"name": "cluster",
"options": [
@@ -1225,4 +1225,4 @@
"uid": "7a18067ce943a40ae25454675c19ff5c",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,10 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus-remote-write" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1667,4 +1667,4 @@
"title": "Prometheus / Remote Write",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,10 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1224,4 +1224,4 @@
"uid": "",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -158,3 +158,4 @@
-{{- end }}
\ No newline at end of file
+{{- end }}{{- end }}
\ No newline at end of file
@ -185,7 +185,6 @@
"uid": "2e6b6a3b4bddf1427b3a55aa1311c656",
"version": 0
-{{- end }}
+{{- end }}
{{- end }}
\ No newline at end of file
+{{- end }}
+{{- end }}
@ -36,3 +36,10 @@
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -925,4 +925,4 @@
"uid": "a31c1f46e6f727cb37c0d731a7245005",
"version": 0
-{{- end }}
+{{- end }}
\ No newline at end of file
@ -9,19 +9,3 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "workload-total" | trunc 63 | trimSuffix "-" }}
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1205,7 +1205,7 @@
"datasource": "$datasource",
"hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }},
"includeAll": false,
- "label": null,
+ "label": "cluster",
"multi": false,
"name": "cluster",
"options": [
@@ -1435,4 +1435,4 @@
"uid": "728bf77cc1166d2f3133bf25846876cc",
"version": 0
-{{- end }}
\ No newline at end of file
+{{- end }}
@ -9,7 +9,7 @@
version: {{ .Values.prometheus.prometheusSpec.image.tag }}
{{- if .Values.prometheus.prometheusSpec.image.sha }}
sha: {{ .Values.prometheus.prometheusSpec.image.sha }}
@@ -56,11 +56,16 @@
@@ -56,11 +56,13 @@
externalUrl: "{{ tpl .Values.prometheus.prometheusSpec.externalUrl . }}"
{{- else if and .Values.prometheus.ingress.enabled .Values.prometheus.ingress.hosts }}
externalUrl: "http://{{ tpl (index .Values.prometheus.ingress.hosts 0) . }}{{ .Values.prometheus.prometheusSpec.routePrefix }}"
@ -18,16 +18,13 @@
{{- else }}
externalUrl: http://{{ template "kube-prometheus-stack.fullname" . }}-prometheus.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.prometheus.service.port }}
{{- end }}
+{{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }}
+ ignoreNamespaceSelectors: {{ .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }}
+{{- end }}
+ nodeSelector: {{ include "linux-node-selector" . | nindent 4 }}
{{- if .Values.prometheus.prometheusSpec.nodeSelector }}
- nodeSelector:
{{ toYaml .Values.prometheus.prometheusSpec.nodeSelector | indent 4 }}
{{- end }}
paused: {{ .Values.prometheus.prometheusSpec.paused }}
@@ -232,8 +237,8 @@
@@ -232,8 +234,8 @@
- {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-prometheus]}
{{- end }}
{{- end }}
@ -37,7 +34,7 @@
{{ toYaml .Values.prometheus.prometheusSpec.tolerations | indent 4 }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.topologySpreadConstraints }}
@@ -266,7 +271,7 @@
@@ -266,7 +268,7 @@
{{- end }}
{{- if .Values.prometheus.prometheusSpec.containers }}
@ -46,7 +43,7 @@
{{- end }}
{{- if .Values.prometheus.prometheusSpec.initContainers }}
@@ -282,6 +287,7 @@
@@ -282,6 +284,7 @@
{{- if .Values.prometheus.prometheusSpec.disableCompaction }}
disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }}
{{- end }}
@ -54,7 +51,7 @@
portName: {{ .Values.prometheus.prometheusSpec.portName }}
{{- end }}
{{- if .Values.prometheus.prometheusSpec.volumes }}
@@ -326,3 +332,4 @@
@@ -326,3 +329,4 @@
{{- if .Values.prometheus.prometheusSpec.allowOverlappingBlocks }}
allowOverlappingBlocks: {{ .Values.prometheus.prometheusSpec.allowOverlappingBlocks }}
{{- end }}
@ -14,7 +14,6 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
-{{- end }}
+{{- end }}
{{- end }}
\ No newline at end of file
+{{- end }}
+{{- end }}
@ -1,20 +0,0 @@
--- charts-original/templates/prometheus/rules/etcd.yaml
+++ charts/templates/prometheus/rules/etcd.yaml
@@ -4,7 +4,8 @@
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
-{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeEtcd.enabled .Values.defaultRules.rules.etcd }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.etcd }}
+{{- if (include "exporter.kubeEtcd.enabled" .)}}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
@@ -176,4 +177,5 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
-{{- end }}
\ No newline at end of file
+{{- end }}
+{{- end }}
@ -1,19 +0,0 @@
--- charts-original/templates/prometheus/rules/k8s.rules.yaml
+++ charts/templates/prometheus/rules/k8s.rules.yaml
@@ -24,13 +24,13 @@
- name: k8s.rules
- - expr: sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace)
+ - expr: sum(rate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", image!="", container_name!=""}[5m])) by (namespace)
record: namespace:container_cpu_usage_seconds_total:sum_rate
- - expr: sum(container_memory_usage_bytes{job="kubelet", image!="", container_name!=""}) by (namespace)
+ - expr: sum(container_memory_usage_bytes{job="{{ include "exporter.kubelet.jobName" . }}", image!="", container_name!=""}) by (namespace)
record: namespace:container_memory_usage_bytes:sum
- expr: |-
sum by (namespace, pod_name, container_name) (
- rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])
+ rate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", image!="", container_name!=""}[5m])
record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate
- expr: |-
@ -1,64 +0,0 @@
--- charts-original/templates/prometheus/rules/kube-scheduler.rules.yaml
+++ charts/templates/prometheus/rules/kube-scheduler.rules.yaml
@@ -4,7 +4,8 @@
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
-{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeScheduler }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubeScheduler }}
+{{- if (include "exporter.kubeScheduler.enabled" .)}}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
@@ -24,40 +25,41 @@
- name: kube-scheduler.rules
- - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06
quantile: '0.99'
record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
- - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06
quantile: '0.99'
record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
- - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06
quantile: '0.99'
record: cluster_quantile:scheduler_binding_latency:histogram_quantile
- - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06
quantile: '0.9'
record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
- - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06
quantile: '0.9'
record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
- - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06
quantile: '0.9'
record: cluster_quantile:scheduler_binding_latency:histogram_quantile
- - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06
quantile: '0.5'
record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
- - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06
quantile: '0.5'
record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
- - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+ - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06
quantile: '0.5'
record: cluster_quantile:scheduler_binding_latency:histogram_quantile
+{{- end }}
{{- end }}
\ No newline at end of file
@ -1,47 +0,0 @@
--- charts-original/templates/prometheus/rules/kubernetes-absent.yaml
+++ charts/templates/prometheus/rules/kubernetes-absent.yaml
@@ -67,12 +67,12 @@
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
-{{- if .Values.kubeControllerManager.enabled }}
+{{- if (include "exporter.kubeControllerManager.enabled" .)}}
- alert: KubeControllerManagerDown
message: KubeControllerManager has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecontrollermanagerdown
- expr: absent(up{job="kube-controller-manager"} == 1)
+ expr: absent(up{job="{{ include "exporter.kubeControllerManager.jobName" . }}"} == 1)
for: 15m
severity: critical
@@ -80,12 +80,12 @@
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
-{{- if .Values.kubeScheduler.enabled }}
+{{- if (include "exporter.kubeScheduler.enabled" .)}}
- alert: KubeSchedulerDown
message: KubeScheduler has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeschedulerdown
- expr: absent(up{job="kube-scheduler"} == 1)
+ expr: absent(up{job="{{ include "exporter.kubeScheduler.jobName" . }}"} == 1)
for: 15m
severity: critical
@@ -106,12 +106,12 @@
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
{{- end }}
-{{- if .Values.prometheusOperator.kubeletService.enabled }}
+{{- if (include "exporter.kubeletService.enabled" .) }}
- alert: KubeletDown
message: Kubelet has disappeared from Prometheus target discovery.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletdown
- expr: absent(up{job="kubelet"} == 1)
+ expr: absent(up{job="{{ include "exporter.kubelet.jobName" . }}"} == 1)
for: 15m
severity: critical
@ -1,30 +0,0 @@
--- charts-original/templates/prometheus/rules/kubernetes-storage.yaml
+++ charts/templates/prometheus/rules/kubernetes-storage.yaml
@@ -30,9 +30,9 @@
message: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} printf "%0.2f" $value {{`}}`}}% free.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeusagecritical
expr: |-
- 100 * kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
+ 100 * kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}"}
- kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
+ kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}"}
< 3
for: 1m
@@ -46,12 +46,12 @@
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefullinfourdays
expr: |-
100 * (
- kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
+ kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}"}
- kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}
+ kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}"}
) < 15
- predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}[6h], 4 * 24 * 3600) < 0
+ predict_linear(kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}"}[6h], 4 * 24 * 3600) < 0
for: 5m
severity: critical
@ -1,11 +0,0 @@
--- charts-original/templates/prometheus/rules/kubernetes-system.yaml
+++ charts/templates/prometheus/rules/kubernetes-system.yaml
@@ -76,7 +76,7 @@
message: Kubelet {{`{{`}} $labels.instance {{`}}`}} is running {{`{{`}} $value {{`}}`}} Pods, close to the limit of 110.
runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubelettoomanypods
- expr: kubelet_running_pod_count{job="kubelet"} > 110 * 0.9
+ expr: kubelet_running_pod_count{job="{{ include "exporter.kubelet.jobName" . }}"} > 110 * 0.9
for: 15m
severity: warning
@ -1,6 +1,6 @@
--- charts-original/values.yaml
+++ charts/values.yaml
@@ -2,13 +2,423 @@
@@ -2,13 +2,427 @@
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
@ -393,6 +393,10 @@
+ ##
+ interval: ""
+ ## proxyUrl: URL of a proxy that should be used for scraping.
+ ##
+ proxyUrl: ""
+ ## metric relabel configs to apply to samples before ingestion.
+ ##
+ metricRelabelings: []
@ -426,7 +430,7 @@
## Provide a k8s version to auto dashboard import script example: kubeTargetVersionOverride: 1.16.6
@@ -93,8 +503,32 @@
@@ -93,8 +507,32 @@
@ -459,30 +463,7 @@
pspEnabled: true
pspAnnotations: {}
## Specify pod annotations
@@ -151,6 +585,22 @@
## ref: https://prometheus.io/docs/alerting/configuration/#configuration-file
## https://prometheus.io/webtools/alerting/routing-tree-editor/
+ ## Example Slack Config
+ ## config:
+ ## route:
+ ## group_by: ['job']
+ ## group_wait: 30s
+ ## group_interval: 5m
+ ## repeat_interval: 3h
+ ## receiver: 'slack-notifications'
+ ## receivers:
+ ## - name: 'slack-notifications'
+ ## slack_configs:
+ ## - send_resolved: true
+ ## text: '{{ template "slack.rancher.text" . }}'
+ ## api_url: <slack-webhook-url-here>
+ ## templates:
+ ## - /etc/alertmanager/config/*.tmpl
resolve_timeout: 5m
@@ -187,25 +637,76 @@
@@ -187,25 +625,76 @@
## ref: https://prometheus.io/docs/alerting/notifications/
## https://prometheus.io/docs/alerting/notification_examples/
@ -578,7 +559,7 @@
enabled: false
@@ -243,6 +744,25 @@
@@ -243,6 +732,25 @@
## Configuration for Alertmanager secret
@ -604,16 +585,7 @@
annotations: {}
## Configuration for creating an Ingress that will map to each Alertmanager replica service
@@ -364,7 +884,7 @@
## Of type: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig
tlsConfig: {}
- bearerTokenFile:
+ bearerTokenFile: ""
## metric relabel configs to apply to samples before ingestion.
@@ -395,7 +915,7 @@
@@ -395,7 +903,7 @@
## Image of Alertmanager
@ -622,7 +594,7 @@
tag: v0.22.2
sha: ""
@@ -507,9 +1027,13 @@
@@ -507,9 +1015,13 @@
## Define resources requests and limits for single Pods.
## ref: https://kubernetes.io/docs/user-guide/compute-resources/
@ -639,7 +611,7 @@
## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node.
## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided.
@@ -613,6 +1137,30 @@
@@ -613,6 +1125,30 @@
enabled: true
namespaceOverride: ""
@ -670,7 +642,7 @@
## ForceDeployDatasources Create datasource configmap even if grafana deployment has been disabled
forceDeployDatasources: false
@@ -625,6 +1173,18 @@
@@ -625,6 +1161,18 @@
defaultDashboardsEnabled: true
@ -689,7 +661,7 @@
adminPassword: prom-operator
@@ -664,6 +1224,7 @@
@@ -664,6 +1212,7 @@
enabled: true
label: grafana_dashboard
@ -697,7 +669,7 @@
## Annotations for Grafana dashboard configmaps
@@ -716,7 +1277,60 @@
@@ -716,7 +1265,60 @@
## Passed to grafana subchart and used by servicemonitor below
@ -759,7 +731,7 @@
## If true, create a serviceMonitor for grafana
@@ -746,6 +1360,14 @@
@@ -746,6 +1348,14 @@
# targetLabel: nodename
# replacement: $1
# action: replace
@ -774,7 +746,7 @@
## Component scraping the kube api server
@@ -907,7 +1529,7 @@
@@ -907,7 +1517,7 @@
## Component scraping the kube controller manager
@ -783,7 +755,7 @@
## If your kube controller manager is not deployed as a pod, specify IPs it can be found on
@@ -1054,7 +1676,7 @@
@@ -1054,7 +1664,7 @@
## Component scraping etcd
@ -792,7 +764,7 @@
## If your etcd is not deployed as a pod, specify IPs it can be found on
@@ -1119,7 +1741,7 @@
@@ -1119,7 +1729,7 @@
## Component scraping kube scheduler
@ -801,7 +773,7 @@
## If your kube scheduler is not deployed as a pod, specify IPs it can be found on
@@ -1177,7 +1799,7 @@
@@ -1177,7 +1787,7 @@
## Component scraping kube proxy
@ -810,7 +782,7 @@
## If your kube proxy is not deployed as a pod, specify IPs it can be found on
@@ -1266,6 +1888,13 @@
@@ -1266,6 +1876,13 @@
create: true
enabled: true
@ -824,7 +796,7 @@
## Deploy node exporter as a daemonset to all nodes
@@ -1319,6 +1948,16 @@
@@ -1319,6 +1936,16 @@
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
@ -841,7 +813,7 @@
## Manages Prometheus and Alertmanager components
@@ -1331,8 +1970,8 @@
@@ -1331,8 +1958,8 @@
enabled: true
# Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants
tlsMinVersion: VersionTLS13
@ -852,7 +824,7 @@
## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted
## rules from making their way into prometheus and potentially preventing the container from starting
@@ -1349,7 +1988,7 @@
@@ -1349,7 +1976,7 @@
enabled: true
@ -861,7 +833,7 @@
tag: v1.5.2
sha: ""
pullPolicy: IfNotPresent
@@ -1498,13 +2137,13 @@
@@ -1498,13 +2125,13 @@
## Resource limits & requests
@ -882,7 +854,7 @@
# Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico),
# because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working
@@ -1557,7 +2196,7 @@
@@ -1557,7 +2184,7 @@
## Prometheus-operator image
@ -891,7 +863,7 @@
tag: v0.48.0
sha: ""
pullPolicy: IfNotPresent
@@ -1573,7 +2212,7 @@
@@ -1573,7 +2200,7 @@
## Prometheus-config-reloader image to use for config and rule reloading
@ -900,7 +872,7 @@
tag: v0.48.0
sha: ""
@@ -1659,7 +2298,7 @@
@@ -1659,7 +2286,7 @@
port: 9090
## To be used with a proxy extraContainer port
@ -909,7 +881,7 @@
## List of IP addresses at which the Prometheus server service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
@@ -1916,7 +2555,7 @@
@@ -1916,7 +2543,7 @@
## Image of Prometheus.
@ -918,7 +890,7 @@
tag: v2.27.1
sha: ""
@@ -1979,6 +2618,11 @@
@@ -1979,6 +2606,11 @@
externalUrl: ""
@ -930,7 +902,7 @@
## Define which Nodes the Pods are scheduled on.
## ref: https://kubernetes.io/docs/user-guide/node-selection/
@@ -2011,7 +2655,7 @@
@@ -2011,7 +2643,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the PrometheusRule resources created
@ -939,7 +911,7 @@
## PrometheusRules to be selected for target discovery.
## If {}, select all PrometheusRules
@@ -2036,7 +2680,7 @@
@@ -2036,7 +2668,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the servicemonitors created
@ -948,7 +920,7 @@
## ServiceMonitors to be selected for target discovery.
## If {}, select all ServiceMonitors
@@ -2059,7 +2703,7 @@
@@ -2059,7 +2691,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the podmonitors created
@ -957,7 +929,7 @@
## PodMonitors to be selected for target discovery.
## If {}, select all PodMonitors
@@ -2190,9 +2834,13 @@
@@ -2190,9 +2822,13 @@
## Resource limits & requests
@ -974,7 +946,7 @@
## Prometheus StorageSpec for persistent data
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/storage.md
@@ -2215,7 +2863,13 @@
@@ -2215,7 +2851,13 @@
# medium: Memory
# Additional volumes on the output StatefulSet definition.
@ -989,7 +961,7 @@
# Additional VolumeMounts on the output StatefulSet definition.
volumeMounts: []
@@ -2322,9 +2976,34 @@
@@ -2322,9 +2964,34 @@
thanos: {}
@ -1025,7 +997,7 @@
## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes
## (permissions, dir tree) on mounted volumes before starting prometheus
@@ -2332,7 +3011,7 @@
@@ -2332,7 +2999,7 @@
## PortName to use for Prometheus.
@ -1,5 +1,5 @@
apiVersion: v1
version: 14.5.1
version: 16.6.0
description: Installs the CRDs for rancher-monitoring.
name: rancher-monitoring-crd
type: application
Reference in New Issue