{{- /* Mirror of https://git.rancher.io/charts */ -}}
{{- /*
Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- /*
Render gate: this PrometheusRule is emitted only when the target Kubernetes
version (kubeTargetVersionOverride, falling back to the cluster's GitVersion)
is >=1.10.0-0 and <1.14.0-0, and defaultRules.create, nodeExporter.enabled and
defaultRules.rules.node are all set.
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.nodeExporter.enabled .Values.defaultRules.rules.node }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ template "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ template "kube-prometheus-stack.name" . }}
{{ include "kube-prometheus-stack.labels" . | indent 4 }}
{{- if .Values.defaultRules.labels }}
{{ toYaml .Values.defaultRules.labels | indent 4 }}
{{- end }}
{{- if .Values.defaultRules.annotations }}
  annotations:
{{ toYaml .Values.defaultRules.annotations | indent 4 }}
{{- end }}
spec:
  groups:
  - name: node.rules
    rules:
    # Pod/node mapping series. The per-node rules below join against
    # node_namespace_pod:kube_pod_info: on (namespace, pod) to attach the
    # "node" label.
    - expr: sum(min(kube_pod_info) by (node))
      record: ':kube_pod_info_node_count:'
    - expr: max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod)
      record: 'node_namespace_pod:kube_pod_info:'
    # CPU: per-node CPU count, idle-based utilisation (1m rate) and
    # load1-based saturation, cluster-wide and per node.
    - expr: |-
        count by (node) (sum by (node, cpu) (
          node_cpu_seconds_total{job="node-exporter"}
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        ))
      record: node:node_num_cpu:sum
    - expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]))
      record: :node_cpu_utilisation:avg1m
    - expr: |-
        1 - avg by (node) (
          rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:)
      record: node:node_cpu_utilisation:avg1m
    - expr: |-
        node:node_cpu_utilisation:avg1m
          *
        node:node_num_cpu:sum
          /
        scalar(sum(node:node_num_cpu:sum))
      record: node:cluster_cpu_utilisation:ratio
    - expr: |-
        sum(node_load1{job="node-exporter"})
        /
        sum(node:node_num_cpu:sum)
      record: ':node_cpu_saturation_load1:'
    - expr: |-
        sum by (node) (
          node_load1{job="node-exporter"}
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
        /
        node:node_num_cpu:sum
      record: 'node:node_cpu_saturation_load1:'
    # Memory: "available" is MemFree + Cached + Buffers; utilisation is
    # 1 - available / MemTotal, cluster-wide and per node.
    - expr: |-
        1 -
        sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
        /
        sum(node_memory_MemTotal_bytes{job="node-exporter"})
      record: ':node_memory_utilisation:'
    - expr: sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
      record: :node_memory_MemFreeCachedBuffers_bytes:sum
    - expr: sum(node_memory_MemTotal_bytes{job="node-exporter"})
      record: :node_memory_MemTotal_bytes:sum
    - expr: |-
        sum by (node) (
          (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
          * on (namespace, pod) group_left(node)
            node_namespace_pod:kube_pod_info:
        )
      record: node:node_memory_bytes_available:sum
    - expr: |-
        sum by (node) (
          node_memory_MemTotal_bytes{job="node-exporter"}
          * on (namespace, pod) group_left(node)
            node_namespace_pod:kube_pod_info:
        )
      record: node:node_memory_bytes_total:sum
    - expr: |-
        (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
        /
        node:node_memory_bytes_total:sum
      record: node:node_memory_utilisation:ratio
    - expr: |-
        (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
        /
        scalar(sum(node:node_memory_bytes_total:sum))
      record: node:cluster_memory_utilisation:ratio
    # NOTE(review): the *_swap_io_bytes rules are computed from
    # node_vmstat_pgpgin + node_vmstat_pgpgout scaled by 1e3; confirm against
    # the upstream generator that this is the intended source and unit.
    - expr: |-
        1e3 * sum(
          (rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
         + rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
        )
      record: :node_memory_swap_io_bytes:sum_rate
    - expr: |-
        1 -
        sum by (node) (
          (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
        /
        sum by (node) (
          node_memory_MemTotal_bytes{job="node-exporter"}
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
      record: 'node:node_memory_utilisation:'
    - expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum)
      record: 'node:node_memory_utilisation_2:'
    - expr: |-
        1e3 * sum by (node) (
          (rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
         + rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
         * on (namespace, pod) group_left(node)
           node_namespace_pod:kube_pod_info:
        )
      record: node:node_memory_swap_io_bytes:sum_rate
    # Disk: utilisation from io_time, saturation from weighted io_time,
    # restricted to nvme/rbd/sd/vd/xvd/dm- devices.
    - expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
      record: :node_disk_utilisation:avg_irate
    - expr: |-
        avg by (node) (
          irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
      record: node:node_disk_utilisation:avg_irate
    - expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
      record: :node_disk_saturation:avg_irate
    - expr: |-
        avg by (node) (
          irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
      record: node:node_disk_saturation:avg_irate
    # Filesystem: used and available fractions for ext2/3/4, btrfs, xfs and
    # zfs filesystems, keyed by (instance, namespace, pod, device).
    - expr: |-
        max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
        - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
        / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
      record: 'node:node_filesystem_usage:'
    - expr: max by (instance, namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
      record: 'node:node_filesystem_avail:'
    # Network: utilisation is receive + transmit bytes, saturation is
    # receive + transmit drops; devices matching veth.+ are excluded.
    - expr: |-
        sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) +
        sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
      record: :node_net_utilisation:sum_irate
    - expr: |-
        sum by (node) (
          (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) +
          irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
      record: node:node_net_utilisation:sum_irate
    - expr: |-
        sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) +
        sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
      record: :node_net_saturation:sum_irate
    - expr: |-
        sum by (node) (
          (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) +
          irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
      record: node:node_net_saturation:sum_irate
    # Inodes: total and free inodes of the "/" mountpoint per node. The
    # node-exporter "instance" label (host:port) is rewritten to "host_ip" so
    # it can be matched against kube_pod_info's host_ip.
    - expr: |-
        max(
          max(
            kube_pod_info{job="kube-state-metrics", host_ip!=""}
          ) by (node, host_ip)
          * on (host_ip) group_right (node)
          label_replace(
            (max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
          )
        ) by (node)
      record: 'node:node_inodes_total:'
    - expr: |-
        max(
          max(
            kube_pod_info{job="kube-state-metrics", host_ip!=""}
          ) by (node, host_ip)
          * on (host_ip) group_right (node)
          label_replace(
            (max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
          )
        ) by (node)
      record: 'node:node_inodes_free:'
{{- end }}