{{- /*
Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
Do not change in-place! In order to change this file first read following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- /* Target version: .Values.kubeTargetVersionOverride when set, otherwise the GitVersion reported through .Capabilities. */}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- /* This rule group is rendered only for target versions >=1.10.0 and <1.14.0 (pre-releases included via the "-0" suffix). */}}
{{- $versionInRange := and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) }}
{{- /* Both defaultRules.create and defaultRules.rules.node must also be enabled. */}}
{{- if and $versionInRange .Values.defaultRules.create .Values.defaultRules.rules.node }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  {{- /* Resource name is "<fullname>-node.rules", cut to 63 characters with any trailing "-" removed. */}}
  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }}
  namespace: {{ include "kube-prometheus-stack.namespace" . }}
  labels:
    app: {{ include "kube-prometheus-stack.name" . }}
    {{- include "kube-prometheus-stack.labels" . | nindent 4 }}
    {{- /* Optional extra labels from values are appended after the chart labels. */}}
    {{- with .Values.defaultRules.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  {{- /* The annotations key is emitted only when values provide annotations. */}}
  {{- with .Values.defaultRules.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  groups:
  - name: node.rules
    rules:
    {{- /* Node count and the pod-to-node mapping series that the per-node rules below join against on (namespace, pod). */}}
    - expr: sum(min(kube_pod_info) by (node))
      record: ':kube_pod_info_node_count:'
    - expr: max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod)
      record: 'node_namespace_pod:kube_pod_info:'
    {{- /* CPU: CPU count per node, utilisation (1 - idle rate over 1m) and load1-based saturation, cluster-wide and per node. */}}
    - expr: |-
        count by (node) (sum by (node, cpu) (
          node_cpu_seconds_total{job="node-exporter"}
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        ))
      record: node:node_num_cpu:sum
    - expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]))
      record: :node_cpu_utilisation:avg1m
    - expr: |-
        1 - avg by (node) (
          rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:)
      record: node:node_cpu_utilisation:avg1m
    - expr: |-
        node:node_cpu_utilisation:avg1m
          *
        node:node_num_cpu:sum
          /
        scalar(sum(node:node_num_cpu:sum))
      record: node:cluster_cpu_utilisation:ratio
    - expr: |-
        sum(node_load1{job="node-exporter"})
        /
        sum(node:node_num_cpu:sum)
      record: ':node_cpu_saturation_load1:'
    - expr: |-
        sum by (node) (
          node_load1{job="node-exporter"}
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
        /
        node:node_num_cpu:sum
      record: 'node:node_cpu_saturation_load1:'
    {{- /* Memory: utilisation from MemFree + Cached + Buffers against MemTotal, plus the pgpgin + pgpgout rate scaled by 1e3, cluster-wide and per node. */}}
    - expr: |-
        1 -
        sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
        /
        sum(node_memory_MemTotal_bytes{job="node-exporter"})
      record: ':node_memory_utilisation:'
    - expr: sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
      record: :node_memory_MemFreeCachedBuffers_bytes:sum
    - expr: sum(node_memory_MemTotal_bytes{job="node-exporter"})
      record: :node_memory_MemTotal_bytes:sum
    - expr: |-
        sum by (node) (
          (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
          * on (namespace, pod) group_left(node)
            node_namespace_pod:kube_pod_info:
        )
      record: node:node_memory_bytes_available:sum
    - expr: |-
        sum by (node) (
          node_memory_MemTotal_bytes{job="node-exporter"}
          * on (namespace, pod) group_left(node)
            node_namespace_pod:kube_pod_info:
        )
      record: node:node_memory_bytes_total:sum
    - expr: |-
        (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
        /
        node:node_memory_bytes_total:sum
      record: node:node_memory_utilisation:ratio
    - expr: |-
        (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
        /
        scalar(sum(node:node_memory_bytes_total:sum))
      record: node:cluster_memory_utilisation:ratio
    - expr: |-
        1e3 * sum(
          (rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
         + rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
        )
      record: :node_memory_swap_io_bytes:sum_rate
    - expr: |-
        1 -
        sum by (node) (
          (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
        /
        sum by (node) (
          node_memory_MemTotal_bytes{job="node-exporter"}
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
      record: 'node:node_memory_utilisation:'
    - expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum)
      record: 'node:node_memory_utilisation_2:'
    - expr: |-
        1e3 * sum by (node) (
          (rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
         + rate(node_vmstat_pgpgout{job="node-exporter"}[1m]))
         * on (namespace, pod) group_left(node)
           node_namespace_pod:kube_pod_info:
        )
      record: node:node_memory_swap_io_bytes:sum_rate
    {{- /* Disk: irate over 1m of io_time (utilisation) and io_time_weighted (saturation) for the listed device name patterns, cluster-wide and per node. */}}
    - expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
      record: :node_disk_utilisation:avg_irate
    - expr: |-
        avg by (node) (
          irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
      record: node:node_disk_utilisation:avg_irate
    - expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
      record: :node_disk_saturation:avg_irate
    - expr: |-
        avg by (node) (
          irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
      record: node:node_disk_saturation:avg_irate
    {{- /* Filesystem: used and available fraction per (instance, namespace, pod, device) for ext2/3/4, btrfs, xfs and zfs. */}}
    - expr: |-
        max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
        - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
        / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
      record: 'node:node_filesystem_usage:'
    - expr: max by (instance, namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
      record: 'node:node_filesystem_avail:'
    {{- /* Network: receive + transmit byte rates (utilisation) and drop rates (saturation), excluding veth devices, cluster-wide and per node. */}}
    - expr: |-
        sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) +
        sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
      record: :node_net_utilisation:sum_irate
    - expr: |-
        sum by (node) (
          (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) +
          irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m]))
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
      record: node:node_net_utilisation:sum_irate
    - expr: |-
        sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) +
        sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
      record: :node_net_saturation:sum_irate
    - expr: |-
        sum by (node) (
          (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) +
          irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m]))
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
      record: node:node_net_saturation:sum_irate
    {{- /* Inodes: total and free inodes of the "/" mountpoint per node; node-exporter series are matched to nodes through a host_ip label derived from the part of the instance label before the last colon. */}}
    - expr: |-
        max(
          max(
            kube_pod_info{job="kube-state-metrics", host_ip!=""}
          ) by (node, host_ip)
          * on (host_ip) group_right (node)
          label_replace(
            (max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
          )
        ) by (node)
      record: 'node:node_inodes_total:'
    - expr: |-
        max(
          max(
            kube_pod_info{job="kube-state-metrics", host_ip!=""}
          ) by (node, host_ip)
          * on (host_ip) group_right (node)
          label_replace(
            (max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*"
          )
        ) by (node)
      record: 'node:node_inodes_free:'
{{- end }}