# rancher-partner-charts/charts/avesha/kubeslice-controller/templates/prometheus.yaml

# 200 lines
# 5.6 KiB
# YAML

---
{{- if .Values.kubeslice.prometheus.enabled }}
# ConfigMap holding the two files Prometheus is started with:
#   prometheus.rules - alerting rules for slice / cluster / namespace quota
#                      violations
#   prometheus.yml   - global settings plus the pod-discovery scrape job
# Rendered only when kubeslice.prometheus.enabled is set in the chart values.
apiVersion: v1
kind: ConfigMap
metadata:
  name: kubeslice-controller-prometheus-server-conf
  namespace: kubeslice-controller
  labels:
    name: prometheus-server-conf
data:
  # The doubled-brace escapes in the descriptions below make Helm emit a
  # literal opening brace pair, so the label placeholders reach Prometheus
  # untouched instead of being evaluated at chart render time.
  prometheus.rules: |-
    groups:
      - name: Resource Quota violation alerts
        rules:
          - alert: Slice Resource Quota Violation
            expr: kubeslice_controller_slice_quota_violation > 0
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: "Slice Resource Usage Exceeded Quota"
              description: "The slice {{ "{{" }} $labels.slice_name}} has CPU usage {{ "{{" }} $labels.cpu}} and memory usage {{ "{{" }} $labels.memory}}: violated by {{ "{{" }} $labels.violated_resource_type}}"
          - alert: Cluster Resource Quota Violation
            expr: kubeslice_controller_cluster_quota_violation > 0
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: "Cluster Resource Usage Exceeded Quota"
              description: "The cluster {{ "{{" }} $labels.cluster_name}} has CPU usage {{ "{{" }} $labels.cpu}} and memory usage {{ "{{" }} $labels.memory}}: violated by {{ "{{" }} $labels.violated_resource_type}}"
          - alert: Namespace Resource Quota Violation
            expr: kubeslice_controller_namespace_quota_violation > 0
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: "Namespace Resource Usage Exceeded Quota"
              description: "The namespace {{ "{{" }}$labels.namespace}} has CPU usage {{ "{{" }}$labels.cpu}} and memory usage {{ "{{" }}$labels.memory}}: violated by {{ "{{" }}$labels.violated_resource_type}}"
  # Scrapes every pod annotated prometheus.io/scrape=true, honouring the
  # optional prometheus.io/path and prometheus.io/port annotations, and copies
  # pod labels, namespace and pod name onto the resulting series.
  prometheus.yml: |-
    global:
      scrape_interval: 5s
      evaluation_interval: 5s
    rule_files:
      - /etc/prometheus/prometheus.rules
    scrape_configs:
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
{{- end }}
---
{{- if .Values.kubeslice.prometheus.enabled }}
# In-cluster (ClusterIP) Service that forwards port 9090 to port 9090 of the
# pods labelled app=prometheus-server.
apiVersion: v1
kind: Service
metadata:
  name: kubeslice-controller-prometheus-service
  namespace: kubeslice-controller
spec:
  type: ClusterIP
  selector:
    app: prometheus-server
  ports:
    - port: 9090
      targetPort: 9090
{{- end }}
---
{{- if .Values.kubeslice.prometheus.enabled }}
# Single-replica Prometheus server for the KubeSlice controller.
# Configuration comes from the kubeslice-controller-prometheus-server-conf
# ConfigMap mounted at /etc/prometheus/; the TSDB lives on an emptyDir, so
# collected samples do not survive deletion of the pod.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kubeslice-controller-prometheus
  namespace: kubeslice-controller
  labels:
    app: prometheus-server
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus-server
  # Surge one new pod and never drop below the desired count. This only
  # protects availability if the new pod reports readiness truthfully, which
  # is what the readinessProbe below provides.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: prometheus-server
    spec:
      containers:
        - name: prometheus
          # NOTE(review): no tag is given, so the runtime pulls the "latest"
          # tag; consider pinning a specific release for reproducible
          # installs.
          image: prom/prometheus
          args:
            - --config.file=/etc/prometheus/prometheus.yml
            - --storage.tsdb.path=/prometheus/
          ports:
            - containerPort: 9090
          # Prometheus serves /-/ready once it can handle queries and
          # /-/healthy while the process is up. Without these probes the pod
          # is treated as available as soon as the container starts, so the
          # Service can route to it before it is able to answer.
          readinessProbe:
            httpGet:
              path: /-/ready
              port: 9090
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /-/healthy
              port: 9090
            initialDelaySeconds: 30
            periodSeconds: 15
          volumeMounts:
            - name: prometheus-config-volume
              mountPath: /etc/prometheus/
            - name: prometheus-storage-volume
              mountPath: /prometheus/
      volumes:
        - name: prometheus-config-volume
          configMap:
            name: kubeslice-controller-prometheus-server-conf
            # 420 decimal is file mode 0644.
            defaultMode: 420
        - name: prometheus-storage-volume
          emptyDir: {}
{{- end }}
---
{{- if .Values.kubeslice.prometheus.enabled }}
# Grants the kubeslice-controller-prometheus ClusterRole to the "default"
# ServiceAccount of the kubeslice-controller namespace.
# NOTE(review): every pod in that namespace that runs under the default
# ServiceAccount receives these cluster-wide read permissions, not only
# Prometheus; a dedicated ServiceAccount would narrow this.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kubeslice-controller-prometheus
subjects:
  - kind: ServiceAccount
    name: default
    namespace: kubeslice-controller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kubeslice-controller-prometheus
{{- end }}
---
{{- if .Values.kubeslice.prometheus.enabled }}
# Read-only cluster permissions for Prometheus: get/list/watch on the core
# objects listed below and on ingresses, plus GET on the /metrics
# non-resource URL.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeslice-controller-prometheus
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  # Ingress is no longer served from the "extensions" group on Kubernetes
  # 1.22 and later; it lives in networking.k8s.io. Both groups are listed so
  # the rule is effective on current clusters and still matches older ones.
  - apiGroups:
      - extensions
      - networking.k8s.io
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
{{- end }}
---
{{- if .Values.kubeslice.prometheus.enabled }}
# Binds the kubeslice-controller-kube-state-metrics ClusterRole to the
# kube-state-metrics ServiceAccount in the kube-system namespace.
# NOTE(review): neither that ClusterRole nor the ServiceAccount is visible in
# this part of the file - confirm both are created elsewhere.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kubeslice-controller-kube-state-metrics
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v1.8.0
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kubeslice-controller-kube-state-metrics
{{- end }}