200 lines
5.6 KiB
YAML
200 lines
5.6 KiB
YAML
---
{{- if .Values.kubeslice.prometheus.enabled }}
# Prometheus server configuration for the kubeslice controller.
# Rendered only when .Values.kubeslice.prometheus.enabled is set.
apiVersion: v1
kind: ConfigMap
metadata:
  name: kubeslice-controller-prometheus-server-conf
  namespace: kubeslice-controller
  labels:
    name: prometheus-server-conf
data:
  # Alert rules file; prometheus.yml below lists it under rule_files as
  # /etc/prometheus/prometheus.rules.
  # The opening double braces in each description are emitted through a
  # quoted template literal so that Helm leaves the Prometheus $labels
  # placeholders in the rendered output instead of evaluating them.
  prometheus.rules: |-
    groups:
      - name: Resource Quota violation alerts
        rules:
          - alert: Slice Resource Quota Violation
            expr: kubeslice_controller_slice_quota_violation > 0
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: "Slice Resource Usage Exceeded Quota"
              description: "The slice {{ "{{" }} $labels.slice_name}} has CPU usage {{ "{{" }} $labels.cpu}} and memory usage {{ "{{" }} $labels.memory}}: violated by {{ "{{" }} $labels.violated_resource_type}}"
          - alert: Cluster Resource Quota Violation
            expr: kubeslice_controller_cluster_quota_violation > 0
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: "Cluster Resource Usage Exceeded Quota"
              description: "The cluster {{ "{{" }} $labels.cluster_name}} has CPU usage {{ "{{" }} $labels.cpu}} and memory usage {{ "{{" }} $labels.memory}}: violated by {{ "{{" }} $labels.violated_resource_type}}"
          - alert: Namespace Resource Quota Violation
            expr: kubeslice_controller_namespace_quota_violation > 0
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: "Namespace Resource Usage Exceeded Quota"
              description: "The namespace {{ "{{" }}$labels.namespace}} has CPU usage {{ "{{" }}$labels.cpu}} and memory usage {{ "{{" }}$labels.memory}}: violated by {{ "{{" }}$labels.violated_resource_type}}"
  # Main Prometheus configuration: one pod-discovery scrape job whose
  # relabelling is driven by the prometheus_io_* pod annotations.
  prometheus.yml: |-
    global:
      scrape_interval: 5s
      evaluation_interval: 5s
    rule_files:
      - /etc/prometheus/prometheus.rules
    scrape_configs:
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
{{- end }}
---
{{- if .Values.kubeslice.prometheus.enabled }}
# ClusterIP Service that forwards port 9090 to port 9090 of the pods
# labelled app=prometheus-server.
apiVersion: v1
kind: Service
metadata:
  name: kubeslice-controller-prometheus-service
  namespace: kubeslice-controller
spec:
  type: ClusterIP
  selector:
    app: prometheus-server
  ports:
    - port: 9090
      targetPort: 9090
{{- end }}
---
{{- if .Values.kubeslice.prometheus.enabled }}
# Single-replica Prometheus server for the kubeslice controller.
# Configuration comes from the kubeslice-controller-prometheus-server-conf
# ConfigMap mounted at /etc/prometheus/; the TSDB lives on an emptyDir.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kubeslice-controller-prometheus
  namespace: kubeslice-controller
  labels:
    app: prometheus-server
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus-server
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: prometheus-server
    spec:
      containers:
        - name: prometheus
          image: prom/prometheus
          args:
            - --config.file=/etc/prometheus/prometheus.yml
            - --storage.tsdb.path=/prometheus/
          ports:
            - containerPort: 9090
          # With maxUnavailable set to 0 the rollout removes the old pod only
          # once the new one reports Ready. Without a probe that happens as
          # soon as the container starts, so gate readiness on the Prometheus
          # readiness endpoint instead.
          readinessProbe:
            httpGet:
              path: /-/ready
              port: 9090
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 4
            failureThreshold: 3
          volumeMounts:
            - name: prometheus-config-volume
              mountPath: /etc/prometheus/
            - name: prometheus-storage-volume
              mountPath: /prometheus/
      volumes:
        - name: prometheus-config-volume
          configMap:
            # 420 decimal is file mode 0644.
            defaultMode: 420
            name: kubeslice-controller-prometheus-server-conf
        - name: prometheus-storage-volume
          emptyDir: {}
{{- end }}
---
{{- if .Values.kubeslice.prometheus.enabled }}
# Grants the kubeslice-controller-prometheus ClusterRole to the "default"
# ServiceAccount of the kubeslice-controller namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kubeslice-controller-prometheus
subjects:
  - kind: ServiceAccount
    name: default
    namespace: kubeslice-controller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kubeslice-controller-prometheus
{{- end }}
---
{{- if .Values.kubeslice.prometheus.enabled }}
# Read-only cluster permissions for the Prometheus server: get/list/watch
# on the listed core resources and on ingresses, plus GET on the /metrics
# non-resource URL.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeslice-controller-prometheus
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  # Ingress is no longer served from the "extensions" group as of
  # Kubernetes v1.22; networking.k8s.io is the current group. The old group
  # is kept so the rule still matches on older clusters.
  - apiGroups:
      - extensions
      - networking.k8s.io
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
{{- end }}
---
{{- if .Values.kubeslice.prometheus.enabled }}
# Grants the kubeslice-controller-kube-state-metrics ClusterRole to the
# kube-state-metrics ServiceAccount in the kube-system namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kubeslice-controller-kube-state-metrics
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v1.8.0
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kubeslice-controller-kube-state-metrics
{{- end }}