---
{{ if .Values.kubeslice.prometheus.enabled}}
# Prometheus server configuration for the kubeslice controller.
# Holds two files, prometheus.rules (alerting rules) and prometheus.yml
# (scrape configuration); the Deployment in this file mounts this ConfigMap
# at /etc/prometheus/.
# The quoted-open-brace actions in the alert descriptions make Helm emit a
# literal pair of opening braces, so the $labels expressions reach Prometheus
# unrendered instead of being evaluated by Helm.
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    name: prometheus-server-conf
  name: kubeslice-controller-prometheus-server-conf
  namespace: kubeslice-controller
data:
  prometheus.rules: |-
    groups:
      - name: Resource Quota violation alerts
        rules:
          - alert: Slice Resource Quota Violation
            expr: kubeslice_controller_slice_quota_violation > 0
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: "Slice Resource Usage Exceeded Quota"
              description: "The slice {{ "{{" }} $labels.slice_name}} has CPU usage {{ "{{" }} $labels.cpu}} and memory usage {{ "{{" }} $labels.memory}}: violated by {{ "{{" }} $labels.violated_resource_type}}"
          - alert: Cluster Resource Quota Violation
            expr: kubeslice_controller_cluster_quota_violation > 0
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: "Cluster Resource Usage Exceeded Quota"
              description: "The cluster {{ "{{" }} $labels.cluster_name}} has CPU usage {{ "{{" }} $labels.cpu}} and memory usage {{ "{{" }} $labels.memory}}: violated by {{ "{{" }} $labels.violated_resource_type}}"
          - alert: Namespace Resource Quota Violation
            expr: kubeslice_controller_namespace_quota_violation > 0
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: "Namespace Resource Usage Exceeded Quota"
              description: "The namespace {{ "{{" }}$labels.namespace}} has CPU usage {{ "{{" }}$labels.cpu}} and memory usage {{ "{{" }}$labels.memory}}: violated by {{ "{{" }}$labels.violated_resource_type}}"
  prometheus.yml: |-
    global:
      scrape_interval: 5s
      evaluation_interval: 5s
    rule_files:
      - /etc/prometheus/prometheus.rules
    scrape_configs:
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
{{ end }}
---
{{ if .Values.kubeslice.prometheus.enabled}}
# In-cluster (ClusterIP) Service exposing port 9090 of the pods labelled
# app=prometheus-server.
apiVersion: v1
kind: Service
metadata:
  name: kubeslice-controller-prometheus-service
  namespace: kubeslice-controller
spec:
  ports:
    - port: 9090
      targetPort: 9090
  selector:
    app: prometheus-server
  type: ClusterIP
{{ end }}
---
{{ if .Values.kubeslice.prometheus.enabled}}
# Single-replica Prometheus server.
# No serviceAccountName is set, so the pod runs as the namespace's "default"
# ServiceAccount, which is the subject of the ClusterRoleBinding in this file.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: prometheus-server
  name: kubeslice-controller-prometheus
  namespace: kubeslice-controller
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus-server
  strategy:
    # Surge one extra pod and never drop below the desired count, so the
    # replacement pod is started before the old one is removed.
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: prometheus-server
    spec:
      containers:
        - args:
            - --config.file=/etc/prometheus/prometheus.yml
            - --storage.tsdb.path=/prometheus/
          # NOTE(review): no tag is given, so this resolves to "latest";
          # consider pinning a specific Prometheus version.
          image: prom/prometheus
          name: prometheus
          ports:
            - containerPort: 9090
          volumeMounts:
            - mountPath: /etc/prometheus/
              name: prometheus-config-volume
            - mountPath: /prometheus/
              name: prometheus-storage-volume
      volumes:
        - configMap:
            # 420 decimal is file mode 0644 in octal.
            defaultMode: 420
            name: kubeslice-controller-prometheus-server-conf
          name: prometheus-config-volume
        # TSDB data lives in an emptyDir, so it is lost when the pod is
        # deleted.
        - emptyDir: {}
          name: prometheus-storage-volume
{{ end }}
---
{{ if .Values.kubeslice.prometheus.enabled}}
# Grants the kubeslice-controller-prometheus ClusterRole to the "default"
# ServiceAccount of the kubeslice-controller namespace.
# NOTE(review): every pod in that namespace that uses the default
# ServiceAccount receives these permissions too; a dedicated ServiceAccount
# would narrow the grant.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kubeslice-controller-prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kubeslice-controller-prometheus
subjects:
  - kind: ServiceAccount
    name: default
    namespace: kubeslice-controller
{{ end }}
---
{{ if .Values.kubeslice.prometheus.enabled}}
# Read-only cluster-wide permissions for Prometheus service discovery and
# scraping.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeslice-controller-prometheus
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  # Ingress is served from networking.k8s.io on current clusters; the legacy
  # "extensions" group is kept so the rule still matches on old clusters.
  - apiGroups:
      - extensions
      - networking.k8s.io
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
{{ end }}
---
{{ if .Values.kubeslice.prometheus.enabled}}
# Binds the kubeslice-controller-kube-state-metrics ClusterRole to the
# kube-state-metrics ServiceAccount in kube-system.
# NOTE(review): neither that ClusterRole nor that ServiceAccount is defined in
# this part of the chart; confirm they are created elsewhere.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v1.8.0
  name: kubeslice-controller-kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kubeslice-controller-kube-state-metrics
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: kube-system
{{ end }}