[rancher-monitoring] some more fixes

pull/1567/head
Jiaqi Luo 2021-11-02 16:14:48 -07:00
parent d121e5f797
commit 2126dfed6b
19 changed files with 224 additions and 35 deletions

View File

@@ -11,3 +11,14 @@
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
| `image.pullSecrets` | Image pull secrets | `{}` |
| `service.enabled` | Enable grafana service | `true` |
@@ -188,8 +188,8 @@
| `rbac.extraRoleRules` | Additional rules to add to the Role | [] |
| `rbac.extraClusterRoleRules` | Additional rules to add to the ClusterRole | [] |
| `command` | Define command to be executed by grafana container at startup | `nil` |
-| `testFramework.enabled` | Whether to create test-related resources | `true` |
-| `testFramework.image` | `test-framework` image repository. | `bats/bats` |
+| `testFramework.enabled` | Whether to create test-related resources | `false` |
+| `testFramework.image` | `test-framework` image repository. | `rancher/mirrored-bats-bats` |
| `testFramework.tag` | `test-framework` image tag. | `v1.1.0` |
| `testFramework.imagePullPolicy` | `test-framework` image pull policy. | `IfNotPresent` |
| `testFramework.securityContext` | `test-framework` securityContext | `{}` |
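With these defaults the helm test resources are disabled and point at the mirrored bats image. A minimal values override to turn the test back on, using only the keys documented in the table above (registry prefixing is handled separately by the template change in the next file):

testFramework:
  enabled: true
  image: "rancher/mirrored-bats-bats"
  tag: "v1.1.0"
  imagePullPolicy: IfNotPresent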

View File

@@ -0,0 +1,11 @@
--- charts-original/templates/tests/test.yaml
+++ charts/templates/tests/test.yaml
@@ -33,7 +33,7 @@
{{- end }}
containers:
- name: {{ .Release.Name }}-test
- image: "{{ .Values.testFramework.image}}:{{ .Values.testFramework.tag }}"
+ image: "{{ template "system_default_registry" . }}{{ .Values.testFramework.image}}:{{ .Values.testFramework.tag }}"
imagePullPolicy: "{{ .Values.testFramework.imagePullPolicy}}"
command: ["/opt/bats/bin/bats", "-t", "/tests/run.sh"]
volumeMounts:
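With the system_default_registry helper prepended, the test image follows the chart's private-registry setting instead of always pulling from Docker Hub. A rough sketch, assuming the usual Rancher chart convention where the helper resolves global.cattle.systemDefaultRegistry and renders it with a trailing slash when set (the registry host below is hypothetical):

global:
  cattle:
    systemDefaultRegistry: "registry.example.com"

# the test container image would then render approximately as:
#   image: "registry.example.com/rancher/mirrored-bats-bats:v1.1.0"

When no registry is configured, the helper renders an empty string and the image reference is unchanged.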

View File

@@ -36,11 +36,13 @@
sha: ""
pullPolicy: IfNotPresent
@@ -83,12 +97,15 @@
@@ -82,13 +96,16 @@
# - myRegistrKeySecretName
testFramework:
enabled: true
- enabled: true
- image: "bats/bats"
+ enabled: false
+ image: "rancher/mirrored-bats-bats"
tag: "v1.1.0"
imagePullPolicy: IfNotPresent
@@ -65,6 +67,15 @@
sha: ""
pullPolicy: IfNotPresent
@@ -189,7 +206,7 @@
labels: {}
path: /
- # pathType is only for k8s >= 1.1=
+ # pathType is only for k8s >= 1.18
pathType: Prefix
hosts:
@@ -303,7 +320,7 @@
## initChownData container image
##
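The pathType comment corrected above refers to the pathType field of networking.k8s.io/v1 Ingress objects, which Kubernetes only understands from 1.18 onward. A hedged sketch of the surrounding grafana ingress values (the enabled flag and hostname are illustrative):

ingress:
  enabled: true
  path: /
  # pathType is only for k8s >= 1.18
  pathType: Prefix
  hosts:
    - grafana.example.com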

View File

@@ -0,0 +1,9 @@
--- charts-original/templates/role.yaml
+++ charts/templates/role.yaml
@@ -1,3 +1,6 @@
+{{- if not (kindIs "slice" .Values.collectors) }}
+{{- fail "Collectors need to be a List since kube-state-metrics chart 3.2.2. Please check README for more information."}}
+{{- end }}
{{- if and (eq .Values.rbac.create true) (not .Values.rbac.useExistingRole) -}}
{{- range (split "," .Values.namespaces) }}
---
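The added guard fails the render early when collectors is still supplied in the pre-3.2.2 map form. Roughly, the two shapes compare as follows; the collector names are only examples:

# old map form, now rejected:
collectors:
  deployments: true
  pods: true

# list form expected since kube-state-metrics chart 3.2.2:
collectors:
  - deployments
  - pods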

View File

@@ -0,0 +1,10 @@
--- charts-original/templates/exporters/kube-state-metrics/serviceMonitor.yaml
+++ charts/templates/exporters/kube-state-metrics/serviceMonitor.yaml
@@ -1,4 +1,7 @@
{{- if .Values.kubeStateMetrics.enabled }}
+{{- if .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }}
+{{- fail "kubeStateMetrics.serviceMonitor.namespaceOverride was removed. Please use kube-state-metrics.namespaceOverride instead." }}
+{{- end }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
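The guard turns a silently ignored value into a hard failure. Migrating means moving the override to the kube-state-metrics subchart key; the namespace name here is just a placeholder:

# removed:
kubeStateMetrics:
  serviceMonitor:
    namespaceOverride: "example-namespace"

# use instead:
kube-state-metrics:
  namespaceOverride: "example-namespace"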

View File

@@ -0,0 +1,23 @@
--- charts-original/templates/grafana/dashboards-1.14/alertmanager-overview.yaml
+++ charts/templates/grafana/dashboards-1.14/alertmanager-overview.yaml
@@ -5,10 +5,11 @@
*/ -}}
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }}
+{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }}
apiVersion: v1
kind: ConfigMap
metadata:
- namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }}
+ namespace: {{ .Values.grafana.defaultDashboards.namespace }}
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "alertmanager-overview" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -607,4 +608,5 @@
"uid": "alertmanager-overview",
"version": 0
}
-{{- end }}
\ No newline at end of file
+{{- end }}
+{{- end }}
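With the added condition, the Alertmanager overview dashboard ConfigMap is only rendered when Alertmanager is deployed and its self-monitoring ServiceMonitor is enabled, i.e. when both of the following values are true (they typically default to true upstream); the same guard is applied to the alertmanager.rules PrometheusRule further down:

alertmanager:
  enabled: true
  serviceMonitor:
    selfMonitor: true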

View File

@@ -9,3 +9,12 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "cluster-total" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1831,7 +1831,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,

View File

@@ -133,6 +133,24 @@
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1100,7 +1105,7 @@
"options": [
],
- "query": "label_values(up{job=\"kube-controller-manager\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -1126,7 +1131,7 @@
"options": [
],
- "query": "label_values(up{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)",
+ "query": "label_values(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -1174,4 +1179,5 @@
"uid": "72e0e05bef5099e5f049b05fdc429ed4",
"version": 0

View File

@@ -9,3 +9,12 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -2973,7 +2973,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,

View File

@@ -9,3 +9,12 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-pod" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1301,7 +1301,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,

View File

@@ -9,3 +9,12 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-workload" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1541,7 +1541,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,

View File

@@ -9,3 +9,12 @@
name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pod-total" | trunc 63 | trimSuffix "-" }}
annotations:
{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }}
@@ -1033,7 +1033,7 @@
"options": [
],
- "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,

View File

@@ -172,6 +172,15 @@
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{`{{`}}instance{{`}}`}}",
@@ -1023,7 +1028,7 @@
"options": [
],
- "query": "label_values(up{job=\"kube-scheduler\"}, cluster)",
+ "query": "label_values(up{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -1049,7 +1054,7 @@
"options": [

View File

@@ -24,6 +24,18 @@
{{- end }}
- --config-reloader-cpu-request={{ .Values.prometheusOperator.configReloaderCpu }}
- --config-reloader-cpu-limit={{ .Values.prometheusOperator.configReloaderCpu }}
@@ -81,9 +81,9 @@
- --prometheus-instance-namespaces={{ .Values.prometheusOperator.prometheusInstanceNamespaces | join "," }}
{{- end }}
{{- if .Values.prometheusOperator.thanosImage.sha }}
- - --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }}
+ - --thanos-default-base-image={{ template "system_default_registry" . }}{{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }}
{{- else }}
- - --thanos-default-base-image={{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}
+ - --thanos-default-base-image={{ template "system_default_registry" . }}{{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}
{{- end }}
{{- if .Values.prometheusOperator.thanosRulerInstanceNamespaces }}
- --thanos-ruler-instance-namespaces={{ .Values.prometheusOperator.thanosRulerInstanceNamespaces | join "," }}
@@ -137,16 +137,16 @@
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet

View File

@@ -0,0 +1,18 @@
--- charts-original/templates/prometheus/rules-1.14/alertmanager.rules.yaml
+++ charts/templates/prometheus/rules-1.14/alertmanager.rules.yaml
@@ -7,6 +7,7 @@
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }}
{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }}
{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
+{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
@@ -172,4 +173,5 @@
{{- if .Values.defaultRules.additionalRuleLabels }}
{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
{{- end }}
-{{- end }}
\ No newline at end of file
+{{- end }}
+{{- end }}

View File

@@ -706,7 +706,7 @@
## If true, create a serviceMonitor for grafana
##
@@ -773,6 +1357,14 @@
@@ -773,6 +1357,17 @@
# replacement: $1
# action: replace
@@ -717,11 +717,14 @@
+ requests:
+ memory: 100Mi
+ cpu: 100m
+
+ testFramework:
+ enabled: false
+
## Component scraping the kube api server
##
kubeApiServer:
@@ -952,7 +1544,7 @@
@@ -952,7 +1547,7 @@
## Component scraping the kube controller manager
##
kubeControllerManager:
@@ -730,7 +733,7 @@
## If your kube controller manager is not deployed as a pod, specify IPs it can be found on
##
@@ -1110,7 +1702,7 @@
@@ -1110,7 +1705,7 @@
## Component scraping etcd
##
kubeEtcd:
@@ -739,7 +742,7 @@
## If your etcd is not deployed as a pod, specify IPs it can be found on
##
@@ -1177,7 +1769,7 @@
@@ -1177,7 +1772,7 @@
## Component scraping kube scheduler
##
kubeScheduler:
@@ -748,7 +751,7 @@
## If your kube scheduler is not deployed as a pod, specify IPs it can be found on
##
@@ -1237,7 +1829,7 @@
@@ -1237,7 +1832,7 @@
## Component scraping kube proxy
##
kubeProxy:
@@ -757,7 +760,7 @@
## If your kube proxy is not deployed as a pod, specify IPs it can be found on
##
@@ -1337,6 +1929,13 @@
@@ -1337,6 +1932,13 @@
create: true
podSecurityPolicy:
enabled: true
@@ -771,7 +774,7 @@
## Deploy node exporter as a daemonset to all nodes
##
@@ -1392,6 +1991,16 @@
@@ -1392,6 +1994,16 @@
extraArgs:
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
@@ -788,7 +791,7 @@
## Manages Prometheus and Alertmanager components
##
@@ -1404,8 +2013,8 @@
@@ -1404,8 +2016,8 @@
enabled: true
# Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants
tlsMinVersion: VersionTLS13
@@ -799,7 +802,7 @@
## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted
## rules from making their way into prometheus and potentially preventing the container from starting
@@ -1422,9 +2031,9 @@
@@ -1422,9 +2034,9 @@
patch:
enabled: true
image:
@@ -811,7 +814,7 @@
pullPolicy: IfNotPresent
resources: {}
## Provide a priority class name to the webhook patching job
@@ -1571,13 +2180,13 @@
@@ -1571,13 +2183,13 @@
## Resource limits & requests
##
@@ -832,7 +835,7 @@
# Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico),
# because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working
@@ -1630,7 +2239,7 @@
@@ -1630,7 +2242,7 @@
## Prometheus-operator image
##
image:
@@ -841,7 +844,7 @@
tag: v0.50.0
sha: ""
pullPolicy: IfNotPresent
@@ -1646,7 +2255,7 @@
@@ -1646,7 +2258,7 @@
## Prometheus-config-reloader image to use for config and rule reloading
##
prometheusConfigReloaderImage:
@@ -850,7 +853,16 @@
tag: v0.50.0
sha: ""
@@ -1781,7 +2390,7 @@
@@ -1661,7 +2273,7 @@
## Thanos side-car image when configured
##
thanosImage:
- repository: quay.io/thanos/thanos
+ repository: rancher/mirrored-thanos-thanos
tag: v0.17.2
sha: ""
@@ -1781,7 +2393,7 @@
port: 9090
## To be used with a proxy extraContainer port
@@ -859,7 +871,7 @@
## List of IP addresses at which the Prometheus server service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
@@ -2054,7 +2663,7 @@
@@ -2054,7 +2666,7 @@
## Image of Prometheus.
##
image:
@@ -868,19 +880,7 @@
tag: v2.28.1
sha: ""
@@ -2117,6 +2726,11 @@
##
externalUrl: ""
+ ## Ignore NamespaceSelector settings from the PodMonitor and ServiceMonitor configs
+ ## If true, PodMonitors and ServiceMonitors can only discover Pods and Services within the namespace they are deployed into
+ ##
+ ignoreNamespaceSelectors: false
+
## Define which Nodes the Pods are scheduled on.
## ref: https://kubernetes.io/docs/user-guide/node-selection/
##
@@ -2149,7 +2763,7 @@
@@ -2149,7 +2761,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the PrometheusRule resources created
##
@@ -889,7 +889,7 @@
## PrometheusRules to be selected for target discovery.
## If {}, select all PrometheusRules
@@ -2174,7 +2788,7 @@
@@ -2174,7 +2786,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the servicemonitors created
##
@@ -898,7 +898,7 @@
## ServiceMonitors to be selected for target discovery.
## If {}, select all ServiceMonitors
@@ -2197,7 +2811,7 @@
@@ -2197,7 +2809,7 @@
## prometheus resource to be created with selectors based on values in the helm deployment,
## which will also match the podmonitors created
##
@@ -907,7 +907,7 @@
## PodMonitors to be selected for target discovery.
## If {}, select all PodMonitors
@@ -2328,9 +2942,13 @@
@@ -2328,9 +2940,13 @@
## Resource limits & requests
##
@@ -924,7 +924,7 @@
## Prometheus StorageSpec for persistent data
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/storage.md
@@ -2353,7 +2971,13 @@
@@ -2353,7 +2969,13 @@
# medium: Memory
# Additional volumes on the output StatefulSet definition.
@@ -939,7 +939,7 @@
# Additional VolumeMounts on the output StatefulSet definition.
volumeMounts: []
@@ -2475,9 +3099,34 @@
@@ -2475,9 +3097,34 @@
# fileName: "objstore.yaml"
# objectStorageConfigFile: /var/secrets/object-store.yaml
@@ -975,7 +975,7 @@
## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes
## (permissions, dir tree) on mounted volumes before starting prometheus
@@ -2485,7 +3134,7 @@
@@ -2485,7 +3132,7 @@
## PortName to use for Prometheus.
##

View File

@@ -0,0 +1,3 @@
service:
  targetPort: 9102
  port: 9102

View File

@@ -0,0 +1,9 @@
rules:
  external:
  - seriesQuery: '{__name__=~"^some_metric_count$"}'
    resources:
      template: <<.Resource>>
    name:
      matches: ""
      as: "my_custom_metric"
    metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
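This block looks like a prometheus-adapter external-metrics rule: the adapter substitutes <<.Series>>, <<.LabelMatchers>> and <<.GroupBy>> at query time and exposes the result on the external metrics API under the name given by "as". A hypothetical consumer would be an HPA scaling on that metric; everything in this sketch except the metric name my_custom_metric is illustrative:

apiVersion: autoscaling/v2beta2
kind: HorizontalPodAutoscaler
metadata:
  name: example-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: example-app
  minReplicas: 1
  maxReplicas: 5
  metrics:
    - type: External
      external:
        metric:
          name: my_custom_metric
        target:
          type: AverageValue
          averageValue: "10"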