rancher-partner-charts/charts/kubecost/cost-analyzer/2.4.0/values.yaml

global:
# zone: cluster.local (use only if your DNS server doesn't live in the same zone as kubecost)
prometheus:
enabled: true # Kubecost requires Prometheus data; it is not optional. When enabled: false, Prometheus will not be installed and you must configure your own Prometheus to scrape Kubecost and provide the fqdn below. Warning: before changing this setting, read about the risks at https://docs.kubecost.com/install-and-configure/install/custom-prom
fqdn: http://cost-analyzer-prometheus-server.default.svc # Example address of a Prometheus to connect to. Include the protocol (http:// or https://). Ignored if enabled: true.
# insecureSkipVerify: false # If true, kubecost will not check the TLS cert of prometheus
# queryServiceBasicAuthSecretName: dbsecret # kubectl create secret generic dbsecret -n kubecost --from-file=USERNAME --from-file=PASSWORD
# queryServiceBearerTokenSecretName: mcdbsecret # kubectl create secret generic mcdbsecret -n kubecost --from-file=TOKEN
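# A hedged sketch of pointing Kubecost at an existing Prometheus at install time; the release,
# namespace, and service address below are illustrative, not defaults:
#   helm upgrade --install kubecost kubecost/cost-analyzer -n kubecost \
#     --set global.prometheus.enabled=false \
#     --set global.prometheus.fqdn=http://prometheus-server.monitoring.svc:80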
grafana:
enabled: true # If false, Grafana will not be installed
domainName: cost-analyzer-grafana.default.svc # Example Grafana domain. Ignored if enabled: true.
scheme: "http" # http or https, for the domain name above.
proxy: true # If true, the kubecost frontend will route to your grafana through its service endpoint
# fqdn: cost-analyzer-grafana.default.svc
# Enable only when you are using GCP Marketplace ENT listing. Learn more at https://console.cloud.google.com/marketplace/product/kubecost-public/kubecost-ent
gcpstore:
enabled: false
# Google Cloud Managed Service for Prometheus
gmp:
# Remember to set these parameters when installing the Kubecost Helm chart with `global.gmp.enabled=true` if you want to use GMP self-deployed collection (recommended) to utilize Kubecost scrape configs.
# If enabling GMP, it is highly recommended to utilize Google's distribution of Prometheus.
# Learn more at https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-unmanaged
# --set prometheus.server.image.repository="gke.gcr.io/prometheus-engine/prometheus" \
# --set prometheus.server.image.tag="v2.35.0-gmp.2-gke.0"
enabled: false # If true, kubecost will be configured to use GMP Prometheus image and query from Google Cloud Managed Service for Prometheus.
prometheusServerEndpoint: http://localhost:8085/ # The prometheus service endpoint used by kubecost. The calls are forwarded through the GMP Prom proxy side car to the GMP database.
gmpProxy:
enabled: false
image: gke.gcr.io/prometheus-engine/frontend:v0.4.1-gke.0 # GMP Prometheus proxy image that serves as an endpoint to query metrics from GMP
imagePullPolicy: IfNotPresent
name: gmp-proxy
port: 8085
projectId: YOUR_PROJECT_ID # example GCP project ID
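# A hedged example of enabling GMP at install time, combining the flags above; the release and
# namespace names are illustrative:
#   helm upgrade --install kubecost kubecost/cost-analyzer -n kubecost \
#     --set global.gmp.enabled=true \
#     --set global.gmp.gmpProxy.enabled=true \
#     --set prometheus.server.image.repository="gke.gcr.io/prometheus-engine/prometheus" \
#     --set prometheus.server.image.tag="v2.35.0-gmp.2-gke.0"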
# Amazon Managed Service for Prometheus
amp:
enabled: false # If true, kubecost will be configured to remote_write and query from Amazon Managed Service for Prometheus.
prometheusServerEndpoint: http://localhost:8005/workspaces/<workspaceId>/ # The prometheus service endpoint used by kubecost. The calls are forwarded through the SigV4Proxy side car to the AMP workspace.
remoteWriteService: https://aps-workspaces.us-west-2.amazonaws.com/workspaces/<workspaceId>/api/v1/remote_write # The remote_write endpoint for the AMP workspace.
sigv4:
region: us-west-2
# access_key: ACCESS_KEY # AWS Access key
# secret_key: SECRET_KEY # AWS Secret key
# role_arn: ROLE_ARN # AWS role arn
# profile: PROFILE # AWS profile
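# A hedged values sketch for AMP using the settings above; the workspace ID and region are
# placeholders for your own workspace:
# global:
#   amp:
#     enabled: true
#     prometheusServerEndpoint: http://localhost:8005/workspaces/<workspaceId>/
#     remoteWriteService: https://aps-workspaces.us-west-2.amazonaws.com/workspaces/<workspaceId>/api/v1/remote_write
#     sigv4:
#       region: us-west-2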
# Mimir Proxy to help Kubecost to query metrics from multi-tenant Grafana Mimir.
# Set `global.mimirProxy.enabled=true` and `global.prometheus.enabled=false` to enable Mimir Proxy.
# You also need to set `global.prometheus.fqdn=http://kubecost-cost-analyzer-mimir-proxy.kubecost.svc:8085/prometheus`
# or `global.prometheus.fqdn=http://{{ template "cost-analyzer.fullname" . }}-mimir-proxy.{{ .Release.Namespace }}.svc:8085/prometheus`
# Learn more at https://grafana.com/docs/mimir/latest/operators-guide/secure/authentication-and-authorization/#without-an-authenticating-reverse-proxy
mimirProxy:
enabled: false
name: mimir-proxy
image: nginxinc/nginx-unprivileged
port: 8085
mimirEndpoint: $mimir_endpoint # Your Mimir query endpoint. If your Mimir query endpoint is http://example.com/prometheus, replace $mimir_endpoint with http://example.com/
orgIdentifier: $your_tenant_ID # Your Grafana Mimir tenant ID
# basicAuth:
# username: user
# password: pwd
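# A hedged values sketch combining the Mimir settings described above; the endpoint and tenant ID
# are placeholders:
# global:
#   prometheus:
#     enabled: false
#     fqdn: http://kubecost-cost-analyzer-mimir-proxy.kubecost.svc:8085/prometheus
#   mimirProxy:
#     enabled: true
#     mimirEndpoint: http://example.com/
#     orgIdentifier: my-tenant-id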
# Azure Monitor Managed Service for Prometheus
# See https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/azure-monitor/essentials/prometheus-metrics-overview.md for information
# and https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/prometheus-remote-write-virtual-machines for more information on setting this up
ammsp:
enabled: false
prometheusServerEndpoint: http://localhost:8081/
remoteWriteService: $<AMMSP_METRICS_INGESTION_ENDPOINT>
queryEndpoint: $<AMMSP_QUERY_ENDPOINT>
aadAuthProxy:
enabled: false
# per https://github.com/Azure/aad-auth-proxy/releases/tag/0.1.0-main-04-10-2024-7067ac84
image: $<IMAGE> # Example: mcr.microsoft.com/azuremonitor/auth-proxy/prod/aad-auth-proxy/images/aad-auth-proxy:0.1.0-main-04-10-2024-7067ac84
imagePullPolicy: IfNotPresent
name: aad-auth-proxy
port: 8081
audience: https://prometheus.monitor.azure.com/.default
identityType: userAssigned
aadClientId: $<AZURE_MANAGED_IDENTITY_CLIENT_ID>
aadTenantId: $<AZURE_MANAGED_IDENTITY_TENANT_ID>
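# A hedged values sketch for Azure Monitor managed Prometheus; the endpoints, image, and identity
# IDs are placeholders taken from your own Azure Monitor workspace and managed identity:
# global:
#   ammsp:
#     enabled: true
#     remoteWriteService: <metrics-ingestion-endpoint>
#     queryEndpoint: <query-endpoint>
#     aadAuthProxy:
#       enabled: true
#       image: mcr.microsoft.com/azuremonitor/auth-proxy/prod/aad-auth-proxy/images/aad-auth-proxy:0.1.0-main-04-10-2024-7067ac84
#       aadClientId: <managed-identity-client-id>
#       aadTenantId: <managed-identity-tenant-id>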
notifications:
# Kubecost alerting configuration
# Ref: http://docs.kubecost.com/alerts
# alertConfigs:
# frontendUrl: http://localhost:9090 # optional, used for linkbacks
# globalSlackWebhookUrl: https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX # optional, used for Slack alerts
# globalMsTeamsWebhookUrl: https://xxxxx.webhook.office.com/webhookb2/XXXXXXXXXXXXXXXXXXXXXXXX/IncomingWebhook/XXXXXXXXXXXXXXXXXXXXXXXX # optional, used for Microsoft Teams alerts
# globalAlertEmails:
# - recipient@example.com
# - additionalRecipient@example.com
# globalEmailSubject: Custom Subject
# Alerts generated by kubecost, about cluster data
# alerts:
# Daily namespace budget alert on namespace `kubecost`
# - type: budget # supported: budget, recurringUpdate
# threshold: 50 # optional, required for budget alerts
# window: daily # or 1d
# aggregation: namespace
# filter: kubecost
# ownerContact: # optional, overrides globalAlertEmails default
# - owner@example.com
# - owner2@example.com
# # optional, used for alert-specific Slack and Microsoft Teams alerts
# slackWebhookUrl: https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX
# msTeamsWebhookUrl: https://xxxxx.webhook.office.com/webhookb2/XXXXXXXXXXXXXXXXXXXXXXXX/IncomingWebhook/XXXXXXXXXXXXXXXXXXXXXXXX
# Daily cluster budget alert on cluster `cluster-one`
# - type: budget
# threshold: 200.8 # optional, required for budget alerts
# window: daily # or 1d
# aggregation: cluster
# filter: cluster-one # does not accept csv
# Recurring weekly update (weeklyUpdate alert)
# - type: recurringUpdate
# window: weekly # or 7d
# aggregation: namespace
# filter: '*'
# Recurring weekly namespace update on kubecost namespace
# - type: recurringUpdate
# window: weekly # or 7d
# aggregation: namespace
# filter: kubecost
# Spend Change Alert
# - type: spendChange # change relative to moving avg
# relativeThreshold: 0.20 # Proportional change relative to baseline. Must be greater than -1 (can be negative)
# window: 1d # accepts d, h
# baselineWindow: 30d # previous window, offset by window
# aggregation: namespace
# filter: kubecost, default # accepts csv
# Health Score Alert
# - type: health # Alerts when health score changes by a threshold
# window: 10m
# threshold: 5 # Send an alert if the health score changes by 5 or more
# Kubecost Health Diagnostic
# - type: diagnostic # Alerts when kubecost is unable to compute costs, e.g. Prometheus unreachable
# window: 10m
alertmanager: # Supply an alertmanager FQDN to receive notifications from the app.
enabled: false # If true, allow kubecost to write to your alertmanager
fqdn: http://cost-analyzer-prometheus-server.default.svc # example fqdn. Ignored if prometheus.enabled: true
# Set saved Cost Allocation report(s) accessible from /reports
# Ref: http://docs.kubecost.com/saved-reports
savedReports:
enabled: false # If true, overwrites report parameters set through UI
reports:
- title: "Example Saved Report 0"
window: "today"
aggregateBy: "namespace"
chartDisplay: "category"
idle: "separate"
rate: "cumulative"
accumulate: false # daily resolution
filters: # Ref: https://docs.kubecost.com/apis/filters-api
- key: "cluster" # Ref: https://docs.kubecost.com/apis/filters-api#allocation-apis-request-sizing-v2-api
operator: ":" # Ref: https://docs.kubecost.com/apis/filters-api#filter-operators
value: "dev"
- title: "Example Saved Report 1"
window: "month"
aggregateBy: "controllerKind"
chartDisplay: "category"
idle: "share"
rate: "monthly"
accumulate: false
filters: # Ref: https://docs.kubecost.com/apis/filters-api
- key: "namespace" # Ref: https://docs.kubecost.com/apis/filters-api#allocation-apis-request-sizing-v2-api
operator: "!:" # Ref: https://docs.kubecost.com/apis/filters-api#filter-operators
value: "kubecost"
- title: "Example Saved Report 2"
window: "2020-11-11T00:00:00Z,2020-12-09T23:59:59Z"
aggregateBy: "service"
chartDisplay: "category"
idle: "hide"
rate: "daily"
accumulate: true # entire window resolution
filters: [] # if no filters, specify empty array
# Set saved Asset report(s) accessible from /reports
# Ref: http://docs.kubecost.com/saved-reports
assetReports:
enabled: false # If true, overwrites report parameters set through UI
reports:
- title: "Example Asset Report 0"
window: "today"
aggregateBy: "type"
accumulate: false # daily resolution
filters:
- property: "cluster"
value: "cluster-one"
# Set saved Cloud Cost report(s) accessible from /reports
# Ref: http://docs.kubecost.com/saved-reports
cloudCostReports:
enabled: false # If true, overwrites report parameters set through UI
reports:
- title: "Cloud Cost Report 0"
window: "today"
aggregateBy: "service"
accumulate: false # daily resolution
# filters:
# - property: "service"
# value: "service1" # corresponds to a value to filter cloud cost aggregate by service data on.
podAnnotations: {}
# iam.amazonaws.com/role: role-arn
# Applies these labels to all Deployments, StatefulSets, DaemonSets, and their pod templates.
additionalLabels: {}
securityContext:
runAsNonRoot: true
seccompProfile:
type: RuntimeDefault
fsGroup: 1001
runAsGroup: 1001
runAsUser: 1001
fsGroupChangePolicy: OnRootMismatch
containerSecurityContext:
allowPrivilegeEscalation: false
privileged: false
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
# Platforms is a higher-level abstraction for platform-specific values and settings.
platforms:
# Deploying to OpenShift (OCP) requires enabling this option.
openshift:
enabled: false # Deploy Kubecost to OpenShift.
route:
enabled: false # Create an OpenShift Route.
annotations: {} # Add annotations to the Route.
# host: kubecost.apps.okd4.example.com # Add a custom host for your Route.
# Create Security Context Constraint resources for the DaemonSets requiring additional privileges.
scc:
nodeExporter: false # Creates an SCC for Prometheus Node Exporter. This requires Node Exporter be enabled.
networkCosts: false # Creates an SCC for Kubecost network-costs. This requires network-costs be enabled.
# When OpenShift is enabled, the following securityContext will be applied to all resources unless they define their own.
securityContext:
runAsNonRoot: true
seccompProfile:
type: RuntimeDefault
# Set options for deploying with CI/CD tools like Argo CD.
cicd:
enabled: false # Set to true when using affected CI/CD tools for access to the below configuration options.
skipSanityChecks: false # If true, skip all sanity/existence checks for resources like Secrets.
## Kubecost Integrations
## Ref: https://docs.kubecost.com/integrations
##
integrations:
postgres:
enabled: false
runInterval: "12h" # How frequently to run the integration.
databaseHost: "" # REQUIRED. ex: my.postgres.database.azure.com
databasePort: "" # REQUIRED. ex: 5432
databaseName: "" # REQUIRED. ex: postgres
databaseUser: "" # REQUIRED. ex: myusername
databasePassword: "" # REQUIRED. ex: mypassword
databaseSecretName: "" # OPTIONAL. Specify your own k8s secret containing the above credentials. Must have key "creds.json".
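## A hedged example of creating that optional secret; the secret name and file path are
## illustrative, and creds.json must follow the credential format described in the Kubecost docs:
##   kubectl create secret generic postgres-creds -n kubecost --from-file=creds.json=./creds.json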
## Configure what Postgres table to write to, and what parameters to pass
## when querying Kubecost's APIs. Ensure all parameters are enclosed in
## quotes. Ref: https://docs.kubecost.com/apis/apis-overview
queryConfigs:
allocations: []
# - databaseTable: "kubecost_allocation_data"
# window: "7d"
# aggregate: "namespace"
# idle: "true"
# shareIdle: "true"
# shareNamespaces: "kubecost,kube-system"
# shareLabels: ""
# - databaseTable: "kubecost_allocation_data_by_cluster"
# window: "10d"
# aggregate: "cluster"
# idle: "true"
# shareIdle: "false"
# shareNamespaces: ""
# shareLabels: ""
assets: []
# - databaseTable: "kubecost_assets_data"
# window: "7d"
# aggregate: "cluster"
cloudCosts: []
# - databaseTable: "kubecost_cloudcosts_data"
# window: "7d"
# aggregate: "service"
## Provide a name override for the chart.
# nameOverride: ""
## Provide a full name override option for the chart.
# fullnameOverride: ""
## This flag is only required for users upgrading to a new version of Kubecost.
## The flag is used to ensure users are aware of important
## (potentially breaking) changes included in the new version.
##
upgrade:
toV2: false
# generated at http://kubecost.com/install, used for alerts tracking and free trials
kubecostToken: # ""
# Advanced pipeline for custom prices, enterprise key required
pricingCsv:
enabled: false
location:
provider: "AWS"
region: "us-east-1"
URI: s3://kc-csv-test/pricing_schema.csv # a valid file URI
csvAccessCredentials: pricing-schema-access-secret
# SAML integration for user management and RBAC, enterprise key required
# Ref: https://github.com/kubecost/docs/blob/main/user-management.md
saml:
enabled: false
# secretName: "kubecost-authzero"
# metadataSecretName: "kubecost-authzero-metadata" # One of metadataSecretName or idpMetadataURL must be set. defaults to metadataURL if set
# idpMetadataURL: "https://dev-elu2z98r.auth0.com/samlp/metadata/c6nY4M37rBP0qSO1IYIqBPPyIPxLS8v2"
# appRootURL: "http://localhost:9090" # sample URL
# authTimeout: 1440 # number of minutes the JWT will be valid
# redirectURL: "https://dev-elu2z98r.auth0.com/v2/logout" # callback URL redirected to after logout
# audienceURI: "http://localhost:9090" # by convention, the same as the appRootURL, but any string uniquely identifying kubecost to your SAML IdP. Optional if you follow the convention
# nameIDFormat: "urn:oasis:names:tc:SAML:1.1:nameid-format:unspecified" # set if your SAML provider requires a specific NameID format
# isGLUUProvider: false # An additional URL parameter must be appended for GLUU providers
# encryptionCertSecret: "kubecost-saml-cert" # k8s secret where the x509 certificate used to encrypt an Okta saml response is stored
# decryptionKeySecret: "kubecost-saml-decryption-key" # k8s secret where the private key associated with the encryptionCertSecret is stored
# authSecret: "random-string" # value of SAML secret used to issue tokens, will be autogenerated as random string if not provided
# authSecretName: "kubecost-saml-secret" # name of k8s secret where the authSecret will be stored, defaults to "kubecost-saml-secret" if not provided
rbac:
enabled: false
# groups:
# - name: admin
# enabled: false # if admin is disabled, all SAML users will be able to make configuration changes to the kubecost frontend
# assertionName: "http://schemas.auth0.com/userType" # a SAML Assertion, one of whose elements has a value that matches on of the values in assertionValues
# assertionValues:
# - "admin"
# - "superusers"
# - name: readonly
# enabled: false # if readonly is disabled, all users authorized on SAML will default to readonly
# assertionName: "http://schemas.auth0.com/userType"
# assertionValues:
# - "readonly"
# - name: editor
# enabled: true # if editor is enabled, editors will be allowed to edit reports/alerts scoped to them, and act as readers otherwise. Users will never default to editor.
# assertionName: "http://schemas.auth0.com/userType"
# assertionValues:
# - "editor"
oidc:
enabled: false
clientID: "" # application/client client_id parameter obtained from provider, used to make requests to server
clientSecret: "" # application/client client_secret parameter obtained from provider, used to make requests to server
# secretName: "kubecost-oidc-secret" # k8s secret where clientsecret will be stored
# For use to provide a custom OIDC Secret. Overrides the usage of oidc.clientSecret and oidc.secretName.
# Should contain the field directly.
# Can be created using raw k8s secrets, external secrets, sealed secrets, or any other method.
existingCustomSecret:
enabled: false
name: "" # name of the secret containing the client secret
# authURL: "https://my.auth.server/authorize" # endpoint for login to auth server
# loginRedirectURL: "http://my.kubecost.url/model/oidc/authorize" # Kubecost url configured in provider for redirect after authentication
# discoveryURL: "https://my.auth.server/.well-known/openid-configuration" # url for OIDC endpoint discovery
skipOnlineTokenValidation: false # if true, will skip accessing OIDC introspection endpoint for online token verification, and instead try to locally validate JWT claims
useClientSecretPost: false # if true, client secret will specifically only use client_secret_post method, otherwise it will attempt to send the secret in both the header and the body.
# hostedDomain: "example.com" # optional, blocks access to the auth domain specified in the hd claim of the provider ID token
rbac:
enabled: false
# groups:
# - name: admin
# enabled: false # if admin is disabled, all authenticated users will be able to make configuration changes to the kubecost frontend
# claimName: "roles" # Kubecost matches this string against the JWT's payload key containing RBAC info (this value is unique across identity providers)
# claimValues: # Kubecost matches these strings with the roles created in your identity provider
# - "admin"
# - "superusers"
# - name: readonly
# enabled: false # if readonly is disabled, all authenticated users will default to readonly
# claimName: "roles"
# claimValues:
# - "readonly"
# - name: editor
# enabled: false # if editor is enabled, editors will be allowed to edit reports/alerts scoped to them, and act as readers otherwise. Users will never default to editor.
# claimName: "roles"
# claimValues:
# - "editor"
## Adds the HTTP_PROXY, HTTPS_PROXY, and NO_PROXY environment variables to all
## containers. Typically used in environments that have firewall rules which
## prevent kubecost from accessing cloud provider resources.
## Ref: https://www.oreilly.com/library/view/security-with-go/9781788627917/5ea6a02b-3d96-44b1-ad3c-6ab60fcbbe4f.xhtml
##
systemProxy:
enabled: false
httpProxyUrl: ""
httpsProxyUrl: ""
noProxy: ""
# imagePullSecrets:
# - name: "image-pull-secret"
# imageVersion uses the base image name (image:) but overrides the version
# pulled. It should be avoided. If non-default behavior is needed, use
# fullImageName for the relevant component.
# imageVersion:
kubecostFrontend:
enabled: true
deployMethod: singlepod # haMode or singlepod - haMode is currently only supported with Enterprise tier
haReplicas: 2 # only used with haMode
image: "gcr.io/kubecost1/frontend"
imagePullPolicy: IfNotPresent
# fullImageName overrides the default image construction logic. The exact
# image provided (registry, image, tag) will be used for the frontend.
# fullImageName:
# extraEnv:
# - name: NGINX_ENTRYPOINT_WORKER_PROCESSES_AUTOTUNE
# value: "1"
# securityContext:
# readOnlyRootFilesystem: true
resources:
requests:
cpu: "10m"
memory: "55Mi"
# limits:
# cpu: "100m"
# memory: "256Mi"
deploymentStrategy: {}
# rollingUpdate:
# maxSurge: 1
# maxUnavailable: 1
# type: RollingUpdate
# Define a readiness probe for the Kubecost frontend container.
readinessProbe:
enabled: true
initialDelaySeconds: 1
periodSeconds: 5
failureThreshold: 6
# Define a liveness probe for the Kubecost frontend container.
livenessProbe:
enabled: true
initialDelaySeconds: 1
periodSeconds: 5
failureThreshold: 6
ipv6:
enabled: true # disable if the cluster does not support ipv6
# timeoutSeconds: 600 # should be rarely used, but can be increased if needed
# allow customizing nginx-conf server block
# extraServerConfig: |-
# proxy_busy_buffers_size 512k;
# proxy_buffers 4 512k;
# proxy_buffer_size 256k;
# large_client_header_buffers 4 64k;
# hideDiagnostics: false # useful if the primary is not monitored. Supported in limited environments.
# hideOrphanedResources: false # OrphanedResources works on the primary-cluster's cloud-provider only.
# set to true to set all upstreams to use <service>.<namespace>.svc.cluster.local instead of just <service>.<namespace>
useDefaultFqdn: false
# api:
# fqdn: kubecost-api.kubecost.svc.cluster.local:9001
# model:
# fqdn: kubecost-model.kubecost.svc.cluster.local:9003
# forecasting:
# fqdn: kubecost-forcasting.kubecost.svc.cluster.local:5000
# aggregator:
# fqdn: kubecost-aggregator.kubecost.svc.cluster.local:9004
# cloudCost:
# fqdn: kubecost-cloud-cost.kubecost.svc.cluster.local:9005
# multiClusterDiagnostics:
# fqdn: kubecost-multi-diag.kubecost.svc.cluster.local:9007
# clusterController:
# fqdn: cluster-controller.kubecost.svc.cluster.local:9731
# Kubecost Metrics deploys a separate pod which will emit kubernetes specific metrics required
# by the cost-model. This pod is designed to remain active and decoupled from the cost-model itself.
# However, disabling this service/pod deployment will flag the cost-model to emit the metrics instead.
kubecostMetrics:
# emitPodAnnotations: false
# emitNamespaceAnnotations: false
# emitKsmV1Metrics: true # emit all KSM metrics in KSM v1.
# emitKsmV1MetricsOnly: false # emit only the KSM metrics missing from KSM v2. Advanced users only.
# Optional
# The metrics exporter is a separate deployment and service (for prometheus scrape auto-discovery)
# which emits metrics cost-model relies on. Enabling this deployment also removes the KSM dependency
# from the cost-model. If the deployment is not enabled, the metrics will continue to be emitted from
# the cost-model.
exporter:
enabled: false
port: 9005
# Adds the default Prometheus scrape annotations to the metrics exporter service.
# Set to false and use service.annotations (below) to set custom scrape annotations.
prometheusScrape: true
resources: {}
# requests:
# cpu: "200m"
# memory: "55Mi"
## Node tolerations for server scheduling to nodes with taints
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
affinity: {}
service:
annotations: {}
# Service Monitor for Kubecost Metrics
serviceMonitor: # the kubecost included prometheus uses scrapeConfigs and does not support service monitors. The following options assume an existing prometheus that supports serviceMonitors.
enabled: false
additionalLabels: {}
metricRelabelings: []
relabelings: []
## PriorityClassName
## Ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass
priorityClassName: ""
additionalLabels: {}
nodeSelector: {}
extraArgs: []
sigV4Proxy:
image: public.ecr.aws/aws-observability/aws-sigv4-proxy:latest
imagePullPolicy: IfNotPresent
name: aps
port: 8005
region: us-west-2 # The AWS region
host: aps-workspaces.us-west-2.amazonaws.com # The hostname for AMP service.
# role_arn: arn:aws:iam::<account>:role/role-name # The AWS IAM role to assume.
extraEnv: # Pass extra env variables to sigV4Proxy
# - name: AWS_ACCESS_KEY_ID
# value: <access_key>
# - name: AWS_SECRET_ACCESS_KEY
# value: <secret_key>
# Optional resource requests and limits for the sigV4proxy container.
resources: {}
kubecostModel:
image: "gcr.io/kubecost1/cost-model"
imagePullPolicy: IfNotPresent
# fullImageName overrides the default image construction logic. The exact
# image provided (registry, image, tag) will be used for cost-model.
# fullImageName:
# securityContext:
# readOnlyRootFilesystem: true
# Build local cost allocation cache
warmCache: false
# Run allocation ETL pipelines
etl: true
# Enable the ETL filestore backing storage
etlFileStoreEnabled: true
# The total number of days the ETL pipelines will build
# Set to 0 to disable daily ETL (not recommended)
etlDailyStoreDurationDays: 91
# The total number of hours the ETL pipelines will build
# Set to 0 to disable hourly ETL (recommended for large environments)
# Must be < prometheus server retention, otherwise empty data may overwrite
# known-good data
etlHourlyStoreDurationHours: 49
# For deploying kubecost in a cluster that does not self-monitor
etlReadOnlyMode: false
## The name of the Secret containing a bucket config for Federated storage.
## The contents should be stored under a key named federated-store.yaml.
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/long-term-storage-configuration
# federatedStorageConfigSecret: federated-store
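## A hedged example of creating that Secret from a local federated-store.yaml; the secret name
## matches the example above and the namespace is illustrative:
##   kubectl create secret generic federated-store -n kubecost --from-file=federated-store.yaml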
## Federated storage config can be supplied via a secret or the yaml block
## below when using the block below, only a single provider is supported,
## others are for example purposes.
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/long-term-storage-configuration
# federatedStorageConfig: |-
# # AWS EXAMPLE
# type: S3
# config:
# bucket: kubecost-federated-storage-bucket
# endpoint: s3.amazonaws.com
# region: us-east-1
# # best practice is to use pod identities to access AWS resources. Otherwise it is possible to use an access_key and secret_key
# access_key: "<your-access-key>"
# secret_key: "<your-secret-key>"
# # AZURE EXAMPLE
# type: AZURE
# config:
# storage_account: ""
# storage_account_key: ""
# container: ""
# max_retries: 0
# # GCP EXAMPLE
# type: GCS
# config:
# bucket: kubecost-federated-storage-bucket
# service_account: |-
# {
# "type": "service_account",
# "project_id": "...",
# "private_key_id": "...",
# "private_key": "...",
# "client_email": "...",
# "client_id": "...",
# "auth_uri": "https://accounts.google.com/o/oauth2/auth",
# "token_uri": "https://oauth2.googleapis.com/token",
# "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
# "client_x509_cert_url": ""
# }
# Installs Kubecost/OpenCost plugins
plugins:
enabled: false
install:
enabled: false
fullImageName: curlimages/curl:latest
securityContext:
allowPrivilegeEscalation: false
seccompProfile:
type: RuntimeDefault
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1001
folder: /opt/opencost/plugin
# leave this commented to always download most recent version of plugins
# version: <INSERT_SPECIFIC_PLUGINS_VERSION>
# the list of enabled plugins
enabledPlugins: []
# - datadog
# pre-existing secret for plugin configuration
existingCustomSecret:
enabled: false
name: "" # name of the secret containing plugin config
secretName: kubecost-plugin-secret
# uncomment this to define plugin configuration via the values file
# configs:
# datadog: |
# {
# "datadog_site": "<INSERT_DATADOG_SITE>",
# "datadog_api_key": "<INSERT_DATADOG_API_KEY>",
# "datadog_app_key": "<INSERT_DATADOG_APP_KEY>"
# }
allocation:
# Enables or disables adding node labels to allocation data (i.e. workloads).
# Defaults to "true" and starts with a sensible includeList for basics like
# topology (e.g. zone, region) and instance type labels.
# nodeLabels:
# enabled: true
# includeList: "node.kubernetes.io/instance-type,topology.kubernetes.io/region,topology.kubernetes.io/zone"
# Enables or disables the ContainerStats pipeline, used for quantile-based
# queries like for request sizing recommendations.
# ContainerStats provides support for quantile-based request right-sizing
# recommendations.
#
# It is disabled by default to avoid problems in extremely high-scale Thanos
# environments. If you would like to try quantile-based request-sizing
# recommendations, enable this! If you are in a high-scale environment,
# please monitor Kubecost logs, Thanos query logs, and Thanos load closely.
# We hope to make major improvements at scale here soon!
#
containerStatsEnabled: true # enabled by default as of v2.2.0
# max number of concurrent Prometheus queries
maxQueryConcurrency: 5
resources:
requests:
cpu: "200m"
memory: "55Mi"
# limits:
# cpu: "800m"
# memory: "256Mi"
# Define a readiness probe for the Kubecost cost-model container.
readinessProbe:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
# Define a liveness probe for the Kubecost cost-model container.
livenessProbe:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
extraArgs: []
# Optional. A list of extra environment variables to be added to the cost-model container.
# extraEnv:
# - name: LOG_LEVEL
# value: trace
# - name: LOG_FORMAT
# value: json
# # When false, Kubecost will not show Asset costs for local disks physically
# # attached to nodes (e.g. ephemeral storage). This needs to be applied to
# # each cluster monitored.
# - name: ASSET_INCLUDE_LOCAL_DISK_COST
# value: "true"
# creates an ingress directly to the model container, for API access
ingress:
enabled: false
# className: nginx
labels:
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
annotations:
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
paths: ["/"]
pathType: ImplementationSpecific
hosts:
- cost-analyzer-model.local
tls: []
# - secretName: cost-analyzer-model-tls
# hosts:
# - cost-analyzer-model.local
utcOffset: "+00:00"
# Optional - add extra ports to the cost-model container. For kubecost development purposes only - not recommended for users.
extraPorts: []
# - name: debug
# port: 40000
# targetPort: 40000
# containerPort: 40000
## etlUtils is a utility typically used by Enterprise customers transitioning
## from v1 to v2 of Kubecost. It translates the data from the "/etl" dir of the
## bucket, to the "/federated" dir of the bucket.
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/federated-etl/thanos-migration-guide
##
etlUtils:
enabled: false
fullImageName: null
resources: {}
env: {}
nodeSelector: {}
tolerations: []
affinity: {}
# Basic Kubecost ingress, more examples available at https://docs.kubecost.com/install-and-configure/install/ingress-examples
ingress:
enabled: false
# className: nginx
labels:
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
annotations:
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
paths: ["/"] # There's no need to route specifically to the pods-- we have an nginx deployed that handles routing
pathType: ImplementationSpecific
hosts:
- cost-analyzer.local
tls: []
# - secretName: cost-analyzer-tls
# hosts:
# - cost-analyzer.local
nodeSelector: {}
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
affinity: {}
topologySpreadConstraints: []
# If true, creates a PriorityClass to be used by the cost-analyzer pod
priority:
enabled: false
name: "" # Provide name of existing priority class only. If left blank, upstream chart will create one from default template.
# If true, enable creation of NetworkPolicy resources.
networkPolicy:
enabled: false
denyEgress: true # create a network policy that denies egress from kubecost
sameNamespace: true # Set to true if cost analyzer and prometheus are on the same namespace
# namespace: kubecost # Namespace where prometheus is installed
# Cost-analyzer specific vars using the new template
costAnalyzer:
enabled: false # If true, create a network policy for cost-analyzer
annotations: {} # annotations to be added to the network policy
additionalLabels: {} # additional labels to be added to the network policy
# Examples rules:
# ingressRules:
# - selectors: # allow ingress from self on all ports
# - podSelector:
# matchLabels:
# app.kubernetes.io/name: cost-analyzer
# - selectors: # allow egress access to prometheus
# - namespaceSelector:
# matchLabels:
# name: prometheus
# podSelector:
# matchLabels:
# app: prometheus
# ports:
# - protocol: TCP
# port: 9090
# egressRules:
# - selectors: # restrict egress to inside cluster
# - namespaceSelector: {}
## @param extraVolumes A list of volumes to be added to the pod
##
extraVolumes: []
## @param extraVolumeMounts A list of volume mounts to be added to the pod
##
extraVolumeMounts: []
# Define persistence volume for cost-analyzer, more information at https://docs.kubecost.com/install-and-configure/install/storage
persistentVolume:
size: 32Gi
dbSize: 32.0Gi
enabled: true # Note that setting this to false means configurations will be wiped out on pod restart.
# storageClass: "-" #
# existingClaim: kubecost-cost-analyzer # a claim in the same namespace as kubecost
labels: {}
annotations: {}
# helm.sh/resource-policy: keep # https://helm.sh/docs/howto/charts_tips_and_tricks/#tell-helm-not-to-uninstall-a-resource
# Enables a separate PV specifically for ETL data. This should be avoided, but
# is kept for legacy compatibility.
dbPVEnabled: false
service:
type: ClusterIP
port: 9090
targetPort: 9090
nodePort: {}
labels: {}
annotations: {}
# loadBalancerSourceRanges: []
sessionAffinity:
enabled: false # When set to `true`, ensures connections from a client are passed to the same Pod each time. Recommended when authentication through OIDC or SAML integration is enabled.
timeoutSeconds: 10800
prometheus:
## Provide a full name override for Prometheus.
# fullnameOverride: ""
## Provide a name override for Prometheus.
# nameOverride: ""
rbac:
create: true # Create the RBAC resources for Prometheus.
## Define serviceAccount names for components. Defaults to component's fully qualified name.
##
serviceAccounts:
alertmanager:
create: true
name:
nodeExporter:
create: true
name:
pushgateway:
create: true
name:
server:
create: true
name:
## Prometheus server ServiceAccount annotations.
## Can be used for AWS IRSA annotations when using Remote Write mode with Amazon Managed Prometheus.
annotations: {}
## Specify an existing ConfigMap to be used by Prometheus when using self-signed certificates.
##
# selfsignedCertConfigMapName: ""
imagePullSecrets:
# - name: "image-pull-secret"
extraScrapeConfigs: |
- job_name: kubecost
honor_labels: true
scrape_interval: 1m
scrape_timeout: 60s
metrics_path: /metrics
scheme: http
dns_sd_configs:
- names:
- {{ template "cost-analyzer.serviceName" . }}
type: 'A'
port: 9003
- job_name: kubecost-networking
kubernetes_sd_configs:
- role: pod
relabel_configs:
# Scrape only the targets matching the following metadata
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance]
action: keep
regex: kubecost
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
action: keep
regex: network-costs
- job_name: kubecost-aggregator
scrape_interval: 1m
scrape_timeout: 60s
metrics_path: /metrics
scheme: http
dns_sd_configs:
- names:
- {{ template "aggregator.serviceName" . }}
type: 'A'
{{- if or .Values.saml.enabled .Values.oidc.enabled }}
port: 9008
{{- else }}
port: 9004
{{- end }}
## Enables scraping of NVIDIA GPU metrics via dcgm-exporter. Scrapes all
## endpoints which contain "dcgm-exporter" in labels "app",
## "app.kubernetes.io/component", or "app.kubernetes.io/name" with a case
## insensitive match.
## Refs:
## https://github.com/NVIDIA/gpu-operator/blob/d4316a415bbd684ce8416a88042305fc1a093aa4/assets/state-dcgm-exporter/0600_service.yaml#L7
## https://github.com/NVIDIA/dcgm-exporter/blob/54fd1ca137c66511a87a720390613680b9bdabdd/deployment/templates/service.yaml#L23
- job_name: kubecost-dcgm-exporter
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_app, __meta_kubernetes_pod_label_app_kubernetes_io_component, __meta_kubernetes_pod_label_app_kubernetes_io_name]
action: keep
regex: (?i)(.*dcgm-exporter.*|.*dcgm-exporter.*|.*dcgm-exporter.*)
server:
# If clusterIDConfigmap is defined, use the user-generated ConfigMap (with key CLUSTER_ID)
# as the unique cluster ID in the kubecost cost-analyzer deployment.
# This overrides the cluster_id set in prometheus.server.global.external_labels.
# NOTE: This does not affect the external_labels set in prometheus config.
# clusterIDConfigmap: cluster-id-configmap
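# A hedged example of creating that user-generated ConfigMap; the ConfigMap name matches the
# example above and the namespace and cluster ID are illustrative:
#   kubectl create configmap cluster-id-configmap -n kubecost --from-literal=CLUSTER_ID=my-cluster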
## Provide a full name override for the Prometheus server.
# fullnameOverride: ""
## Prometheus server container name
##
enabled: true
name: server
sidecarContainers:
strategy:
type: Recreate
rollingUpdate: null
## Prometheus server container image
##
image:
repository: cgr.dev/chainguard/prometheus
tag: latest
pullPolicy: IfNotPresent
## prometheus server priorityClassName
##
priorityClassName: ""
## The URL prefix at which the container can be accessed. Useful when the '-web.external-url'
## includes a slug, so that the various internal URLs remain accessible as in the default case.
## (Optional)
prefixURL: ""
## External URL at which the Prometheus server can be accessed
## May be the same as the Ingress host name
baseURL: ""
## Additional server container environment variables
##
## You specify this manually like you would a raw deployment manifest.
## This means you can bind in environment variables from secrets.
##
## e.g. static environment variable:
## - name: DEMO_GREETING
## value: "Hello from the environment"
##
## e.g. secret environment variable:
## - name: USERNAME
## valueFrom:
## secretKeyRef:
## name: mysecret
## key: username
env: []
extraFlags:
- web.enable-lifecycle
## web.enable-admin-api flag controls access to the administrative HTTP API which includes functionality such as
## deleting time series. This is disabled by default.
# - web.enable-admin-api
##
## storage.tsdb.no-lockfile flag controls DB locking
# - storage.tsdb.no-lockfile
##
## storage.tsdb.wal-compression flag enables compression of the write-ahead log (WAL)
# - storage.tsdb.wal-compression
## Path to a configuration file on prometheus server container FS
configPath: /etc/config/prometheus.yml
global:
## How frequently to scrape targets by default
##
scrape_interval: 1m
## How long until a scrape request times out
##
scrape_timeout: 60s
## How frequently to evaluate rules
##
evaluation_interval: 1m
external_labels:
cluster_id: cluster-one # Each cluster should have a unique ID
## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write
##
remoteWrite: {}
## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_read
##
remoteRead: {}
## Additional Prometheus server container arguments
##
extraArgs:
query.max-concurrency: 1
query.max-samples: 100000000
## Additional InitContainers to initialize the pod
##
extraInitContainers: []
## Additional Prometheus server Volume mounts
##
extraVolumeMounts: []
## Additional Prometheus server Volumes
##
extraVolumes: []
## Additional Prometheus server hostPath mounts
##
extraHostPathMounts: []
# - name: certs-dir
# mountPath: /etc/kubernetes/certs
# subPath: ""
# hostPath: /etc/kubernetes/certs
# readOnly: true
extraConfigmapMounts: []
# - name: certs-configmap
# mountPath: /prometheus
# subPath: ""
# configMap: certs-configmap
# readOnly: true
## Additional Prometheus server Secret mounts
# Defines additional mounts with secrets. Secrets must be manually created in the namespace.
extraSecretMounts: []
# - name: secret-files
# mountPath: /etc/secrets
# subPath: ""
# secretName: prom-secret-files
# readOnly: true
## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.server.configMapOverrideName}}
## Defining configMapOverrideName will cause templates/server-configmap.yaml
## to NOT generate a ConfigMap resource
##
configMapOverrideName: ""
ingress:
## If true, Prometheus server Ingress will be created
##
enabled: false
# className: nginx
## Prometheus server Ingress annotations
##
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: 'true'
## Prometheus server Ingress additional labels
##
extraLabels: {}
## Prometheus server Ingress hostnames with optional path
## Must be provided if Ingress is enabled
##
hosts: []
# - prometheus.domain.com
# - domain.com/prometheus
## PathType determines the interpretation of the Path matching
pathType: "Prefix"
## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
extraPaths: []
# - path: /*
# backend:
# serviceName: ssl-redirect
# servicePort: use-annotation
## Prometheus server Ingress TLS configuration
## Secrets must be manually created in the namespace
##
tls: []
# - secretName: prometheus-server-tls
# hosts:
# - prometheus.domain.com
## Server Deployment Strategy type
# strategy:
# type: Recreate
## Node tolerations for server scheduling to nodes with taints
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
##
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
## Node labels for Prometheus server pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}
## Pod affinity
##
affinity: {}
## PodDisruptionBudget settings
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
##
podDisruptionBudget:
enabled: false
maxUnavailable: 1
## Use an alternate scheduler, e.g. "stork".
## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
##
# schedulerName:
persistentVolume:
## If true, Prometheus server will create/use a Persistent Volume Claim
## If false, use emptyDir
##
enabled: true
## Prometheus server data Persistent Volume access modes
## Must match those of existing PV or dynamic provisioner
## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
##
accessModes:
- ReadWriteOnce
## Prometheus server data Persistent Volume annotations
##
annotations: {}
# helm.sh/resource-policy: keep # https://helm.sh/docs/howto/charts_tips_and_tricks/#tell-helm-not-to-uninstall-a-resource
## Prometheus server data Persistent Volume existing claim name
## Requires server.persistentVolume.enabled: true
## If defined, PVC must be created manually before volume will be bound
existingClaim: ""
## Prometheus server data Persistent Volume mount root path
##
mountPath: /data
## Prometheus server data Persistent Volume size
##
size: 32Gi
## Prometheus server data Persistent Volume Storage Class
## If defined, storageClassName: <storageClass>
## If set to "-", storageClassName: "", which disables dynamic provisioning
## If undefined (the default) or set to null, no storageClassName spec is
## set, choosing the default provisioner. (gp2 on AWS, standard on
## GKE, AWS & OpenStack)
##
# storageClass: "-"
## Prometheus server data Persistent Volume Binding Mode
## If defined, volumeBindingMode: <volumeBindingMode>
## If undefined (the default) or set to null, no volumeBindingMode spec is
## set, choosing the default mode.
##
# volumeBindingMode: ""
## Subdirectory of Prometheus server data Persistent Volume to mount
## Useful if the volume's root directory is not empty
##
subPath: ""
emptyDir:
sizeLimit: ""
## Annotations to be added to Prometheus server pods
##
podAnnotations: {}
# iam.amazonaws.com/role: prometheus
## Annotations to be added to the Prometheus Server deployment
##
deploymentAnnotations: {}
## Labels to be added to Prometheus server pods
##
podLabels: {}
## Prometheus AlertManager configuration
##
alertmanagers: []
## Use a StatefulSet if replicaCount needs to be greater than 1 (see below)
##
replicaCount: 1
statefulSet:
## If true, use a statefulset instead of a deployment for pod management.
## This allows scaling replicas to more than 1 pod
##
enabled: false
annotations: {}
labels: {}
podManagementPolicy: OrderedReady
## Alertmanager headless service to use for the statefulset
##
headless:
annotations: {}
labels: {}
servicePort: 80
## Prometheus server readiness and liveness probe initial delay and timeout
## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
##
readinessProbeInitialDelay: 5
readinessProbeTimeout: 3
readinessProbeFailureThreshold: 3
readinessProbeSuccessThreshold: 1
livenessProbeInitialDelay: 5
livenessProbeTimeout: 3
livenessProbeFailureThreshold: 3
livenessProbeSuccessThreshold: 1
## Prometheus server resource requests and limits
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
##
resources: {}
# limits:
# cpu: 500m
# memory: 512Mi
# requests:
# cpu: 500m
# memory: 512Mi
## Vertical Pod Autoscaler config
## Ref: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler
verticalAutoscaler:
## If true a VPA object will be created for the controller (either StatefulSet or Deployment, based on above configs)
enabled: false
## Optional. Defaults to "Auto" if not specified.
# updateMode: "Auto"
## Mandatory. Without, VPA will not be created.
# containerPolicies:
# - containerName: 'prometheus-server'
## Security context to be added to server pods
##
securityContext: {}
# runAsUser: 1001
# runAsNonRoot: true
# runAsGroup: 1001
# fsGroup: 1001
containerSecurityContext: {}
service:
annotations: {}
labels: {}
clusterIP: ""
# nodePort: ""
## List of IP addresses at which the Prometheus server service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
##
externalIPs: []
loadBalancerIP: ""
loadBalancerSourceRanges: []
servicePort: 80
sessionAffinity: None
type: ClusterIP
## Enable gRPC port on service to allow auto discovery with thanos-querier
gRPC:
enabled: false
servicePort: 10901
# nodePort: 10901
## If using a statefulSet (statefulSet.enabled=true), configure the
## service to connect to a specific replica to have a consistent view
## of the data.
statefulsetReplica:
enabled: false
replica: 0
## Prometheus server pod termination grace period
##
terminationGracePeriodSeconds: 300
## Prometheus data retention period (default if not specified is 97 hours)
##
## Kubecost builds up its own persistent store of metric data on the
## filesystem (usually a PV) and, when using ETL Backup and/or Federated
## ETL, in more durable object storage like S3 or GCS. Kubecost's data
## retention is _not_ tied to the configured Prometheus retention.
##
## For data durability, we recommend using ETL Backup instead of relying on
## Prometheus retention.
##
## Lower retention values will affect Prometheus by reducing resource
## consumption and increasing stability. It _must not_ be set below or equal
## to kubecostModel.etlHourlyStoreDurationHours, otherwise empty data sets
## may overwrite good data sets. For now, it must also be >= 49h for Daily
## ETL stability.
##
## "ETL Rebuild" and "ETL Repair" is only possible on data available within
## this retention window. This is an extremely rare operation.
##
## If you want maximum security in the event of a Kubecost agent
## (cost-model) outage, increase this value. The current default of 97h is
## intended to balance Prometheus stability and resource consumption
## against the event of an outage in Kubecost which would necessitate a
## version change. 4 days should provide enough time for most users to
## notice a problem and initiate corrective action.
retention: 97h
# retentionSize: should be significantly greater than the storage used in the number of hours set in etlHourlyStoreDurationHours
# Install Prometheus Alert Manager
alertmanager:
## If false, alertmanager will not be installed
##
enabled: false
## Provide a full name override for Prometheus alertmanager.
# fullnameOverride: ""
strategy:
type: Recreate
rollingUpdate: null
## alertmanager container name
##
name: alertmanager
## alertmanager container image
##
image:
repository: cgr.dev/chainguard/prometheus-alertmanager
tag: latest
pullPolicy: IfNotPresent
## alertmanager priorityClassName
##
priorityClassName: ""
## Additional alertmanager container arguments
##
extraArgs: {}
## The URL prefix at which the container can be accessed. Useful when the '-web.external-url'
## includes a slug, so that the various internal URLs remain accessible as in the default case.
## (Optional)
prefixURL: ""
## External URL which can access alertmanager
baseURL: "http://localhost:9093"
## Additional alertmanager container environment variable
## For instance to add a http_proxy
##
extraEnv: {}
## Additional alertmanager Secret mounts
# Defines additional mounts with secrets. Secrets must be manually created in the namespace.
extraSecretMounts: []
# - name: secret-files
# mountPath: /etc/secrets
# subPath: ""
# secretName: alertmanager-secret-files
# readOnly: true
## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.alertmanager.configMapOverrideName}}
## Defining configMapOverrideName will cause templates/alertmanager-configmap.yaml
## to NOT generate a ConfigMap resource
##
configMapOverrideName: ""
## The name of a secret in the same kubernetes namespace which contains the Alertmanager config
## Defining configFromSecret will cause templates/alertmanager-configmap.yaml
## to NOT generate a ConfigMap resource
##
configFromSecret: ""
## The configuration file name to be loaded to alertmanager
## Must match the key within configuration loaded from ConfigMap/Secret
##
configFileName: alertmanager.yml
ingress:
## If true, alertmanager Ingress will be created
##
enabled: false
## alertmanager Ingress annotations
##
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: 'true'
## alertmanager Ingress additional labels
##
extraLabels: {}
## alertmanager Ingress hostnames with optional path
## Must be provided if Ingress is enabled
##
hosts: []
# - alertmanager.domain.com
# - domain.com/alertmanager
## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
extraPaths: []
# - path: /*
# backend:
# serviceName: ssl-redirect
# servicePort: use-annotation
## alertmanager Ingress TLS configuration
## Secrets must be manually created in the namespace
##
tls: []
# - secretName: prometheus-alerts-tls
# hosts:
# - alertmanager.domain.com
## Alertmanager Deployment Strategy type
# strategy:
# type: Recreate
## Node tolerations for alertmanager scheduling to nodes with taints
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
##
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
## Node labels for alertmanager pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}
## Pod affinity
##
affinity: {}
## PodDisruptionBudget settings
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
##
podDisruptionBudget:
enabled: false
maxUnavailable: 1
## Use an alternate scheduler, e.g. "stork".
## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
##
# schedulerName:
persistentVolume:
## If true, alertmanager will create/use a Persistent Volume Claim
## If false, use emptyDir
##
enabled: true
## alertmanager data Persistent Volume access modes
## Must match those of existing PV or dynamic provisioner
## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
##
accessModes:
- ReadWriteOnce
## alertmanager data Persistent Volume Claim annotations
##
annotations: {}
## alertmanager data Persistent Volume existing claim name
## Requires alertmanager.persistentVolume.enabled: true
## If defined, PVC must be created manually before volume will be bound
existingClaim: ""
## alertmanager data Persistent Volume mount root path
##
mountPath: /data
## alertmanager data Persistent Volume size
##
size: 2Gi
## alertmanager data Persistent Volume Storage Class
## If defined, storageClassName: <storageClass>
## If set to "-", storageClassName: "", which disables dynamic provisioning
## If undefined (the default) or set to null, no storageClassName spec is
## set, choosing the default provisioner. (gp2 on AWS, standard on
## GKE, AWS & OpenStack)
##
# storageClass: "-"
## alertmanager data Persistent Volume Binding Mode
## If defined, volumeBindingMode: <volumeBindingMode>
## If undefined (the default) or set to null, no volumeBindingMode spec is
## set, choosing the default mode.
##
# volumeBindingMode: ""
## Subdirectory of alertmanager data Persistent Volume to mount
## Useful if the volume's root directory is not empty
##
subPath: ""
## Annotations to be added to alertmanager pods
##
podAnnotations: {}
## Tell prometheus to use a specific set of alertmanager pods
## instead of all alertmanager pods found in the same namespace
## Useful if you deploy multiple releases within the same namespace
##
## prometheus.io/probe: alertmanager-teamA
## Labels to be added to Prometheus AlertManager pods
##
podLabels: {}
## Use a StatefulSet if replicaCount needs to be greater than 1 (see below)
##
replicaCount: 1
statefulSet:
## If true, use a statefulset instead of a deployment for pod management.
## This allows scaling replicas to more than 1 pod
##
enabled: false
podManagementPolicy: OrderedReady
## Alertmanager headless service to use for the statefulset
##
headless:
annotations: {}
labels: {}
## Enable peer mesh service endpoints to enable HA Alertmanager
## Ref: https://github.com/prometheus/alertmanager/blob/master/README.md
# enableMeshPeer : true
servicePort: 80
## alertmanager resource requests and limits
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
##
resources: {}
# limits:
# cpu: 10m
# memory: 32Mi
# requests:
# cpu: 10m
# memory: 32Mi
## Security context to be added to alertmanager pods
##
securityContext:
runAsUser: 1001
runAsNonRoot: true
runAsGroup: 1001
fsGroup: 1001
service:
annotations: {}
labels: {}
clusterIP: ""
## Enable peer mesh service endpoints to enable HA Alertmanager
## Ref: https://github.com/prometheus/alertmanager/blob/master/README.md
# enableMeshPeer : true
## List of IP addresses at which the alertmanager service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
##
externalIPs: []
loadBalancerIP: ""
loadBalancerSourceRanges: []
servicePort: 80
# nodePort: 30000
sessionAffinity: None
type: ClusterIP
# Define a custom scheduler for Alertmanager pods
# schedulerName: default-scheduler
## alertmanager ConfigMap entries
##
alertmanagerFiles:
alertmanager.yml:
global: {}
# slack_api_url: ''
receivers:
- name: default-receiver
# slack_configs:
# - channel: '@you'
# send_resolved: true
route:
group_wait: 10s
group_interval: 5m
receiver: default-receiver
repeat_interval: 3h
## Monitors ConfigMap changes and POSTs to a URL
configmapReload:
prometheus:
## If false, the configmap-reload container will not be deployed
##
enabled: false
## configmap-reload container name
##
name: configmap-reload
## configmap-reload container image
##
image:
repository: cgr.dev/chainguard/prometheus-config-reloader
tag: latest
pullPolicy: IfNotPresent
## Additional configmap-reload container arguments
##
extraArgs: {}
## Additional configmap-reload volume directories
##
extraVolumeDirs: []
## Additional configmap-reload mounts
##
extraConfigmapMounts: []
# - name: prometheus-alerts
# mountPath: /etc/alerts.d
# subPath: ""
# configMap: prometheus-alerts
# readOnly: true
## configmap-reload resource requests and limits
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
##
resources: {}
## configmap-reload container securityContext
containerSecurityContext: {}
alertmanager:
## If false, the configmap-reload container will not be deployed
##
enabled: false
## configmap-reload container name
##
name: configmap-reload
## configmap-reload container image
##
image:
repository: cgr.dev/chainguard/prometheus-config-reloader
tag: latest
pullPolicy: IfNotPresent
## Additional configmap-reload container arguments
##
extraArgs: {}
## Additional configmap-reload volume directories
##
extraVolumeDirs: []
## Additional configmap-reload mounts
##
extraConfigmapMounts: []
# - name: prometheus-alerts
# mountPath: /etc/alerts.d
# subPath: ""
# configMap: prometheus-alerts
# readOnly: true
## configmap-reload resource requests and limits
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
##
resources: {}
# node-exporter must be disabled if there is an existing daemonset: https://guide.kubecost.com/hc/en-us/articles/4407601830679-Troubleshoot-Install#a-name-node-exporter-a-issue-failedscheduling-kubecost-prometheus-node-exporter
nodeExporter:
## If false, node-exporter will not be installed.
## This is disabled by default in Kubecost 2.0, though it can be enabled as needed.
##
enabled: false
## Provide a full name override for node exporter.
# fullnameOverride: ""
## If true, node-exporter pods share the host network namespace
##
hostNetwork: true
## If true, node-exporter pods share the host PID namespace
##
hostPID: true
## node-exporter dns policy
##
dnsPolicy: ClusterFirstWithHostNet
## node-exporter container name
##
name: node-exporter
## node-exporter container image
##
image:
repository: prom/node-exporter
tag: v1.8.2
pullPolicy: IfNotPresent
## node-exporter priorityClassName
##
priorityClassName: ""
## Custom Update Strategy
##
updateStrategy:
type: RollingUpdate
## Additional node-exporter container arguments
##
extraArgs: {}
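## Example (illustrative): enable the textfile collector, matching the commented
## extraHostPathMounts example below. The flag name/value shown are assumptions, not defaults.
# extraArgs:
#   collector.textfile.directory: /srv/txt_collector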
## Additional node-exporter hostPath mounts
##
extraHostPathMounts: []
# - name: textfile-dir
# mountPath: /srv/txt_collector
# hostPath: /var/lib/node-exporter
# readOnly: true
# mountPropagation: HostToContainer
extraConfigmapMounts: []
# - name: certs-configmap
# mountPath: /prometheus
# configMap: certs-configmap
# readOnly: true
## Set a custom affinity for node-exporter
##
# affinity:
## Node tolerations for node-exporter scheduling to nodes with taints
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
##
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
## Node labels for node-exporter pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}
## Annotations to be added to node-exporter pods
##
podAnnotations: {}
## Annotations to be added to the node-exporter DaemonSet
##
deploymentAnnotations: {}
## Labels to be added to node-exporter pods
##
pod:
labels: {}
## PodDisruptionBudget settings
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
##
podDisruptionBudget:
enabled: false
maxUnavailable: 1
## node-exporter resource limits & requests
## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
##
resources: {}
# limits:
# cpu: 200m
# memory: 50Mi
# requests:
# cpu: 100m
# memory: 30Mi
## Security context to be added to node-exporter pods
##
securityContext: {}
# runAsUser: 0
service:
annotations:
prometheus.io/scrape: "true"
labels: {}
# Exposed as a headless service:
# https://kubernetes.io/docs/concepts/services-networking/service/#headless-services
clusterIP: None
## List of IP addresses at which the node-exporter service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
##
externalIPs: []
hostPort: 9100
loadBalancerIP: ""
loadBalancerSourceRanges: []
servicePort: 9100
type: ClusterIP
# Install Prometheus Push Gateway.
pushgateway:
## If false, pushgateway will not be installed
##
enabled: false
## Provide a full name override for Prometheus push gateway.
# fullnameOverride: ""
## Use an alternate scheduler, e.g. "stork".
## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
##
# schedulerName:
## pushgateway container name
##
name: pushgateway
## pushgateway container image
##
image:
repository: prom/pushgateway
tag: v1.9.0
pullPolicy: IfNotPresent
## pushgateway priorityClassName
##
priorityClassName: ""
## Additional pushgateway container arguments
##
## for example: persistence.file: /data/pushgateway.data
extraArgs: {}
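## Example (illustrative), expanding the persistence example above into YAML form.
## The flag values shown are assumptions, not defaults.
# extraArgs:
#   persistence.file: /data/pushgateway.data
#   persistence.interval: 5m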
ingress:
## If true, pushgateway Ingress will be created
##
enabled: false
## pushgateway Ingress annotations
##
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: 'true'
## pushgateway Ingress hostnames with optional path
## Must be provided if Ingress is enabled
##
hosts: []
# - pushgateway.domain.com
# - domain.com/pushgateway
## Extra paths to prepend to every host configuration. This is useful when working with annotation-based services.
extraPaths: []
# - path: /*
# backend:
# serviceName: ssl-redirect
# servicePort: use-annotation
## pushgateway Ingress TLS configuration
## Secrets must be manually created in the namespace
##
tls: []
# - secretName: prometheus-alerts-tls
# hosts:
# - pushgateway.domain.com
## Node tolerations for pushgateway scheduling to nodes with taints
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
##
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
## Node labels for pushgateway pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}
## Annotations to be added to pushgateway pods
##
podAnnotations: {}
replicaCount: 1
## PodDisruptionBudget settings
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
##
podDisruptionBudget:
enabled: false
maxUnavailable: 1
## pushgateway resource requests and limits
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
##
resources: {}
# limits:
# cpu: 10m
# memory: 32Mi
# requests:
# cpu: 10m
# memory: 32Mi
## Security context to be added to push-gateway pods
##
securityContext:
runAsUser: 1001
runAsNonRoot: true
service:
annotations:
prometheus.io/probe: pushgateway
labels: {}
clusterIP: ""
## List of IP addresses at which the pushgateway service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
##
externalIPs: []
loadBalancerIP: ""
loadBalancerSourceRanges: []
servicePort: 9091
type: ClusterIP
strategy:
type: Recreate
rollingUpdate: null
persistentVolume:
## If true, pushgateway will create/use a Persistent Volume Claim
## If false, use emptyDir
##
enabled: true
## pushgateway data Persistent Volume access modes
## Must match those of existing PV or dynamic provisioner
## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
##
accessModes:
- ReadWriteOnce
## pushgateway data Persistent Volume Claim annotations
##
annotations: {}
## pushgateway data Persistent Volume existing claim name
## Requires pushgateway.persistentVolume.enabled: true
## If defined, PVC must be created manually before volume will be bound
existingClaim: ""
## pushgateway data Persistent Volume mount root path
##
mountPath: /data
## pushgateway data Persistent Volume size
##
size: 2Gi
## pushgateway data Persistent Volume Storage Class
## If defined, storageClassName: <storageClass>
## If set to "-", storageClassName: "", which disables dynamic provisioning
## If undefined (the default) or set to null, no storageClassName spec is
## set, choosing the default provisioner. (gp2 on AWS, standard on
## GKE, AWS & OpenStack)
##
# storageClass: "-"
## pushgateway data Persistent Volume Binding Mode
## If defined, volumeBindingMode: <volumeBindingMode>
## If undefined (the default) or set to null, no volumeBindingMode spec is
## set, choosing the default mode.
##
# volumeBindingMode: ""
## Subdirectory of pushgateway data Persistent Volume to mount
## Useful if the volume's root directory is not empty
##
subPath: ""
serverFiles:
## Alerts configuration
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
alerting_rules.yml: {}
# groups:
# - name: Instances
# rules:
# - alert: InstanceDown
# expr: up == 0
# for: 5m
# labels:
# severity: page
# annotations:
# description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.'
# summary: 'Instance {{ $labels.instance }} down'
## DEPRECATED DEFAULT VALUE. Unless explicitly naming your files, please use alerting_rules.yml
alerts: {}
## Records configuration
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
recording_rules.yml: {}
## DEPRECATED DEFAULT VALUE. Unless explicitly naming your files, please use recording_rules.yml
prometheus.yml:
rule_files:
- /etc/config/recording_rules.yml
- /etc/config/alerting_rules.yml
## The below two files are DEPRECATED and will be removed from this default values file
- /etc/config/rules
- /etc/config/alerts
scrape_configs:
- job_name: prometheus
static_configs:
- targets:
- localhost:9090
# A scrape configuration for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server, node)
# and services to allow each to use different authentication configs.
#
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.
- job_name: 'kubernetes-nodes-cadvisor'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
# This configuration will work only on kubelet 1.7.3+
# As the scrape endpoints for cAdvisor have changed
# if you are using an older version you need to change the replacement to
# replacement: /api/v1/nodes/$1:4194/proxy/metrics
# more info here https://github.com/coreos/prometheus-operator/issues/633
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
metric_relabel_configs:
- source_labels: [__name__]
regex: (container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_network_receive_errors_total|container_network_transmit_errors_total|container_network_receive_packets_dropped_total|container_network_transmit_packets_dropped_total|container_memory_usage_bytes|container_cpu_cfs_throttled_periods_total|container_cpu_cfs_periods_total|container_fs_usage_bytes|container_fs_limit_bytes|container_cpu_cfs_periods_total|container_fs_inodes_free|container_fs_inodes_total|container_fs_usage_bytes|container_fs_limit_bytes|container_cpu_cfs_throttled_periods_total|container_cpu_cfs_periods_total|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_fs_inodes_free|container_fs_inodes_total|container_fs_usage_bytes|container_fs_limit_bytes|container_spec_cpu_shares|container_spec_memory_limit_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_fs_reads_bytes_total|container_network_receive_bytes_total|container_fs_writes_bytes_total|container_fs_reads_bytes_total|cadvisor_version_info|kubecost_pv_info)
action: keep
- source_labels: [container]
target_label: container_name
regex: (.+)
action: replace
- source_labels: [pod]
target_label: pod_name
regex: (.+)
action: replace
# A scrape configuration for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server, node)
# and services to allow each to use different authentication configs.
#
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.
- job_name: 'kubernetes-nodes'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/$1/proxy/metrics
metric_relabel_configs:
- source_labels: [__name__]
regex: (kubelet_volume_stats_used_bytes) # this metric is in alpha
action: keep
# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_endpoints_name]
action: keep
regex: (.*node-exporter|kubecost-network-costs)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
- source_labels: [__meta_kubernetes_pod_node_name]
action: replace
target_label: kubernetes_node
metric_relabel_configs:
- source_labels: [__name__]
regex: (container_cpu_allocation|container_cpu_usage_seconds_total|container_fs_limit_bytes|container_fs_writes_bytes_total|container_gpu_allocation|container_memory_allocation_bytes|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|DCGM_FI_DEV_GPU_UTIL|deployment_match_labels|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_number_ready|kube_deployment_spec_replicas|kube_deployment_status_replicas|kube_deployment_status_replicas_available|kube_job_status_failed|kube_namespace_annotations|kube_namespace_labels|kube_node_info|kube_node_labels|kube_node_status_allocatable|kube_node_status_allocatable_cpu_cores|kube_node_status_allocatable_memory_bytes|kube_node_status_capacity|kube_node_status_capacity_cpu_cores|kube_node_status_capacity_memory_bytes|kube_node_status_condition|kube_persistentvolume_capacity_bytes|kube_persistentvolume_status_phase|kube_persistentvolumeclaim_info|kube_persistentvolumeclaim_resource_requests_storage_bytes|kube_pod_container_info|kube_pod_container_resource_limits|kube_pod_container_resource_limits_cpu_cores|kube_pod_container_resource_limits_memory_bytes|kube_pod_container_resource_requests|kube_pod_container_resource_requests_cpu_cores|kube_pod_container_resource_requests_memory_bytes|kube_pod_container_status_restarts_total|kube_pod_container_status_running|kube_pod_container_status_terminated_reason|kube_pod_labels|kube_pod_owner|kube_pod_status_phase|kube_replicaset_owner|kube_statefulset_replicas|kube_statefulset_status_replicas|kubecost_cluster_info|kubecost_cluster_management_cost|kubecost_cluster_memory_working_set_bytes|kubecost_load_balancer_cost|kubecost_network_internet_egress_cost|kubecost_network_region_egress_cost|kubecost_network_zone_egress_cost|kubecost_node_is_spot|kubecost_pod_network_egress_bytes_total|node_cpu_hourly_cost|node_cpu_seconds_total|node_disk_reads_completed|node_disk_reads_completed_total|node_disk_writes_completed|node_disk_writes_completed_total|node_filesystem_device_error|node_gpu_count|node_gpu_hourly_cost|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_network_transmit_bytes_total|node_ram_hourly_cost|node_total_hourly_cost|pod_pvc_allocation|pv_hourly_cost|service_selector_labels|statefulSet_match_labels|kubecost_pv_info|up)
action: keep
# prometheus.yml: # Sample block -- enable if using an in cluster durable store.
# remote_write:
# - url: "http://pgprometheus-adapter:9201/write"
# write_relabel_configs:
# - source_labels: [__name__]
# regex: 'container_.*_allocation|container_.*_allocation_bytes|.*_hourly_cost|kube_pod_container_resource_requests{resource="memory", unit="byte"}|container_memory_working_set_bytes|kube_pod_container_resource_requests{resource="cpu", unit="core"}|kube_pod_container_resource_requests|pod_pvc_allocation|kube_namespace_labels|kube_pod_labels'
# action: keep
# queue_config:
# max_samples_per_send: 1000
# remote_read:
# - url: "http://pgprometheus-adapter:9201/read"
rules:
groups:
- name: CPU
rules:
- expr: sum(rate(container_cpu_usage_seconds_total{container!=""}[5m]))
record: cluster:cpu_usage:rate5m
- expr: rate(container_cpu_usage_seconds_total{container!=""}[5m])
record: cluster:cpu_usage_nosum:rate5m
- expr: avg(irate(container_cpu_usage_seconds_total{container!="POD", container!=""}[5m])) by (container,pod,namespace)
record: kubecost_container_cpu_usage_irate
- expr: sum(container_memory_working_set_bytes{container!="POD",container!=""}) by (container,pod,namespace)
record: kubecost_container_memory_working_set_bytes
- expr: sum(container_memory_working_set_bytes{container!="POD",container!=""})
record: kubecost_cluster_memory_working_set_bytes
- name: Savings
rules:
- expr: sum(avg(kube_pod_owner{owner_kind!="DaemonSet"}) by (pod) * sum(container_cpu_allocation) by (pod))
record: kubecost_savings_cpu_allocation
labels:
daemonset: "false"
- expr: sum(avg(kube_pod_owner{owner_kind="DaemonSet"}) by (pod) * sum(container_cpu_allocation) by (pod)) / sum(kube_node_info)
record: kubecost_savings_cpu_allocation
labels:
daemonset: "true"
- expr: sum(avg(kube_pod_owner{owner_kind!="DaemonSet"}) by (pod) * sum(container_memory_allocation_bytes) by (pod))
record: kubecost_savings_memory_allocation_bytes
labels:
daemonset: "false"
- expr: sum(avg(kube_pod_owner{owner_kind="DaemonSet"}) by (pod) * sum(container_memory_allocation_bytes) by (pod)) / sum(kube_node_info)
record: kubecost_savings_memory_allocation_bytes
labels:
daemonset: "true"
# Adds option to add alert_relabel_configs to avoid duplicate alerts in alertmanager
# useful in H/A prometheus with different external labels but the same alerts
alertRelabelConfigs:
# alert_relabel_configs:
# - source_labels: [dc]
# regex: (.+)\d+
# target_label: dc
networkPolicy:
## Enable creation of NetworkPolicy resources.
##
enabled: false
## Optional daemonset to more accurately attribute network costs to the correct workload
## https://docs.kubecost.com/install-and-configure/advanced-configuration/network-costs-configuration
networkCosts:
enabled: false
image:
repository: gcr.io/kubecost1/kubecost-network-costs
tag: v0.17.6
imagePullPolicy: IfNotPresent
updateStrategy:
type: RollingUpdate
# For existing Prometheus Installs, use the serviceMonitor: or prometheusScrape below.
# the below setting annotates the networkCost service endpoints for each of the network-costs pods.
# The Service is annotated with prometheus.io/scrape: "true" to automatically get picked up by the prometheus config.
# NOTE: Setting this option to true and leaving the above extraScrapeConfig "job_name: kubecost-networking" configured will cause the
# NOTE: pods to be scraped twice.
prometheusScrape: false
# Traffic Logging will enable logging the top 5 destinations for each source
# every 30 minutes.
trafficLogging: true
logLevel: info
# Port will set both the containerPort and hostPort to this value.
# These must be identical due to network-costs being run on hostNetwork
port: 3001
# this daemonset can use significant resources on large clusters: https://guide.kubecost.com/hc/en-us/articles/4407595973527-Network-Traffic-Cost-Allocation
resources:
limits: # remove the limits by setting cpu: null
cpu: 500m # can be less, will depend on cluster size
# memory: it is not recommended to set a memory limit
requests:
cpu: 50m
memory: 20Mi
extraArgs: []
config:
# Configuration for traffic destinations, including specific classification
# for IPs and CIDR blocks. This configuration will act as an override to the
# automatic classification provided by network-costs.
destinations:
# In Zone contains a list of address/range that will be
# classified as in zone.
in-zone:
# Loopback Addresses in "IANA IPv4 Special-Purpose Address Registry"
- "127.0.0.0/8"
# IPv4 Link Local Address Space
- "169.254.0.0/16"
# Private Address Ranges in RFC-1918
- "10.0.0.0/8" # Remove this entry if using Multi-AZ Kubernetes
- "172.16.0.0/12"
- "192.168.0.0/16"
# In Region contains a list of address/range that will be
# classified as in region. This is synonymous with cross
# zone traffic, where the regions between source and destinations
# are the same, but the zone is different.
in-region: []
# Cross Region contains a list of address/range that will be
# classified as non-internet egress from one region to another.
cross-region: []
# Internet contains a list of address/range that will be
# classified as internet traffic. This is synonymous with traffic
# that cannot be classified within the cluster.
# NOTE: Internet classification filters are executed _after_
# NOTE: direct-classification, but before in-zone, in-region,
# NOTE: and cross-region.
internet: []
# Direct Classification specifically maps an ip address or range
# to a region (required) and/or zone (optional). This classification
# takes priority over in-zone, in-region, and cross-region configurations.
direct-classification: []
# - region: "us-east1"
# zone: "us-east1-c"
# ips:
# - "10.0.0.0/24"
services:
# google-cloud-services: when set to true, enables labeling traffic metrics with google cloud
# service endpoints
google-cloud-services: true
# amazon-web-services: when set to true, enables labeling traffic metrics with amazon web service
# endpoints.
amazon-web-services: true
# azure-cloud-services: when set to true, enables labeling traffic metrics with azure cloud service
# endpoints
azure-cloud-services: true
# user defined services provide a way to define custom service endpoints which will label traffic metrics
# falling within the defined address range.
# services:
# - service: "test-service-1"
# ips:
# - "19.1.1.2"
# - service: "test-service-2"
# ips:
# - "15.128.15.2"
# - "20.0.0.0/8"
## Node tolerations for network-costs scheduling to nodes with taints
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
##
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
affinity: {}
service:
annotations: {}
labels: {}
## PriorityClassName
## Ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass
priorityClassName: ""
## PodMonitor
## Allows scraping of network metrics from a dedicated prometheus operator setup
podMonitor:
enabled: false
additionalLabels: {}
# match the default extraScrapeConfig
additionalLabels: {}
nodeSelector: {}
annotations: {}
healthCheckProbes: {}
# readinessProbe:
# tcpSocket:
# port: 3001
# initialDelaySeconds: 5
# periodSeconds: 10
# failureThreshold: 5
# livenessProbe:
# tcpSocket:
# port: 3001
# initialDelaySeconds: 5
# periodSeconds: 10
# failureThreshold: 5
additionalSecurityContext: {}
# readOnlyRootFilesystem: true
## Kubecost Deployment Configuration
## Used for HA mode in Business & Enterprise tier
##
kubecostDeployment:
replicas: 1
# deploymentStrategy:
# rollingUpdate:
# maxSurge: 1
# maxUnavailable: 1
# type: RollingUpdate
labels: {}
annotations: {}
## Kubecost Forecasting forecasts future cost patterns based on historical
## patterns observed by Kubecost.
forecasting:
enabled: true
# fullImageName overrides the default image construction logic. The exact
# image provided (registry, image, tag) will be used for the forecasting
# container.
# Example: fullImageName: gcr.io/kubecost1/forecasting:v0.0.1
fullImageName: gcr.io/kubecost1/kubecost-modeling:v0.1.16
imagePullPolicy: IfNotPresent
# Resource specification block for the forecasting container.
resources:
requests:
cpu: 200m
memory: 300Mi
limits:
cpu: 1500m
memory: 1Gi
# Set environment variables for the forecasting container as key/value pairs.
env:
# -t is the worker timeout which primarily affects model training time;
# if it is not high enough, training workers may die mid training
"GUNICORN_CMD_ARGS": "--log-level info -t 1200"
# Define a priority class for the forecasting Deployment.
priority:
enabled: false
name: ""
# Define a nodeSelector for the forecasting Deployment.
nodeSelector: {}
# Define tolerations for the forecasting Deployment.
tolerations: []
# Define Pod affinity for the forecasting Deployment.
affinity: {}
# Define a readiness probe for the forecasting container
readinessProbe:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
# Define a liveness probe for the forecasting container.
livenessProbe:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
## The Kubecost Aggregator is the primary query backend for Kubecost
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/federated-etl/aggregator
##
kubecostAggregator:
# deployMethod determines how Aggregator is deployed. Current options are
# "singlepod" (within cost-analyzer Pod) "statefulset" (separate
# StatefulSet), and "disabled". Only use "disabled" if this is a secondary
# Federated ETL cluster which does not need to answer queries.
deployMethod: singlepod
# fullImageName overrides the default image construction logic. The exact
# image provided (registry, image, tag) will be used for aggregator.
# fullImageName:
imagePullPolicy: IfNotPresent
# For legacy configuration support, `enabled: true` overrides deployMethod
# and causes `deployMethod: "statefulset"`
enabled: false
# Replicas sets the number of Aggregator replicas. It only has an effect if
# `deployMethod: "statefulset"`
replicas: 1
logLevel: info
# stagingEmptyDirSizeLimit changes how large the "staging"
# /var/configs/waterfowl emptyDir is. It only takes effect in StatefulSet
# configurations of Aggregator; other configurations are unaffected.
#
# It should be set to approximately 8x the size of the largest bingen file in
# object storage. For example, if your largest bingen file is a daily
# Allocation file with size 300MiB, this value should be set to approximately
# 2400Mi. In most environments, the default should suffice.
stagingEmptyDirSizeLimit: 2Gi
# This is the number of partitions the datastore is split into for copying.
# The higher this number, the lower the RAM usage, but the longer it takes
# for new data to show in the Kubecost UI.
# Set to 0 for max partitioning (minimum possible RAM usage, but the slowest).
# The default of 25 is sufficient for 95%+ of users. This should only be modified
# after consulting with Kubecost's support team.
numDBCopyPartitions: 25
# How many threads the read database is configured with (i.e. Kubecost API /
# UI queries). If increasing this value, it is recommended to increase the
# aggregator's memory requests & limits.
# default: 1
dbReadThreads: 1
# How many threads the write database is configured with (i.e. ingestion of
# new data from S3). If increasing this value, it is recommended to increase
# the aggregator's memory requests & limits.
# default: 1
dbWriteThreads: 1
# How many threads to use when ingesting Asset/Allocation/CloudCost data
# from the federated store bucket. In most cases the default is sufficient,
# but can be increased if trying to backfill historical data.
# default: 1
dbConcurrentIngestionCount: 1
# Memory limit applied to read database and write database connections. The
# default of "no limit" is appropriate when first establishing a baseline of
# resource usage required. It is eventually recommended to set these values
# such that dbMemoryLimit + dbWriteMemoryLimit < the total memory available
# to the aggregator pod.
# default: 0GB is no limit
dbMemoryLimit: 0GB
dbWriteMemoryLimit: 0GB
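# Example (illustrative): per the guidance above, on an aggregator pod with roughly
# 16Gi of memory available, the limits might be set so their sum stays below that total.
# The values shown are assumptions, not recommendations.
# dbMemoryLimit: 6GB
# dbWriteMemoryLimit: 6GB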
# How much data to ingest from the federated store bucket, and how much data
# to keep in the DB before rolling the data off.
#
# Note: If increasing this value to backfill historical data, it will take
# time to gradually ingest and process those historical ETL files. Consider
# also increasing the resources available to the aggregator as well as the
# refresh and concurrency env vars.
#
# default: 91
etlDailyStoreDurationDays: 91
# How much hourly data to ingest from the federated store bucket, and how much
# to keep in the DB before rolling the data off.
#
# In high scale environments setting this to `0` can improve performance if hourly
# resolution is not a requirement.
#
# default: 49
etlHourlyStoreDurationHours: 49
# How much container resource usage data to retain in the DB, in terms of days.
#
# In high scale environments setting this to `0` can improve performance if hourly
# resolution is not a requirement.
#
# default: 1
containerResourceUsageRetentionDays: 1
# Trim memory on close, only change if advised by Kubecost support.
dbTrimMemoryOnClose: true
persistentConfigsStorage:
storageClass: "" # default storage class
storageRequest: 1Gi
aggregatorDbStorage:
storageClass: "" # default storage class
storageRequest: 128Gi
resources: {}
# requests:
# cpu: 1000m
# memory: 1Gi
readinessProbe:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
## Set additional environment variables for the aggregator pod
# extraEnv:
# - name: SOME_VARIABLE
# value: "some_value"
## Add a priority class to the aggregator pod
# priority:
# enabled: false
# name: ""
## Optional - add extra ports to the aggregator container. For kubecost development purposes only - not recommended for users.
# extraPorts: []
# - name: debug
# port: 40000
# targetPort: 40000
# containerPort: 40000
## Define a securityContext for the aggregator pod. This will take highest precedence.
# securityContext: {}
## Define the container-level security context for the aggregator pod. This will take highest precedence.
# containerSecurityContext: {}
## Provide a Service Account name for aggregator.
# serviceAccountName: ""
## Define a nodeSelector for the aggregator pod
# nodeSelector: {}
## Define tolerations for the aggregator pod
# tolerations: []
## Define Pod affinity for the aggregator pod
# affinity: {}
## Define extra volumes for the aggregator pod
# extraVolumes: []
## Define extra volumemounts for the aggregator pod
# extraVolumeMounts: []
## Creates a new container/pod to retrieve CloudCost data. By default it uses
## the same serviceaccount as the cost-analyzer pod. A custom serviceaccount
## can be specified.
cloudCost:
# The cloudCost component of Aggregator depends on
# kubecostAggregator.deployMethod:
# kA.dM = "singlepod" -> cloudCost is run as container inside cost-analyzer
# kA.dM = "statefulset" -> cloudCost is run as single-replica Deployment
resources: {}
# requests:
# cpu: 1000m
# memory: 1Gi
# refreshRateHours:
# queryWindowDays:
# runWindowDays:
# serviceAccountName:
readinessProbe:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
## Add a nodeSelector for aggregator cloud costs
# nodeSelector: {}
## Tolerations for the aggregator cloud costs
# tolerations: []
## Affinity for the aggregator cloud costs
# affinity: {}
## ServiceAccount for the aggregator cloud costs
# serviceAccountName: ""
## Define environment variables for cloud cost
# env: {}
## Define extra volumes for the cloud cost pod
# extraVolumes: []
## Define extra volumemounts for the cloud cost pod
# extraVolumeMounts: []
## Configure the Collections service for aggregator.
# collections:
# cache:
# enabled: false
# Jaeger is an optional container attached to wherever the Aggregator
# container is running. It is used for performance investigation. Enable if
# Kubecost Support asks.
jaeger:
enabled: false
image: jaegertracing/all-in-one
imageVersion: latest
# containerSecurityContext:
service:
labels: {}
## Kubecost Multi-cluster Diagnostics (beta)
## A single view into the health of all agent clusters. Each agent cluster sends
## its diagnostic data to a storage bucket. Future versions may include
## repairing & alerting from the primary.
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/multi-cluster-diagnostics
##
diagnostics:
enabled: true
## The primary aggregates all diagnostic data and handles API requests. It's
## also responsible for deleting diagnostic data (on disk & bucket) beyond
## retention. When in readonly mode it does not push its own diagnostic data
## to the bucket.
primary:
enabled: false
retention: "7d"
readonly: false
## How frequently to run & push diagnostics. Defaults to 5 minutes.
pollingInterval: "300s"
## Creates a new Diagnostic file in the bucket for every run.
keepDiagnosticHistory: false
## Pushes the cluster's Kubecost Helm Values to the bucket once upon startup.
## This may contain sensitive information and is roughly 30kb per cluster.
collectHelmValues: false
## By default, the Multi-cluster Diagnostics service runs within the
## cost-model container in the cost-analyzer pod. For higher availability, it
## can be run as a separate deployment.
deployment:
enabled: false
resources:
requests:
cpu: "10m"
memory: "20Mi"
env: {}
labels: {}
securityContext: {}
containerSecurityContext: {}
nodeSelector: {}
tolerations: []
affinity: {}
## Provide a full name override for the diagnostics Deployment.
# diagnosticsFullnameOverride: ""
# Kubecost Cluster Controller for Right Sizing and Cluster Turndown
clusterController:
enabled: false
image:
repository: gcr.io/kubecost1/cluster-controller
tag: v0.16.9
imagePullPolicy: IfNotPresent
## PriorityClassName
## Ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass
priorityClassName: ""
# Set custom tolerations for the cluster controller.
tolerations: []
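# Example (illustrative): tolerate a taint on dedicated nodes so the cluster controller
# can schedule there. The key/value shown are assumptions.
# tolerations:
#   - key: "dedicated"
#     operator: "Equal"
#     value: "kubecost"
#     effect: "NoSchedule"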
actionConfigs:
# this configures the Kubecost Cluster Turndown action
# for more details, see documentation at https://github.com/kubecost/cluster-turndown/tree/develop?tab=readme-ov-file#setting-a-turndown-schedule
clusterTurndown: []
# - name: my-schedule
# start: "2024-02-09T00:00:00Z"
# end: "2024-02-09T12:00:00Z"
# repeat: daily
# - name: my-schedule2
# start: "2024-02-09T00:00:00Z"
# end: "2024-02-09T01:00:00Z"
# repeat: weekly
# this configures the Kubecost Namespace Turndown action
# for more details, see documentation at https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/savings/savings-actions#namespace-turndown
namespaceTurndown:
# - name: my-ns-turndown-action
# dryRun: false
# schedule: "0 0 * * *"
# type: Scheduled
# targetObjs:
# - namespace
# keepPatterns:
# - ignorednamespace
# keepLabels:
# turndown: ignore
# params:
# minNamespaceAge: 4h
# this configures the Kubecost Cluster Sizing action
# for more details, see documentation at https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/savings/savings-actions#cluster-sizing
clusterRightsize:
# startTime: '2024-01-02T15:04:05Z'
# frequencyMinutes: 1440
# lastCompleted: ''
# recommendationParams:
# window: 48h
# architecture: ''
# targetUtilization: 0.8
# minNodeCount: 1
# allowSharedCore: false
# allowCostIncrease: false
# recommendationType: ''
# This configures the Kubecost Continuous Request Sizing Action
#
# Using this configuration overrides annotation-based configuration of
# Continuous Request Sizing. Annotation configuration will be ignored while
# this configuration method is present in the cluster.
#
# For more details, see documentation at https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/savings/savings-actions#automated-request-sizing
containerRightsize:
# Workloads can be selected by an _exact_ key (namespace, controllerKind,
# controllerName). This will only match a single controller. The cluster
# ID is currently irrelevant because Cluster Controller can only modify
# workloads within the cluster it is running in.
# workloads:
# - clusterID: cluster-one
# namespace: my-namespace
# controllerKind: deployment
# controllerName: my-controller
# An alternative to exact key selection is filter selection. The filters
# are syntactically identical to Kubecost's "v2" filters [1] but only
# support a small set of filter fields, those being:
# - namespace
# - controllerKind
# - controllerName
# - label
# - annotation
#
# If multiple filters are listed, they will be ORed together at the top
# level.
#
# See the examples below.
#
# [1] https://docs.kubecost.com/apis/filters-api
# filterConfig:
# - filter: |
# namespace:"abc"+controllerKind:"deployment"
# - filter: |
# controllerName:"abc123"+controllerKind:"daemonset"
# - filter: |
# namespace:"foo"+controllerKind!:"statefulset"
# - filter: |
# namespace:"bar","baz"
# schedule:
# start: "2024-01-30T15:04:05Z"
# frequencyMinutes: 5
# recommendationQueryWindow: "48h"
# lastModified: ''
# targetUtilizationCPU: 0.8 # results in a cpu request setting that is 20% higher than the max seen over last 48h
# targetUtilizationMemory: 0.8 # results in a RAM request setting that is 20% higher than the max seen over last 48h
kubescaler:
# If true, will cause all (supported) workloads to have their requests
# automatically right-sized on a regular basis.
defaultResizeAll: false
# fqdn: kubecost-cluster-controller.kubecost.svc.cluster.local:9731
namespaceTurndown:
rbac:
enabled: true
reporting:
# Kubecost bug report feature: Logs access/collection limited to .Release.Namespace
# Ref: http://docs.kubecost.com/bug-report
logCollection: true
# Basic frontend analytics
productAnalytics: true
# Report Javascript errors
errorReporting: true
valuesReporting: true
# googleAnalyticsTag allows you to embed your Google Global Site Tag to track usage of Kubecost.
# googleAnalyticsTag is only included in our Enterprise offering.
# googleAnalyticsTag: G-XXXXXXXXX
serviceMonitor: # The Kubecost-included Prometheus uses scrapeConfigs and does not support ServiceMonitors. The following options assume an existing Prometheus that supports ServiceMonitors.
enabled: false
interval: 1m
scrapeTimeout: 10s
additionalLabels: {}
metricRelabelings: []
relabelings: []
networkCosts:
enabled: false
interval: 1m
scrapeTimeout: 10s
additionalLabels: {}
metricRelabelings: []
relabelings: []
aggregatorMetrics:
enabled: false
interval: 1m
scrapeTimeout: 10s
additionalLabels: {}
metricRelabelings: []
relabelings:
- action: replace
sourceLabels:
- __meta_kubernetes_namespace
targetLabel: namespace
prometheusRule:
enabled: false
additionalLabels: {}
supportNFS: false
# initChownDataImage ensures all Kubecost filepath permissions on PV or local storage are set up correctly.
initChownDataImage: "busybox" # Supports a fully qualified Docker image, e.g. registry.hub.docker.com/library/busybox:latest
initChownData:
resources: {}
# requests:
# cpu: "50m"
# memory: "20Mi"
grafana:
# namespace_datasources: kubecost # override the default namespace here
# namespace_dashboards: kubecost # override the default namespace here
rbac:
create: true
serviceAccount:
create: true
name: ""
## Provide a full name override for the Grafana Deployment.
# fullnameOverride: ""
## Provide a name override for the Grafana Deployment.
# nameOverride: ""
## Configure grafana datasources
## ref: http://docs.grafana.org/administration/provisioning/#datasources
##
# datasources:
# datasources.yaml:
# apiVersion: 1
# datasources:
# - name: prometheus-kubecost
# type: prometheus
# url: http://kubecost-prometheus-server.kubecost.svc.cluster.local
# access: proxy
# isDefault: false
# jsonData:
# httpMethod: POST
# prometheusType: Prometheus
# prometheusVersion: 2.35.0
# timeInterval: 1m
## Number of replicas for the Grafana deployment
replicas: 1
## Deployment strategy for the Grafana deployment
deploymentStrategy: RollingUpdate
## Readiness probe for the Grafana deployment
readinessProbe:
httpGet:
path: /api/health
port: 3000
## Liveness probe for the Grafana deployment
livenessProbe:
httpGet:
path: /api/health
port: 3000
initialDelaySeconds: 60
timeoutSeconds: 30
failureThreshold: 10
## Container image settings for the Grafana deployment
image:
repository: cgr.dev/chainguard/grafana
tag: latest
pullPolicy: IfNotPresent
## Optionally specify an array of imagePullSecrets.
## Secrets must be manually created in the namespace.
# pullSecrets:
# - myRegistryKeySecretName
## Pod-level security context for the Grafana deployment. It is recommended to let global defaults take effect.
securityContext: {}
# runAsUser: 472
# fsGroup: 472
## PriorityClassName for the Grafana deployment
priorityClassName: ""
## Container image settings for the Grafana initContainer used to download dashboards. Only used when dashboards are present.
downloadDashboardsImage:
repository: curlimages/curl
tag: latest
pullPolicy: IfNotPresent
## Pod Annotations for the Grafana deployment
podAnnotations: {}
## Deployment annotations for the Grafana deployment
annotations: {}
## Expose the Grafana service to be accessed from outside the cluster (LoadBalancer service)
## or access it from within the cluster (ClusterIP service). Set the service type and the port to serve it.
service:
type: ClusterIP
port: 80
annotations: {}
labels: {}
## This template is not needed and is not supported.
## It is here for backwards compatibility.
## Kubecost exposes grafana by default with the
## top level ingress template under /grafana/
ingress:
enabled: false
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
labels: {}
path: /
pathType: Prefix
hosts:
- chart-example.local
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
## Resource requests and limits for the Grafana deployment
resources: {}
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
## Node labels for pod assignment of the Grafana deployment
nodeSelector: {}
## Tolerations for pod assignment of the Grafana deployment
tolerations: []
## Affinity for pod assignment of the Grafana deployment
affinity: {}
## Enable persistence using Persistent Volume Claims of the Grafana deployment
persistence:
enabled: false
# storageClassName: default
# accessModes:
# - ReadWriteOnce
# size: 10Gi
# annotations: {}
# subPath: ""
# existingClaim:
## Admin user for Grafana
adminUser: admin
## Admin password for Grafana
adminPassword: strongpassword
## Use an alternate scheduler for the Grafana deployment
# schedulerName:
## Extra environment variables that will be passed onto Grafana deployment pods
env: {}
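## Example (illustrative): pass extra environment variables to Grafana pods.
## The variable and value shown are assumptions, not defaults.
# env:
#   GF_USERS_DEFAULT_THEME: light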
## The name of a secret for Grafana in the same Kubernetes namespace which contains values to be added to the environment
## This can be useful for auth tokens, etc
envFromSecret: ""
## Additional Grafana server secret mounts
## Defines additional mounts with secrets. Secrets must be manually created in the namespace.
extraSecretMounts: []
# - name: secret-files
# mountPath: /etc/secrets
# secretName: grafana-secret-files
# readOnly: true
## List of Grafana plugins
plugins: []
# - digrich-bubblechart-panel
# - grafana-clock-panel
## Grafana dashboard providers
## ref: http://docs.grafana.org/administration/provisioning/#dashboards
##
## `path` must be /var/lib/grafana/dashboards/<provider_name>
##
dashboardProviders: {}
# dashboardproviders.yaml:
# apiVersion: 1
# providers:
# - name: 'default'
# orgId: 1
# folder: ''
# type: file
# disableDeletion: false
# editable: true
# options:
# path: /var/lib/grafana/dashboards/default
## Configure Grafana dashboard to import
## NOTE: To use dashboards you must also enable/configure dashboardProviders
## ref: https://grafana.com/dashboards
##
## dashboards per provider, use provider name as key.
##
dashboards: {}
# default:
# prometheus-stats:
# gnetId: 3662
# revision: 2
# datasource: Prometheus
## Reference to external Grafana ConfigMap per provider. Use the provider name as the key and the ConfigMap name as the value.
## A provider's dashboards must be defined either by external ConfigMaps or in values.yaml, not both.
## ConfigMap data example:
##
## data:
## example-dashboard.json: |
## RAW_JSON
##
dashboardsConfigMaps: {}
# default: ""
## LDAP Authentication for Grafana can be enabled with the following values on grafana.ini
## NOTE: Grafana will fail to start if the value for ldap.toml is invalid
# auth.ldap:
# enabled: true
# allow_sign_up: true
# config_file: /etc/grafana/ldap.toml
## Grafana's LDAP configuration
## Templated by the template in _helpers.tpl
## NOTE: To enable, grafana.ini must be configured with auth.ldap.enabled
## ref: http://docs.grafana.org/installation/configuration/#auth-ldap
## ref: http://docs.grafana.org/installation/ldap/#configuration
ldap:
# `existingSecret` is a reference to an existing secret containing the ldap configuration
# for Grafana in a key `ldap-toml`.
existingSecret: ""
# `config` is the content of `ldap.toml` that will be stored in the created secret
config: ""
# config: |-
# verbose_logging = true
# [[servers]]
# host = "my-ldap-server"
# port = 636
# use_ssl = true
# start_tls = false
# ssl_skip_verify = false
# bind_dn = "uid=%s,ou=users,dc=myorg,dc=com"
## Grafana's SMTP configuration
## NOTE: To enable, grafana.ini must be configured with smtp.enabled
## ref: http://docs.grafana.org/installation/configuration/#smtp
smtp:
# `existingSecret` is a reference to an existing secret containing the smtp configuration
# for Grafana in keys `user` and `password`.
existingSecret: ""
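## Example (illustrative): create the referenced secret with the expected `user` and `password` keys, e.g.
##   kubectl create secret generic grafana-smtp-creds --from-literal=user=<user> --from-literal=password=<password>
## then point existingSecret at it. The secret name is an assumption.
# existingSecret: grafana-smtp-creds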
## Grafana sidecars that collect the configmaps with the specified label and store the included files into the respective folders
## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards
sidecar:
image:
repository: cgr.dev/chainguard/k8s-sidecar
tag: latest
pullPolicy: IfNotPresent
resources: {}
dashboards:
enabled: true
# label that the configmaps with dashboards are marked with
label: grafana_dashboard
labelValue: "1"
# set sidecar ERROR_THROTTLE_SLEEP env var from default 5s to 0s -> fixes https://github.com/kubecost/cost-analyzer-helm-chart/issues/877
annotations: {}
error_throttle_sleep: 0
folder: /tmp/dashboards
datasources:
# dataSourceFilename: foo.yml # If you need to change the name of the datasource file
enabled: false
error_throttle_sleep: 0
# label that the configmaps with datasources are marked with
label: grafana_datasource
## Grafana's primary configuration
## NOTE: values in map will be converted to ini format
## ref: http://docs.grafana.org/installation/configuration/
##
## For grafana to be accessible, add the path to root_url. For example, if you run kubecost at www.foo.com:9090/kubecost
## set root_url to "%(protocol)s://%(domain)s:%(http_port)s/kubecost/grafana". No change is necessary here if kubecost runs at a root URL
grafana.ini:
server:
serve_from_sub_path: false # Set to false on Grafana v10+
root_url: "%(protocol)s://%(domain)s:%(http_port)s/grafana"
paths:
data: /var/lib/grafana/data
logs: /var/log/grafana
plugins: /var/lib/grafana/plugins
provisioning: /etc/grafana/provisioning
analytics:
check_for_updates: true
log:
mode: console
grafana_net:
url: https://grafana.net
auth.anonymous:
enabled: true
org_role: Editor
org_name: Main Org.
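## Example (illustrative): serving Grafana when Kubecost is exposed under a /kubecost sub-path,
## per the root_url note above. Only root_url changes; the protocol/domain/port placeholders stay as-is.
# grafana.ini:
#   server:
#     serve_from_sub_path: false # Set to false on Grafana v10+
#     root_url: "%(protocol)s://%(domain)s:%(http_port)s/kubecost/grafana"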
serviceAccount:
create: true # Set this to false if you're bringing your own service account.
annotations: {}
# name: kc-test
awsstore:
useAwsStore: false
imageNameAndVersion: gcr.io/kubecost1/awsstore:latest # Name and version of the container image for AWSStore.
createServiceAccount: false
## PriorityClassName
## Ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass
priorityClassName: ""
# Use a custom nodeSelector for AWSStore.
nodeSelector: {}
# kubernetes.io/arch: amd64
## Annotations for the AWSStore ServiceAccount.
annotations: {}
## Federated ETL Architecture
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/federated-etl
##
federatedETL:
## If true, installs the minimal set of components required for a Federated ETL cluster.
agentOnly: false
## If true, push ETL data to the federated storage bucket
federatedCluster: false
## If true, this cluster will be able to read from the federated-store but will
## not write to it. This is useful in situations when you want to deploy a
## primary cluster, but don't want the primary cluster's ETL data to be
## pushed to the bucket
readOnlyPrimary: false
## If true, changes the dir of S3 backup to the Federated combined store.
## Commonly used when transitioning from Thanos to Federated ETL architecture.
redirectS3Backup: false
## If true, will query metrics from a central PromQL DB (e.g. Amazon Managed
## Prometheus)
useMultiClusterDB: false
## Kubecost Admission Controller (beta feature)
## To use this feature, ensure you have run the `create-admission-controller.sh`
## script. This generates a k8s secret with TLS keys/certificates and a
## corresponding CA bundle.
##
kubecostAdmissionController:
enabled: false
secretName: webhook-server-tls
caBundle: ${CA_BUNDLE}
# Enables or disables the Cost Event Audit pipeline, which tracks recent changes at cluster level
# and provides an estimated cost impact via the Kubecost Predict API.
#
# It is disabled by default to avoid problems in high-scale environments.
costEventsAudit:
enabled: false
## Disable updates to kubecost from the frontend UI and via POST request
## This feature is considered beta; enterprise users should use teams:
## https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/teams
# readonly: false
# # These configs can also be set from the Settings page in the Kubecost product
# # UI. Values in this block override config changes in the Settings UI on pod
# # restart
# kubecostProductConfigs:
# # An optional list of cluster definitions that can be added for frontend
# # access. The local cluster is *always* included by default, so this list is
# # for non-local clusters.
# clusters:
# - name: "Cluster A"
# address: http://cluster-a.kubecost.com:9090
# # Optional authentication credentials - only basic auth is currently supported.
# auth:
# type: basic
# # Secret name should be a secret formatted based on: https://github.com/kubecost/docs/blob/main/ingress-examples.md
# secretName: cluster-a-auth
# # Or pass auth directly as base64 encoded user:pass
# data: YWRtaW46YWRtaW4=
# # Or user and pass directly
# user: admin
# pass: admin
# - name: "Cluster B"
# address: http://cluster-b.kubecost.com:9090
# # Enabling customPricesEnabled and defaultModelPricing instructs Kubecost to
# # use these custom monthly resource prices when reporting node costs. Note,
# # that the below configuration is for the monthly cost of the resource.
# # Kubecost considers there to be 730 hours in a month. Also note, that these
# # configurations will have no effect on metrics emitted such as
# # `node_ram_hourly_cost` or `node_cpu_hourly_cost`.
# # Ref: https://docs.kubecost.com/install-and-configure/install/provider-installations/air-gapped
# customPricesEnabled: false
# defaultModelPricing:
# enabled: true
# CPU: "28.0"
# spotCPU: "4.86"
# RAM: "3.09"
# spotRAM: "0.65"
# GPU: "693.50"
# spotGPU: "225.0"
# storage: "0.04"
# zoneNetworkEgress: "0.01"
# regionNetworkEgress: "0.01"
# internetNetworkEgress: "0.12"
# # The cluster profile represents a predefined set of parameters to use when calculating savings.
# # Possible values are: [ development, production, high-availability ]
# clusterProfile: production
# spotLabel: lifecycle
# spotLabelValue: Ec2Spot
# gpuLabel: gpu
# gpuLabelValue: true
# alibabaServiceKeyName: ""
# alibabaServiceKeyPassword: ""
# awsServiceKeyName: ACCESSKEYID
# awsServiceKeyPassword: fakepassword # Only use if your values.yaml are stored encrypted. Otherwise provide an existing secret via serviceKeySecretName
# awsSpotDataRegion: us-east-1
# awsSpotDataBucket: spot-data-feed-s3-bucket
# awsSpotDataPrefix: dev
# athenaProjectID: "530337586277" # The AWS AccountID where the Athena CUR is. Generally your masterpayer account
# athenaBucketName: "s3://aws-athena-query-results-530337586277-us-east-1"
# athenaRegion: us-east-1
# athenaDatabase: athenacurcfn_athena_test1
# athenaTable: "athena_test1"
# athenaWorkgroup: "primary" # The default workgroup in AWS is 'primary'
# masterPayerARN: ""
# projectID: "123456789" # Also known as AccountID on AWS -- the current account/project that this instance of Kubecost is deployed on.
# gcpSecretName: gcp-secret # Name of a secret representing the gcp service key
# gcpSecretKeyName: compute-viewer-kubecost-key.json # Name of the secret's key containing the gcp service key
# bigQueryBillingDataDataset: billing_data.gcp_billing_export_v1_01AC9F_74CF1D_5565A2
# labelMappingConfigs: # names of k8s labels or annotations used to designate different allocation concepts
# enabled: true
# owner_label: "owner"
# team_label: "team"
# department_label: "dept"
# product_label: "product"
# environment_label: "env"
# namespace_external_label: "kubernetes_namespace" # external labels/tags are used to map external cloud costs to kubernetes concepts
# cluster_external_label: "kubernetes_cluster"
# controller_external_label: "kubernetes_controller"
# product_external_label: "kubernetes_label_app"
# service_external_label: "kubernetes_service"
# deployment_external_label: "kubernetes_deployment"
# owner_external_label: "kubernetes_label_owner"
# team_external_label: "kubernetes_label_team"
# environment_external_label: "kubernetes_label_env"
# department_external_label: "kubernetes_label_department"
# statefulset_external_label: "kubernetes_statefulset"
# daemonset_external_label: "kubernetes_daemonset"
# pod_external_label: "kubernetes_pod"
# grafanaURL: ""
# # Provide a mapping from Account ID to a readable Account Name in a key/value object. Provide Account IDs as they are displayed in CloudCost
# # as the 'key' and the Account Name associated with it as the 'value'
# cloudAccountMapping:
# EXAMPLE_ACCOUNT_ID: EXAMPLE_ACCOUNT_NAME
# clusterName: "" # clusterName is the default context name in settings.
# clusterAccountID: "" # Manually set Account property for assets
# currencyCode: "USD" # official support for USD, AUD, BRL, CAD, CHF, CNY, DKK, EUR, GBP, IDR, INR, JPY, NOK, PLN, SEK
# azureBillingRegion: US # Represents 2-letter region code, e.g. West Europe = NL, Canada = CA. ref: https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
# azureSubscriptionID: 0bd50fdf-c923-4e1e-850c-196dd3dcc5d3
# azureClientID: f2ef6f7d-71fb-47c8-b766-8d63a19db017
# azureTenantID: 72faf3ff-7a3f-4597-b0d9-7b0b201bb23a
# azureClientPassword: fake key # Only use if your values.yaml are stored encrypted. Otherwise provide an existing secret via serviceKeySecretName
# azureOfferDurableID: "MS-AZR-0003p"
# discount: "" # percentage discount applied to compute
# negotiatedDiscount: "" # custom negotiated cloud provider discount
# standardDiscount: "" # custom negotiated cloud provider discount, applied to all incoming asset compute costs in a federated environment. Overrides negotiatedDiscount on any cluster in the federated environment.
# defaultIdle: false
# serviceKeySecretName: "" # Use an existing AWS or Azure secret with format as in aws-service-key-secret.yaml or azure-service-key-secret.yaml. Leave blank if using createServiceKeySecret
# createServiceKeySecret: true # Creates a secret representing your cloud service key based on data in values.yaml. If you store values.yaml unencrypted, create the secret manually and reference it via serviceKeySecretName instead
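# # A minimal sketch (secret and file names are placeholders): when using serviceKeySecretName instead of
# # createServiceKeySecret, an existing secret matching the aws-service-key-secret.yaml /
# # azure-service-key-secret.yaml format could be created from a local key file with, e.g.:
# #   kubectl create secret generic cloud-service-key -n kubecost --from-file=service-key.json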
# sharedNamespaces: "" # namespaces with shared workloads, example value: "kube-system\,ingress-nginx\,kubecost\,monitoring"
# sharedOverhead: "" # value representing a fixed external cost per month to be distributed among aggregations.
# shareTenancyCosts: true # enable or disable sharing costs such as cluster management fees (defaults to "true" on Settings page)
# metricsConfigs: # configuration for metrics emitted by Kubecost
# disabledMetrics: [] # list of metrics that Kubecost will not emit. Note that disabling metrics can lead to unexpected behavior in the cost-model.
# productKey: # Apply enterprise product license
# enabled: false
# key: ""
# secretname: productkeysecret # Reference an existing k8s secret created from a file named productkey.json of format { "key": "enterprise-key-here" }. If the secretname is specified, a configmap with the key will not be created.
# mountPath: "/some/custom/path/productkey.json" # (use instead of secretname) Declare the path at which the product key file is mounted (eg. by a secrets provisioner). The file must be of format { "key": "enterprise-key-here" }.
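# # As an illustration (namespace and secret name are placeholders): the secret referenced by secretname
# # above could be created from a local productkey.json of the format described with, e.g.:
# #   kubectl create secret generic productkeysecret -n kubecost --from-file=productkey.json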
# # The following block enables the use of a custom SMTP server which overrides Kubecost's built-in, external SMTP server for alerts and reports
# smtp:
# config: |
# {
# "sender_email": "",
# "host": "",
# "port": 587,
# "authentication": true,
# "username": "",
# "password": "",
# "secure": true
# }
# secretname: smtpconfigsecret # Reference an existing k8s secret created from a file named smtp.json of format specified by config above. If the secretname is specified, a configmap with the key will not be created.
# mountPath: "/some/custom/path/smtp.json" # (use instead of secretname) Declare the path at which the SMTP config file is mounted (eg. by a secrets provisioner). The file must be of format specified by config above.
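# # As an illustration (namespace and secret name are placeholders): the secret referenced by secretname
# # above could be created from a local smtp.json matching the config format with, e.g.:
# #   kubectl create secret generic smtpconfigsecret -n kubecost --from-file=smtp.json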
# carbonEstimates: false # Enables Kubecost beta carbon estimation endpoints /assets/carbon and /allocations/carbon
# The options below for hiding UI elements are only supported in Enterprise
# hideDiagnostics: false # useful if the primary cluster is not monitored. Supported in limited environments.
# hideOrphanedResources: false # OrphanedResources works on the primary-cluster's cloud-provider only.
# hideKubecostActions: false
# hideReservedInstances: false
# hideSpotCommander: false
# hideUnclaimedVolumes: false
# hideCloudIntegrationsUI: false
# hideBellIcon: false
# hideTeams: false
# savingsRecommendationsAllowLists: # Define a select list of instance types to be evaluated when computing Savings Recommendations (see the illustrative example below)
# AWS: []
# GCP: []
# Azure: []
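# # For illustration only (the instance type names are placeholders), populated allow lists might look like:
# #   AWS: ["m5.large", "m5.xlarge"]
# #   GCP: ["n2-standard-4"]
# #   Azure: ["Standard_D4s_v3"]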
## Specify an existing Kubernetes Secret holding the cloud integration information. This Secret must contain
## a key named `cloud-integration.json` whose contents are in a specific format, and it must exist in the
## release Namespace. This is mutually exclusive with cloudIntegrationJSON; define only one of the two.
# cloudIntegrationSecret: "cloud-integration"
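## A minimal sketch (names are placeholders): a compatible Secret could be created in the release Namespace
## from a local cloud-integration.json file with, e.g.:
##   kubectl create secret generic cloud-integration -n kubecost --from-file=cloud-integration.json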
## Specify the cloud integration information directly in JSON form if you prefer not to point to an existing
## Secret. This will result in a new Secret named `cloud-integration` being created in the release Namespace.
## This is mutually exclusive with cloudIntegrationSecret; define only one of the two.
# cloudIntegrationJSON: |-
# {
# "aws": [
# {
# "athenaBucketName": "s3://AWS_cloud_integration_athenaBucketName",
# "athenaRegion": "AWS_cloud_integration_athenaRegion",
# "athenaDatabase": "AWS_cloud_integration_athenaDatabase",
# "athenaTable": "AWS_cloud_integration_athenaBucketName",
# "projectID": "AWS_cloud_integration_athena_projectID",
# "serviceKeyName": "AWS_cloud_integration_athena_serviceKeyName",
# "serviceKeySecret": "AWS_cloud_integration_athena_serviceKeySecret"
# }
# ],
# "azure": [
# {
# "azureSubscriptionID": "my-subscription-id",
# "azureStorageAccount": "my-storage-account",
# "azureStorageAccessKey": "my-storage-access-key",
# "azureStorageContainer": "my-storage-container"
# }
# ],
# "gcp": [
# {
# "projectID": "my-project-id",
# "billingDataDataset": "detailedbilling.my-billing-dataset",
# "key": {
# "type": "service_account",
# "project_id": "my-project-id",
# "private_key_id": "my-private-key-id",
# "private_key": "my-pem-encoded-private-key",
# "client_email": "my-service-account-name@my-project-id.iam.gserviceaccount.com",
# "client_id": "my-client-id",
# "auth_uri": "auth-uri",
# "token_uri": "token-uri",
# "auth_provider_x509_cert_url": "my-x509-provider-cert",
# "client_x509_cert_url": "my-x509-cert-url"
# }
# }
# ]
# }
# ingestPodUID: false # Enables using UIDs to uniquely identify pods. This requires either Kubecost's replicated KSM metrics or KSM v2.1.0+. This may impact performance, and it changes the default cost-model allocation behavior.
# regionOverrides: "region1,region2,region3" # list of regions which will override default costmodel provider regions
# Explicit names of various ConfigMaps to use. If not set, a default will apply.
# pricingConfigmapName: ""
# productConfigmapName: ""
# smtpConfigmapName: ""
# -- Array of extra K8s manifests to deploy
## Note: Supports use of custom Helm templates
extraObjects: []
# Cloud Billing Integration:
# - apiVersion: v1
# kind: Secret
# metadata:
# name: cloud-integration
# namespace: kubecost
# type: Opaque
# data:
# cloud-integration.json: BASE64_SECRET
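  # The BASE64_SECRET placeholder above is left unfilled on purpose. As an illustration, a value of that
  # shape could be produced from a local cloud-integration.json with a standard base64 encoder, e.g.:
  #   base64 -w 0 cloud-integration.json   (GNU coreutils; on macOS: base64 -i cloud-integration.json)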
# Istio:
# - apiVersion: networking.istio.io/v1alpha3
# kind: VirtualService
# metadata:
# name: my-virtualservice
# spec:
# hosts:
# - kubecost.myorg.com
# gateways:
# - my-gateway
# http:
# - route:
# - destination:
# host: kubecost.kubecost.svc.cluster.local
# port:
# number: 80