# zone: cluster.local (use only if your DNS server doesn't live in the same zone as kubecost)
enabled: true # Kubecost depends on Prometheus data, it is not optional. When enabled: false, Prometheus will not be installed and you must configure your own Prometheus to scrape kubecost as well as provide the fqdn below. -- Warning: Before changing this setting, please read to understand the risks https://docs.kubecost.com/install-and-configure/install/custom-prom
fqdn: http://cost-analyzer-prometheus-server.default.svc # example address of a prometheus to connect to. Include protocol (http:// or https://). Ignored if enabled: true
# insecureSkipVerify: false # If true, kubecost will not check the TLS cert of prometheus
# queryServiceBasicAuthSecretName: dbsecret # kubectl create secret generic dbsecret -n kubecost --from-file=USERNAME --from-file=PASSWORD
# queryServiceBearerTokenSecretName: mcdbsecret # kubectl create secret generic mcdbsecret -n kubecost --from-file=TOKEN
enabled: true # If false, Grafana will not be installed
domainName: cost-analyzer-grafana.default.svc # example grafana domain. Ignored if enabled: true
scheme: "http" # http or https, for the domain name above.
proxy: true # If true, the kubecost frontend will route to your grafana through its service endpoint
# fqdn: cost-analyzer-grafana.default.svc
# Enable only when you are using GCP Marketplace ENT listing. Learn more at https://console.cloud.google.com/marketplace/product/kubecost-public/kubecost-ent
enabled: false
# Google Cloud Managed Service for Prometheus
# Remember to set up these parameters when installing the Kubecost Helm chart with `global.gmp.enabled=true` if you want to use GMP self-deployed collection (Recommended) to utilize Kubecost scrape configs.
# If enabling GMP, it is highly recommended to utilize Google's distribution of Prometheus.
# Learn more at https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-unmanaged
# --set prometheus.server.image.repository="gke.gcr.io/prometheus-engine/prometheus" \
# --set prometheus.server.image.tag="v2.35.0-gmp.2-gke.0"
enabled: false # If true, kubecost will be configured to use GMP Prometheus image and query from Google Cloud Managed Service for Prometheus.
prometheusServerEndpoint: http://localhost:8085/ # The prometheus service endpoint used by kubecost. The calls are forwarded through the GMP Prom proxy side car to the GMP database.
enabled: false
image: gke.gcr.io/prometheus-engine/frontend:v0.4.1-gke.0 # GMP Prometheus proxy image that serves as an endpoint to query metrics from GMP
imagePullPolicy: IfNotPresent
name: gmp-proxy
port: 8085
projectId: YOUR_PROJECT_ID # example GCP project ID
# Amazon Managed Service for Prometheus
enabled: false # If true, kubecost will be configured to remote_write and query from Amazon Managed Service for Prometheus.
prometheusServerEndpoint: http://localhost:8005/workspaces/<workspaceId>/ # The prometheus service endpoint used by kubecost. The calls are forwarded through the SigV4Proxy side car to the AMP workspace.
remoteWriteService: https://aps-workspaces.us-west-2.amazonaws.com/workspaces/<workspaceId>/api/v1/remote_write # The remote_write endpoint for the AMP workspace.
region: us-west-2
# access_key: ACCESS_KEY # AWS Access key
# secret_key: SECRET_KEY # AWS Secret key
# role_arn: ROLE_ARN # AWS role arn
# profile: PROFILE # AWS profile
# Mimir Proxy to help Kubecost to query metrics from multi-tenant Grafana Mimir.
# Set `global.mimirProxy.enabled=true` and `global.prometheus.enabled=false` to enable Mimir Proxy.
# You also need to set `global.prometheus.fqdn=http://kubecost-cost-analyzer-mimir-proxy.kubecost.svc:8085/prometheus`
# or `global.prometheus.fqdn=http://{{ template "cost-analyzer.fullname" . }}-mimir-proxy.{{ .Release.Namespace }}.svc:8085/prometheus`
# Learn more at https://grafana.com/docs/mimir/latest/operators-guide/secure/authentication-and-authorization/#without-an-authenticating-reverse-proxy
enabled: false
name: mimir-proxy
image: nginxinc/nginx-unprivileged
port: 8085
mimirEndpoint: $mimir_endpoint # Your Mimir query endpoint. If your Mimir query endpoint is http://example.com/prometheus, replace $mimir_endpoint with http://example.com/
orgIdentifier: $your_tenant_ID # Your Grafana Mimir tenant ID
# basicAuth:
# username: user
# password: pwd
# Azure Monitor Managed Service for Prometheus
# See https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/azure-monitor/essentials/prometheus-metrics-overview.md for information
# and https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/prometheus-remote-write-virtual-machines for more information on setting this up
enabled: false
prometheusServerEndpoint: http://localhost:8081/
queryEndpoint: $<AMMSP_QUERY_ENDPOINT>
enabled: false
# per https://github.com/Azure/aad-auth-proxy/releases/tag/0.1.0-main-04-10-2024-7067ac84
image: $<IMAGE> # Example: mcr.microsoft.com/azuremonitor/auth-proxy/prod/aad-auth-proxy/images/aad-auth-proxy:0.1.0-main-04-10-2024-7067ac84
imagePullPolicy: IfNotPresent
name: aad-auth-proxy
port: 8081
audience: https://prometheus.monitor.azure.com/.default
identityType: userAssigned
# Kubecost alerting configuration
# Ref: http://docs.kubecost.com/alerts
# alertConfigs:
# frontendUrl: http://localhost:9090 # optional, used for linkbacks
# globalSlackWebhookUrl: https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX # optional, used for Slack alerts
# globalMsTeamsWebhookUrl: https://xxxxx.webhook.office.com/webhookb2/XXXXXXXXXXXXXXXXXXXXXXXX/IncomingWebhook/XXXXXXXXXXXXXXXXXXXXXXXX # optional, used for Microsoft Teams alerts
# globalAlertEmails:
# - recipient@example.com
# - additionalRecipient@example.com
# globalEmailSubject: Custom Subject
# Alerts generated by kubecost, about cluster data
# alerts:
# Daily namespace budget alert on namespace `kubecost`
# - type: budget # supported: budget, recurringUpdate
# threshold: 50 # optional, required for budget alerts
# window: daily # or 1d
# aggregation: namespace
# filter: kubecost
# ownerContact: # optional, overrides globalAlertEmails default
# - owner@example.com
# - owner2@example.com
# # optional, used for alert-specific Slack and Microsoft Teams alerts
# slackWebhookUrl: https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX
# msTeamsWebhookUrl: https://xxxxx.webhook.office.com/webhookb2/XXXXXXXXXXXXXXXXXXXXXXXX/IncomingWebhook/XXXXXXXXXXXXXXXXXXXXXXXX
# Daily cluster budget alert on cluster `cluster-one`
# - type: budget
# threshold: 200.8 # optional, required for budget alerts
# window: daily # or 1d
# aggregation: cluster
# filter: cluster-one # does not accept csv
# Recurring weekly update (weeklyUpdate alert)
# - type: recurringUpdate
# window: weekly # or 7d
# aggregation: namespace
# filter: '*'
# Recurring weekly namespace update on kubecost namespace
# - type: recurringUpdate
# window: weekly # or 7d
# aggregation: namespace
# filter: kubecost
# Spend Change Alert
# - type: spendChange # change relative to moving avg
# relativeThreshold: 0.20 # Proportional change relative to baseline. Must be greater than -1 (can be negative)
# window: 1d # accepts 'd', 'h'
# baselineWindow: 30d # previous window, offset by window
# aggregation: namespace
# filter: kubecost, default # accepts csv
# Health Score Alert
# - type: health # Alerts when health score changes by a threshold
# window: 10m
# threshold: 5 # Send Alert if health score changes by 5 or more
# Kubecost Health Diagnostic
# - type: diagnostic # Alerts when kubecost is unable to compute costs - ie: Prometheus unreachable
# window: 10m
alertmanager: # Supply an alertmanager FQDN to receive notifications from the app.
enabled: false # If true, allow kubecost to write to your alertmanager
fqdn: http://cost-analyzer-prometheus-server.default.svc # example fqdn. Ignored if prometheus.enabled: true
# Set saved Cost Allocation report(s) accessible from /reports
# Ref: http://docs.kubecost.com/saved-reports
enabled: false # If true, overwrites report parameters set through UI
- title: "Example Saved Report 0"
window: "today"
aggregateBy: "namespace"
chartDisplay: "category"
idle: "separate"
rate: "cumulative"
accumulate: false # daily resolution
filters: # Ref: https://docs.kubecost.com/apis/filters-api
- key: "cluster" # Ref: https://docs.kubecost.com/apis/filters-api#allocation-apis-request-sizing-v2-api
operator: ":" # Ref: https://docs.kubecost.com/apis/filters-api#filter-operators
value: "dev"
- title: "Example Saved Report 1"
window: "month"
aggregateBy: "controllerKind"
chartDisplay: "category"
idle: "share"
rate: "monthly"
accumulate: false
filters: # Ref: https://docs.kubecost.com/apis/filters-api
- key: "namespace" # Ref: https://docs.kubecost.com/apis/filters-api#allocation-apis-request-sizing-v2-api
operator: "!:" # Ref: https://docs.kubecost.com/apis/filters-api#filter-operators
value: "kubecost"
- title: "Example Saved Report 2"
window: "2020-11-11T00:00:00Z,2020-12-09T23:59:59Z"
aggregateBy: "service"
chartDisplay: "category"
idle: "hide"
rate: "daily"
accumulate: true # entire window resolution
filters: [] # if no filters, specify empty array
# Set saved Asset report(s) accessible from /reports
# Ref: http://docs.kubecost.com/saved-reports
enabled: false # If true, overwrites report parameters set through UI
- title: "Example Asset Report 0"
window: "today"
aggregateBy: "type"
accumulate: false # daily resolution
- property: "cluster"
value: "cluster-one"
# Set saved Cloud Cost report(s) accessible from /reports
# Ref: http://docs.kubecost.com/saved-reports
enabled: false # If true, overwrites report parameters set through UI
- title: "Cloud Cost Report 0"
window: "today"
aggregateBy: "service"
accumulate: false # daily resolution
# filters:
# - property: "service"
# value: "service1" # corresponds to a value to filter cloud cost aggregate by service data on.
podAnnotations: {}
# iam.amazonaws.com/role: role-arn
# Applies these labels to all Deployments, StatefulSets, DaemonSets, and their pod templates.
additionalLabels: {}
runAsNonRoot: true
type: RuntimeDefault
fsGroup: 1001
runAsGroup: 1001
runAsUser: 1001
fsGroupChangePolicy: OnRootMismatch
allowPrivilegeEscalation: false
privileged: false
readOnlyRootFilesystem: true
# Platforms is a higher-level abstraction for platform-specific values and settings.
# Deploying to OpenShift (OCP) requires enabling this option.
enabled: false # Deploy Kubecost to OpenShift.
enabled: false # Create an OpenShift Route.
annotations: {} # Add annotations to the Route.
# host: kubecost.apps.okd4.example.com # Add a custom host for your Route.
# Create Security Context Constraint resources for the DaemonSets requiring additional privileges.
nodeExporter: false # Creates an SCC for Prometheus Node Exporter. This requires Node Exporter be enabled.
networkCosts: false # Creates an SCC for Kubecost network-costs. This requires network-costs be enabled.
# When OpenShift is enabled, the following securityContext will be applied to all resources unless they define their own.
runAsNonRoot: true
type: RuntimeDefault
# Set options for deploying with CI/CD tools like Argo CD.
enabled: false # Set to true when using affected CI/CD tools for access to the below configuration options.
skipSanityChecks: false # If true, skip all sanity/existence checks for resources like Secrets.
## Kubecost Integrations
## Ref: https://docs.kubecost.com/integrations
enabled: false
runInterval: "12h" # How frequently to run the integration.
databaseHost: "" # REQUIRED. ex: my.postgres.database.azure.com
databasePort: "" # REQUIRED. ex: 5432
databaseName: "" # REQUIRED. ex: postgres
databaseUser: "" # REQUIRED. ex: myusername
databasePassword: "" # REQUIRED. ex: mypassword
databaseSecretName: "" # OPTIONAL. Specify your own k8s secret containing the above credentials. Must have key "creds.json".
## Configure what Postgres table to write to, and what parameters to pass
## when querying Kubecost's APIs. Ensure all parameters are enclosed in
## quotes. Ref: https://docs.kubecost.com/apis/apis-overview
allocations: []
# - databaseTable: "kubecost_allocation_data"
# window: "7d"
# aggregate: "namespace"
# idle: "true"
# shareIdle: "true"
# shareNamespaces: "kubecost,kube-system"
# shareLabels: ""
# - databaseTable: "kubecost_allocation_data_by_cluster"
# window: "10d"
# aggregate: "cluster"
# idle: "true"
# shareIdle: "false"
# shareNamespaces: ""
# shareLabels: ""
assets: []
# - databaseTable: "kubecost_assets_data"
# window: "7d"
# aggregate: "cluster"
cloudCosts: []
# - databaseTable: "kubecost_cloudcosts_data"
# window: "7d"
# aggregate: "service"
## Provide a name override for the chart.
# nameOverride: ""
## Provide a full name override option for the chart.
# fullnameOverride: ""
## This flag is only required for users upgrading to a new version of Kubecost.
## The flag is used to ensure users are aware of important
## (potentially breaking) changes included in the new version.
toV2: false
# generated at http://kubecost.com/install, used for alerts tracking and free trials
kubecostToken: # ""
# Advanced pipeline for custom prices, enterprise key required
enabled: false
provider: "AWS"
region: "us-east-1"
URI: s3://kc-csv-test/pricing_schema.csv # a valid file URI
csvAccessCredentials: pricing-schema-access-secret
# SAML integration for user management and RBAC, enterprise key required
# Ref: https://github.com/kubecost/docs/blob/main/user-management.md
enabled: false
# secretName: "kubecost-authzero"
# metadataSecretName: "kubecost-authzero-metadata" # One of metadataSecretName or idpMetadataURL must be set. defaults to metadataURL if set
# idpMetadataURL: "https://dev-elu2z98r.auth0.com/samlp/metadata/c6nY4M37rBP0qSO1IYIqBPPyIPxLS8v2"
# appRootURL: "http://localhost:9090" # sample URL
# authTimeout: 1440 # number of minutes the JWT will be valid
# redirectURL: "https://dev-elu2z98r.auth0.com/v2/logout" # callback URL redirected to after logout
# audienceURI: "http://localhost:9090" # by convention, the same as the appRootURL, but any string uniquely identifying kubecost to your SAML IdP. Optional if you follow the convention
# nameIDFormat: "urn:oasis:names:tc:SAML:1.1:nameid-format:unspecified" # If your SAML provider requires a specific nameid format
# isGLUUProvider: false # An additional URL parameter must be appended for GLUU providers
# encryptionCertSecret: "kubecost-saml-cert" # k8s secret where the x509 certificate used to encrypt an Okta saml response is stored
# decryptionKeySecret: "kubecost-saml-decryption-key" # k8s secret where the private key associated with the encryptionCertSecret is stored
# authSecret: "random-string" # value of SAML secret used to issue tokens, will be autogenerated as random string if not provided
# authSecretName: "kubecost-saml-secret" # name of k8s secret where the authSecret will be stored, defaults to "kubecost-saml-secret" if not provided
enabled: false
# groups:
# - name: admin
# enabled: false # if admin is disabled, all SAML users will be able to make configuration changes to the kubecost frontend
# assertionName: "http://schemas.auth0.com/userType" # a SAML Assertion, one of whose elements has a value that matches one of the values in assertionValues
# assertionValues:
# - "admin"
# - "superusers"
# - name: readonly
# enabled: false # if readonly is disabled, all users authorized on SAML will default to readonly
# assertionName: "http://schemas.auth0.com/userType"
# assertionValues:
# - "readonly"
# - name: editor
# enabled: true # if editor is enabled, editors will be allowed to edit reports/alerts scoped to them, and act as readers otherwise. Users will never default to editor.
# assertionName: "http://schemas.auth0.com/userType"
# assertionValues:
# - "editor"
enabled: false
clientID: "" # application/client client_id parameter obtained from provider, used to make requests to server
clientSecret: "" # application/client client_secret parameter obtained from provider, used to make requests to server
# secretName: "kubecost-oidc-secret" # k8s secret where clientsecret will be stored
# For use to provide a custom OIDC Secret. Overrides the usage of oidc.clientSecret and oidc.secretName.
# Should contain the field directly.
# Can be created using raw k8s secrets, external secrets, sealed secrets, or any other method.
enabled: false
name: "" # name of the secret containing the client secret
# authURL: "https://my.auth.server/authorize" # endpoint for login to auth server
# loginRedirectURL: "http://my.kubecost.url/model/oidc/authorize" # Kubecost url configured in provider for redirect after authentication
# discoveryURL: "https://my.auth.server/.well-known/openid-configuration" # url for OIDC endpoint discovery
skipOnlineTokenValidation: false # if true, will skip accessing OIDC introspection endpoint for online token verification, and instead try to locally validate JWT claims
useClientSecretPost: false # if true, client secret will specifically only use client_secret_post method, otherwise it will attempt to send the secret in both the header and the body.
# hostedDomain: "example.com" # optional, blocks access to the auth domain specified in the hd claim of the provider ID token
enabled: false
# groups:
# - name: admin
# enabled: false # if admin is disabled, all authenticated users will be able to make configuration changes to the kubecost frontend
# claimName: "roles" # Kubecost matches this string against the JWT's payload key containing RBAC info (this value is unique across identity providers)
# claimValues: # Kubecost matches these strings with the roles created in your identity provider
# - "admin"
# - "superusers"
# - name: readonly
# enabled: false # if readonly is disabled, all authenticated users will default to readonly
# claimName: "roles"
# claimValues:
# - "readonly"
# - name: editor
# enabled: false # if editor is enabled, editors will be allowed to edit reports/alerts scoped to them, and act as readers otherwise. Users will never default to editor.
# claimName: "roles"
# claimValues:
# - "editor"
## Adds the HTTP_PROXY, HTTPS_PROXY, and NO_PROXY environment variables to all
## containers. Typically used in environments that have firewall rules which
## prevent kubecost from accessing cloud provider resources.
## Ref: https://www.oreilly.com/library/view/security-with-go/9781788627917/5ea6a02b-3d96-44b1-ad3c-6ab60fcbbe4f.xhtml
enabled: false
httpProxyUrl: ""
httpsProxyUrl: ""
noProxy: ""
# imagePullSecrets:
# - name: "image-pull-secret"
# imageVersion uses the base image name (image:) but overrides the version
# pulled. It should be avoided. If non-default behavior is needed, use
# fullImageName for the relevant component.
# imageVersion:
enabled: true
deployMethod: singlepod # haMode or singlepod - haMode is currently only supported with Enterprise tier
haReplicas: 2 # only used with haMode
image: "gcr.io/kubecost1/frontend"
imagePullPolicy: IfNotPresent
# fullImageName overrides the default image construction logic. The exact
# image provided (registry, image, tag) will be used for the frontend.
# fullImageName:
# extraEnv:
# value: "1"
# securityContext:
# readOnlyRootFilesystem: true
cpu: "10m"
memory: "55Mi"
# limits:
# cpu: "100m"
# memory: "256Mi"
deploymentStrategy: {}
# rollingUpdate:
# maxSurge: 1
# maxUnavailable: 1
# type: RollingUpdate
# Define a readiness probe for the Kubecost frontend container.
enabled: true
initialDelaySeconds: 1
periodSeconds: 5
failureThreshold: 6
# Define a liveness probe for the Kubecost frontend container.
enabled: true
initialDelaySeconds: 1
periodSeconds: 5
failureThreshold: 6
enabled: true # disable if the cluster does not support ipv6
# timeoutSeconds: 600 # should be rarely used, but can be increased if needed
# allow customizing nginx-conf server block
# extraServerConfig: |-
# proxy_busy_buffers_size 512k;
# proxy_buffers 4 512k;
# proxy_buffer_size 256k;
# large_client_header_buffers 4 64k;
# hideDiagnostics: false # useful if the primary is not monitored. Supported in limited environments.
# hideOrphanedResources: false # OrphanedResources works on the primary-cluster's cloud-provider only.
# set to true to set all upstreams to use <service>.<namespace>.svc.cluster.local instead of just <service>.<namespace>
useDefaultFqdn: false
# api:
# fqdn: kubecost-api.kubecost.svc.cluster.local:9001
# model:
# fqdn: kubecost-model.kubecost.svc.cluster.local:9003
# forecasting:
# fqdn: kubecost-forecasting.kubecost.svc.cluster.local:5000
# aggregator:
# fqdn: kubecost-aggregator.kubecost.svc.cluster.local:9004
# cloudCost:
# fqdn: kubecost-cloud-cost.kubecost.svc.cluster.local:9005
# multiClusterDiagnostics:
# fqdn: kubecost-multi-diag.kubecost.svc.cluster.local:9007
# clusterController:
# fqdn: cluster-controller.kubecost.svc.cluster.local:9731
# Kubecost Metrics deploys a separate pod which will emit kubernetes specific metrics required
# by the cost-model. This pod is designed to remain active and decoupled from the cost-model itself.
# However, disabling this service/pod deployment will flag the cost-model to emit the metrics instead.
# emitPodAnnotations: false
# emitNamespaceAnnotations: false
# emitKsmV1Metrics: true # emit all KSM metrics in KSM v1.
# emitKsmV1MetricsOnly: false # emit only the KSM metrics missing from KSM v2. Advanced users only.
# Optional
# The metrics exporter is a separate deployment and service (for prometheus scrape auto-discovery)
# which emits metrics cost-model relies on. Enabling this deployment also removes the KSM dependency
# from the cost-model. If the deployment is not enabled, the metrics will continue to be emitted from
# the cost-model.
enabled: false
port: 9005
# Adds the default Prometheus scrape annotations to the metrics exporter service.
# Set to false and use service.annotations (below) to set custom scrape annotations.
prometheusScrape: true
resources: {}
# requests:
# cpu: "200m"
# memory: "55Mi"
## Node tolerations for server scheduling to nodes with taints
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
affinity: {}
annotations: {}
# Service Monitor for Kubecost Metrics
serviceMonitor: # the kubecost included prometheus uses scrapeConfigs and does not support service monitors. The following options assume an existing prometheus that supports serviceMonitors.
enabled: false
additionalLabels: {}
metricRelabelings: []
relabelings: []
## PriorityClassName
## Ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass
priorityClassName: ""
additionalLabels: {}
nodeSelector: {}
extraArgs: []
image: public.ecr.aws/aws-observability/aws-sigv4-proxy:latest
imagePullPolicy: IfNotPresent
name: aps
port: 8005
region: us-west-2 # The AWS region
host: aps-workspaces.us-west-2.amazonaws.com # The hostname for AMP service.
# role_arn: arn:aws:iam::<account>:role/role-name # The AWS IAM role to assume.
extraEnv: # Pass extra env variables to sigV4Proxy
# value: <access_key>
# value: <secret_key>
# Optional resource requests and limits for the sigV4proxy container.
resources: {}
image: "gcr.io/kubecost1/cost-model"
imagePullPolicy: IfNotPresent
# fullImageName overrides the default image construction logic. The exact
# image provided (registry, image, tag) will be used for cost-model.
# fullImageName:
# securityContext:
# readOnlyRootFilesystem: true
# Build local cost allocation cache
warmCache: false
# Run allocation ETL pipelines
etl: true
# Enable the ETL filestore backing storage
etlFileStoreEnabled: true
# The total number of days the ETL pipelines will build
# Set to 0 to disable daily ETL (not recommended)
etlDailyStoreDurationDays: 91
# The total number of hours the ETL pipelines will build
# Set to 0 to disable hourly ETL (recommended for large environments)
# Must be < prometheus server retention, otherwise empty data may overwrite
# known-good data
etlHourlyStoreDurationHours: 49
# For deploying kubecost in a cluster that does not self-monitor
etlReadOnlyMode: false
## The name of the Secret containing a bucket config for Federated storage.
## The contents should be stored under a key named federated-store.yaml.
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/long-term-storage-configuration
# federatedStorageConfigSecret: federated-store
## Federated storage config can be supplied via a secret or the yaml block
## below. When using the block below, only a single provider is supported;
## the others are shown for example purposes.
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/long-term-storage-configuration
# federatedStorageConfig: |-
# type: S3
# config:
# bucket: kubecost-federated-storage-bucket
# endpoint: s3.amazonaws.com
# region: us-east-1
# # best practice is to use pod identities to access AWS resources. Otherwise it is possible to use an access_key and secret_key
# access_key: "<your-access-key>"
# secret_key: "<your-secret-key>"
# type: AZURE
# config:
# storage_account: ""
# storage_account_key: ""
# container: ""
# max_retries: 0
# type: GCS
# config:
# bucket: kubecost-federated-storage-bucket
# service_account: |-
# {
# "type": "service_account",
# "project_id": "...",
# "private_key_id": "...",
# "private_key": "...",
# "client_email": "...",
# "client_id": "...",
# "auth_uri": "https://accounts.google.com/o/oauth2/auth",
# "token_uri": "https://oauth2.googleapis.com/token",
# "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
# "client_x509_cert_url": ""
# }
# Installs Kubecost/OpenCost plugins
enabled: false
enabled: false
fullImageName: curlimages/curl:latest
allowPrivilegeEscalation: false
type: RuntimeDefault
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1001
folder: /opt/opencost/plugin
# leave this commented to always download most recent version of plugins
# the list of enabled plugins
enabledPlugins: []
# - datadog
# pre-existing secret for plugin configuration
enabled: false
name: "" # name of the secret containing plugin config
secretName: kubecost-plugin-secret
# uncomment this to define plugin configuration via the values file
# configs:
# datadog: |
# {
# "datadog_site": "<INSERT_DATADOG_SITE>",
# "datadog_api_key": "<INSERT_DATADOG_API_KEY>",
# "datadog_app_key": "<INSERT_DATADOG_APP_KEY>"
# }
# Enables or disables adding node labels to allocation data (i.e. workloads).
# Defaults to "true" and starts with a sensible includeList for basics like
# topology (e.g. zone, region) and instance type labels.
# nodeLabels:
# enabled: true
# includeList: "node.kubernetes.io/instance-type,topology.kubernetes.io/region,topology.kubernetes.io/zone"
# Enables or disables the ContainerStats pipeline, used for quantile-based
# queries like for request sizing recommendations.
# ContainerStats provides support for quantile-based request right-sizing
# recommendations.
# It was disabled by default before v2.2.0 to avoid problems in extremely high-scale Thanos
# environments. If you would like to try quantile-based request-sizing
# recommendations, enable this! If you are in a high-scale environment,
# please monitor Kubecost logs, Thanos query logs, and Thanos load closely.
# We hope to make major improvements at scale here soon!
containerStatsEnabled: true # enabled by default as of v2.2.0
# max number of concurrent Prometheus queries
maxQueryConcurrency: 5
cpu: "200m"
memory: "55Mi"
# limits:
# cpu: "800m"
# memory: "256Mi"
# Define a readiness probe for the Kubecost cost-model container.
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
# Define a liveness probe for the Kubecost cost-model container.
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
extraArgs: []
# Optional. A list of extra environment variables to be added to the cost-model container.
# extraEnv:
# - name: LOG_LEVEL
# value: trace
# - name: LOG_FORMAT
# value: json
# # When false, Kubecost will not show Asset costs for local disks physically
# # attached to nodes (e.g. ephemeral storage). This needs to be applied to
# # each cluster monitored.
# value: "true"
# creates an ingress directly to the model container, for API access
enabled: false
# className: nginx
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
paths: ["/"]
pathType: ImplementationSpecific
- cost-analyzer-model.local
tls: []
# - secretName: cost-analyzer-model-tls
# hosts:
# - cost-analyzer-model.local
utcOffset: "+00:00"
# Optional - add extra ports to the cost-model container. For kubecost development purposes only - not recommended for users.
extraPorts: []
# - name: debug
# port: 40000
# targetPort: 40000
# containerPort: 40000
## etlUtils is a utility typically used by Enterprise customers transitioning
## from v1 to v2 of Kubecost. It translates the data from the "/etl" dir of the
## bucket, to the "/federated" dir of the bucket.
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/federated-etl/thanos-migration-guide
enabled: false
fullImageName: null
resources: {}
env: {}
nodeSelector: {}
tolerations: []
affinity: {}
# Basic Kubecost ingress, more examples available at https://docs.kubecost.com/install-and-configure/install/ingress-examples
enabled: false
# className: nginx
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
paths: ["/"] # There's no need to route specifically to the pods-- we have an nginx deployed that handles routing
pathType: ImplementationSpecific
- cost-analyzer.local
tls: []
# - secretName: cost-analyzer-tls
# hosts:
# - cost-analyzer.local
nodeSelector: {}
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
affinity: {}
topologySpreadConstraints: []
# If true, creates a PriorityClass to be used by the cost-analyzer pod
enabled: false
name: "" # Provide name of existing priority class only. If left blank, upstream chart will create one from default template.
# If true, enable creation of NetworkPolicy resources.
enabled: false
denyEgress: true # create a network policy that denies egress from kubecost
sameNamespace: true # Set to true if cost analyzer and prometheus are on the same namespace
# namespace: kubecost # Namespace where prometheus is installed
# Cost-analyzer specific vars using the new template
enabled: false # If true, create a network policy for cost-analyzer
annotations: {} # annotations to be added to the network policy
additionalLabels: {} # additional labels to be added to the network policy
# Examples rules:
# ingressRules:
# - selectors: # allow ingress from self on all ports
# - podSelector:
# matchLabels:
# app.kubernetes.io/name: cost-analyzer
# - selectors: # allow egress access to prometheus
# - namespaceSelector:
# matchLabels:
# name: prometheus
# podSelector:
# matchLabels:
# app: prometheus
# ports:
# - protocol: TCP
# port: 9090
# egressRules:
# - selectors: # restrict egress to inside cluster
# - namespaceSelector: {}
## @param extraVolumes A list of volumes to be added to the pod
extraVolumes: []
## @param extraVolumeMounts A list of volume mounts to be added to the pod
extraVolumeMounts: []
# Define persistence volume for cost-analyzer, more information at https://docs.kubecost.com/install-and-configure/install/storage
size: 32Gi
dbSize: 32.0Gi
enabled: true # Note that setting this to false means configurations will be wiped out on pod restart.
# storageClass: "-" #
# existingClaim: kubecost-cost-analyzer # a claim in the same namespace as kubecost
labels: {}
annotations: {}
# helm.sh/resource-policy: keep # https://helm.sh/docs/howto/charts_tips_and_tricks/#tell-helm-not-to-uninstall-a-resource
# Enables a separate PV specifically for ETL data. This should be avoided, but
# is kept for legacy compatibility.
dbPVEnabled: false
type: ClusterIP
port: 9090
targetPort: 9090
nodePort: {}
labels: {}
annotations: {}
# loadBalancerSourceRanges: []
enabled: false # Makes sure that connections from a client are passed to the same Pod each time, when set to `true`. You should set it when you have enabled authentication through OIDC or SAML integration.
timeoutSeconds: 10800
## Provide a full name override for Prometheus.
# fullnameOverride: ""
## Provide a name override for Prometheus.
# nameOverride: ""
create: true # Create the RBAC resources for Prometheus.
## Define serviceAccount names for components. Defaults to component's fully qualified name.
create: true
create: true
create: true
create: true
## Prometheus server ServiceAccount annotations.
## Can be used for AWS IRSA annotations when using Remote Write mode with Amazon Managed Prometheus.
annotations: {}
## Specify an existing ConfigMap to be used by Prometheus when using self-signed certificates.
# selfsignedCertConfigMapName: ""
# - name: "image-pull-secret"
extraScrapeConfigs: |
- job_name: kubecost
honor_labels: true
scrape_interval: 1m
scrape_timeout: 60s
metrics_path: /metrics
scheme: http
- names:
- {{ template "cost-analyzer.serviceName" . }}
type: 'A'
port: 9003
- job_name: kubecost-networking
- role: pod
# Scrape only the targets matching the following metadata
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance]
action: keep
regex: kubecost
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
action: keep
regex: network-costs
- job_name: kubecost-aggregator
scrape_interval: 1m
scrape_timeout: 60s
metrics_path: /metrics
scheme: http
- names:
- {{ template "aggregator.serviceName" . }}
type: 'A'
{{- if or .Values.saml.enabled .Values.oidc.enabled }}
port: 9008
{{- else }}
port: 9004
{{- end }}
## Enables scraping of NVIDIA GPU metrics via dcgm-exporter. Scrapes all
## endpoints which contain "dcgm-exporter" in labels "app",
## "app.kubernetes.io/component", or "app.kubernetes.io/name" with a case
## insensitive match.
## Refs:
## https://github.com/NVIDIA/gpu-operator/blob/d4316a415bbd684ce8416a88042305fc1a093aa4/assets/state-dcgm-exporter/0600_service.yaml#L7
## https://github.com/NVIDIA/dcgm-exporter/blob/54fd1ca137c66511a87a720390613680b9bdabdd/deployment/templates/service.yaml#L23
- job_name: kubecost-dcgm-exporter
- role: endpoints
# The source labels below are joined with the default ";" separator before
# the regex is applied, so one case-insensitive substring pattern matches
# "dcgm-exporter" in any of the three labels.
- source_labels: [__meta_kubernetes_pod_label_app, __meta_kubernetes_pod_label_app_kubernetes_io_component, __meta_kubernetes_pod_label_app_kubernetes_io_name]
  action: keep
  regex: (?i).*dcgm-exporter.*
# If clusterIDConfigmap is defined, instead use user-generated configmap with key CLUSTER_ID
# to use as unique cluster ID in kubecost cost-analyzer deployment.
# This overrides the cluster_id set in prometheus.server.global.external_labels.
# NOTE: This does not affect the external_labels set in prometheus config.
# clusterIDConfigmap: cluster-id-configmap
## Provide a full name override for the Prometheus server.
# fullnameOverride: ""
## Prometheus server container name
enabled: true
name: server
type: Recreate
rollingUpdate: null
## Prometheus server container image
repository: cgr.dev/chainguard/prometheus
tag: latest
pullPolicy: IfNotPresent
## prometheus server priorityClassName
priorityClassName: ""
## The URL prefix at which the container can be accessed. Useful in the case the '-web.external-url' includes a slug
## so that the various internal URLs are still able to access as they are in the default case.
## (Optional)
prefixURL: ""
## External URL which can access alertmanager
## May be the same as the Ingress host name
baseURL: ""
## Additional server container environment variables
## You specify this manually like you would a raw deployment manifest.
## This means you can bind in environment variables from secrets.
## e.g. static environment variable:
## - name: DEMO_GREETING
## value: "Hello from the environment"
## e.g. secret environment variable:
## - name: USERNAME
## valueFrom:
## secretKeyRef:
## name: mysecret
## key: username
env: []
- web.enable-lifecycle
## web.enable-admin-api flag controls access to the administrative HTTP API which includes functionality such as
## deleting time series. This is disabled by default.
# - web.enable-admin-api
## storage.tsdb.no-lockfile flag controls DB locking
# - storage.tsdb.no-lockfile
## storage.tsdb.wal-compression flag enables compression of the write-ahead log (WAL)
# - storage.tsdb.wal-compression
## Path to a configuration file on prometheus server container FS
configPath: /etc/config/prometheus.yml
## How frequently to scrape targets by default
scrape_interval: 1m
## How long until a scrape request times out
scrape_timeout: 60s
## How frequently to evaluate rules
evaluation_interval: 1m
cluster_id: cluster-one # Each cluster should have a unique ID
## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write
remoteWrite: {}
## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_read
remoteRead: {}
## Additional Prometheus server container arguments
query.max-concurrency: 1
query.max-samples: 100000000
## Additional InitContainers to initialize the pod
extraInitContainers: []
## Additional Prometheus server Volume mounts
extraVolumeMounts: []
## Additional Prometheus server Volumes
extraVolumes: []
## Additional Prometheus server hostPath mounts
extraHostPathMounts: []
# - name: certs-dir
# mountPath: /etc/kubernetes/certs
# subPath: ""
# hostPath: /etc/kubernetes/certs
# readOnly: true
extraConfigmapMounts: []
# - name: certs-configmap
# mountPath: /prometheus
# subPath: ""
# configMap: certs-configmap
# readOnly: true
## Additional Prometheus server Secret mounts
# Defines additional mounts with secrets. Secrets must be manually created in the namespace.
extraSecretMounts: []
# - name: secret-files
# mountPath: /etc/secrets
# subPath: ""
# secretName: prom-secret-files
# readOnly: true
## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.server.configMapOverrideName}}
## Defining configMapOverrideName will cause templates/server-configmap.yaml
## to NOT generate a ConfigMap resource
configMapOverrideName: ""
## If true, Prometheus server Ingress will be created
enabled: false
# className: nginx
## Prometheus server Ingress annotations
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: 'true'
## Prometheus server Ingress additional labels
extraLabels: {}
## Prometheus server Ingress hostnames with optional path
## Must be provided if Ingress is enabled
hosts: []
# - prometheus.domain.com
# - domain.com/prometheus
## PathType determines the interpretation of the Path matching
pathType: "Prefix"
## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
extraPaths: []
# - path: /*
# backend:
# serviceName: ssl-redirect
# servicePort: use-annotation
## Prometheus server Ingress TLS configuration
## Secrets must be manually created in the namespace
tls: []
# - secretName: prometheus-server-tls
# hosts:
# - prometheus.domain.com
## Server Deployment Strategy type
# strategy:
# type: Recreate
## Node tolerations for server scheduling to nodes with taints
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
## Node labels for Prometheus server pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
nodeSelector: {}
## Pod affinity
affinity: {}
## PodDisruptionBudget settings
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
enabled: false
maxUnavailable: 1
## Use an alternate scheduler, e.g. "stork".
## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
# schedulerName:
## If true, Prometheus server will create/use a Persistent Volume Claim
## If false, use emptyDir
enabled: true
## Prometheus server data Persistent Volume access modes
## Must match those of existing PV or dynamic provisioner
## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
- ReadWriteOnce
## Prometheus server data Persistent Volume annotations
annotations: {}
# helm.sh/resource-policy: keep # https://helm.sh/docs/howto/charts_tips_and_tricks/#tell-helm-not-to-uninstall-a-resource
## Prometheus server data Persistent Volume existing claim name
## Requires server.persistentVolume.enabled: true
## If defined, PVC must be created manually before volume will be bound
existingClaim: ""
## Prometheus server data Persistent Volume mount root path
mountPath: /data
## Prometheus server data Persistent Volume size
size: 32Gi
## Prometheus server data Persistent Volume Storage Class
## If defined, storageClassName: <storageClass>
## If set to "-", storageClassName: "", which disables dynamic provisioning
## If undefined (the default) or set to null, no storageClassName spec is
## set, choosing the default provisioner. (gp2 on AWS, standard on
## GKE, AWS & OpenStack)
# storageClass: "-"
## Prometheus server data Persistent Volume Binding Mode
## If defined, volumeBindingMode: <volumeBindingMode>
## If undefined (the default) or set to null, no volumeBindingMode spec is
## set, choosing the default mode.
# volumeBindingMode: ""
## Subdirectory of Prometheus server data Persistent Volume to mount
## Useful if the volume's root directory is not empty
subPath: ""
sizeLimit: ""
## Annotations to be added to Prometheus server pods
podAnnotations: {}
# iam.amazonaws.com/role: prometheus
## Annotations to be added to the Prometheus Server deployment
deploymentAnnotations: {}
## Labels to be added to Prometheus server pods
podLabels: {}
## Prometheus AlertManager configuration
alertmanagers: []
## Use a StatefulSet if replicaCount needs to be greater than 1 (see below)
replicaCount: 1
## If true, use a statefulset instead of a deployment for pod management.
## This allows scaling replicas to more than 1 pod
enabled: false
annotations: {}
labels: {}
podManagementPolicy: OrderedReady
## Alertmanager headless service to use for the statefulset
annotations: {}
labels: {}
servicePort: 80
## Prometheus server readiness and liveness probe initial delay and timeout
## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
readinessProbeInitialDelay: 5
readinessProbeTimeout: 3
readinessProbeFailureThreshold: 3
readinessProbeSuccessThreshold: 1
livenessProbeInitialDelay: 5
livenessProbeTimeout: 3
livenessProbeFailureThreshold: 3
livenessProbeSuccessThreshold: 1
## Prometheus server resource requests and limits
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
resources: {}
# limits:
# cpu: 500m
# memory: 512Mi
# requests:
# cpu: 500m
# memory: 512Mi
## Vertical Pod Autoscaler config
## Ref: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler
## If true a VPA object will be created for the controller (either StatefulSet or Deployment, based on above configs)
enabled: false
## Optional. Defaults to "Auto" if not specified.
# updateMode: "Auto"
## Mandatory. Without, VPA will not be created.
# containerPolicies:
# - containerName: 'prometheus-server'
## Security context to be added to server pods
securityContext: {}
# runAsUser: 1001
# runAsNonRoot: true
# runAsGroup: 1001
# fsGroup: 1001
containerSecurityContext: {}
annotations: {}
labels: {}
clusterIP: ""
# nodePort: ""
## List of IP addresses at which the Prometheus server service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
externalIPs: []
loadBalancerIP: ""
loadBalancerSourceRanges: []
servicePort: 80
sessionAffinity: None
type: ClusterIP
## Enable gRPC port on service to allow auto discovery with thanos-querier
enabled: false
servicePort: 10901
# nodePort: 10901
## If using a statefulSet (statefulSet.enabled=true), configure the
## service to connect to a specific replica to have a consistent view
## of the data.
enabled: false
replica: 0
## Prometheus server pod termination grace period
terminationGracePeriodSeconds: 300
## Prometheus data retention period (default if not specified is 97 hours)
## Kubecost builds up its own persistent store of metric data on the
## filesystem (usually a PV) and, when using ETL Backup and/or Federated
## ETL, in more durable object storage like S3 or GCS. Kubecost's data
## retention is _not_ tied to the configured Prometheus retention.
## For data durability, we recommend using ETL Backup instead of relying on
## Prometheus retention.
## Lower retention values will affect Prometheus by reducing resource
## consumption and increasing stability. It _must not_ be set below or equal
## to kubecostModel.etlHourlyStoreDurationHours, otherwise empty data sets
## may overwrite good data sets. For now, it must also be >= 49h for Daily
## ETL stability.
## "ETL Rebuild" and "ETL Repair" are only possible on data available within
## this retention window. This is an extremely rare operation.
## If you want maximum security in the event of a Kubecost agent
## (cost-model) outage, increase this value. The current default of 97h is
## intended to balance Prometheus stability and resource consumption
## against the event of an outage in Kubecost which would necessitate a
## version change. 4 days should provide enough time for most users to
## notice a problem and initiate corrective action.
retention: 97h
# retentionSize: should be significantly greater than the storage used in the number of hours set in etlHourlyStoreDurationHours
# Install Prometheus Alert Manager
## If false, alertmanager will not be installed
enabled: false
## Provide a full name override for Prometheus alertmanager.
# fullnameOverride: ""
type: Recreate
rollingUpdate: null
## alertmanager container name
name: alertmanager
## alertmanager container image
repository: cgr.dev/chainguard/prometheus-alertmanager
tag: latest
pullPolicy: IfNotPresent
## alertmanager priorityClassName
priorityClassName: ""
## Additional alertmanager container arguments
extraArgs: {}
## The URL prefix at which the container can be accessed. Useful in the case the '-web.external-url' includes a slug
## so that the various internal URLs are still able to access as they are in the default case.
## (Optional)
prefixURL: ""
## External URL which can access alertmanager
baseURL: "http://localhost:9093"
## Additional alertmanager container environment variables
## For instance to add a http_proxy
extraEnv: {}
## Additional alertmanager Secret mounts
# Defines additional mounts with secrets. Secrets must be manually created in the namespace.
extraSecretMounts: []
# - name: secret-files
# mountPath: /etc/secrets
# subPath: ""
# secretName: alertmanager-secret-files
# readOnly: true
## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.alertmanager.configMapOverrideName}}
## Defining configMapOverrideName will cause templates/alertmanager-configmap.yaml
## to NOT generate a ConfigMap resource
configMapOverrideName: ""
## The name of a secret in the same kubernetes namespace which contains the Alertmanager config
## Defining configFromSecret will cause templates/alertmanager-configmap.yaml
## to NOT generate a ConfigMap resource
configFromSecret: ""
## The configuration file name to be loaded to alertmanager
## Must match the key within configuration loaded from ConfigMap/Secret
configFileName: alertmanager.yml
## If true, alertmanager Ingress will be created
enabled: false
## alertmanager Ingress annotations
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: 'true'
## alertmanager Ingress additional labels
extraLabels: {}
## alertmanager Ingress hostnames with optional path
## Must be provided if Ingress is enabled
hosts: []
# - alertmanager.domain.com
# - domain.com/alertmanager
## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
extraPaths: []
# - path: /*
# backend:
# serviceName: ssl-redirect
# servicePort: use-annotation
## alertmanager Ingress TLS configuration
## Secrets must be manually created in the namespace
tls: []
# - secretName: prometheus-alerts-tls
# hosts:
# - alertmanager.domain.com
## Alertmanager Deployment Strategy type
# strategy:
# type: Recreate
## Node tolerations for alertmanager scheduling to nodes with taints
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
## Node labels for alertmanager pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
nodeSelector: {}
## Pod affinity
affinity: {}
## PodDisruptionBudget settings
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
enabled: false
maxUnavailable: 1
## Use an alternate scheduler, e.g. "stork".
## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
# schedulerName:
## If true, alertmanager will create/use a Persistent Volume Claim
## If false, use emptyDir
enabled: true
## alertmanager data Persistent Volume access modes
## Must match those of existing PV or dynamic provisioner
## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
- ReadWriteOnce
## alertmanager data Persistent Volume Claim annotations
annotations: {}
## alertmanager data Persistent Volume existing claim name
## Requires alertmanager.persistentVolume.enabled: true
## If defined, PVC must be created manually before volume will be bound
existingClaim: ""
## alertmanager data Persistent Volume mount root path
mountPath: /data
## alertmanager data Persistent Volume size
size: 2Gi
## alertmanager data Persistent Volume Storage Class
## If defined, storageClassName: <storageClass>
## If set to "-", storageClassName: "", which disables dynamic provisioning
## If undefined (the default) or set to null, no storageClassName spec is
## set, choosing the default provisioner. (gp2 on AWS, standard on
## GKE, AWS & OpenStack)
# storageClass: "-"
## alertmanager data Persistent Volume Binding Mode
## If defined, volumeBindingMode: <volumeBindingMode>
## If undefined (the default) or set to null, no volumeBindingMode spec is
## set, choosing the default mode.
# volumeBindingMode: ""
## Subdirectory of alertmanager data Persistent Volume to mount
## Useful if the volume's root directory is not empty
subPath: ""
## Annotations to be added to alertmanager pods
podAnnotations: {}
## Tell prometheus to use a specific set of alertmanager pods
## instead of all alertmanager pods found in the same namespace
## Useful if you deploy multiple releases within the same namespace
## prometheus.io/probe: alertmanager-teamA
## Labels to be added to Prometheus AlertManager pods
podLabels: {}
## Use a StatefulSet if replicaCount needs to be greater than 1 (see below)
replicaCount: 1
## If true, use a statefulset instead of a deployment for pod management.
## This allows scaling replicas to more than 1 pod
enabled: false
podManagementPolicy: OrderedReady
## Alertmanager headless service to use for the statefulset
annotations: {}
labels: {}
## Enabling peer mesh service end points for enabling the HA alert manager
## Ref: https://github.com/prometheus/alertmanager/blob/master/README.md
# enableMeshPeer : true
servicePort: 80
## alertmanager resource requests and limits
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
resources: {}
# limits:
# cpu: 10m
# memory: 32Mi
# requests:
# cpu: 10m
# memory: 32Mi
## Security context to be added to alertmanager pods
runAsUser: 1001
runAsNonRoot: true
runAsGroup: 1001
fsGroup: 1001
annotations: {}
labels: {}
clusterIP: ""
## Enabling peer mesh service end points for enabling the HA alert manager
## Ref: https://github.com/prometheus/alertmanager/blob/master/README.md
# enableMeshPeer : true
## List of IP addresses at which the alertmanager service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
externalIPs: []
loadBalancerIP: ""
loadBalancerSourceRanges: []
servicePort: 80
# nodePort: 30000
sessionAffinity: None
type: ClusterIP
# Define a custom scheduler for Alertmanager pods
# schedulerName: default-scheduler
## alertmanager ConfigMap entries
global: {}
# slack_api_url: ''
- name: default-receiver
# slack_configs:
# - channel: '@you'
# send_resolved: true
group_wait: 10s
group_interval: 5m
receiver: default-receiver
repeat_interval: 3h
## Monitors ConfigMap changes and POSTs to a URL
## If false, the configmap-reload container will not be deployed
enabled: false
## configmap-reload container name
name: configmap-reload
## configmap-reload container image
repository: cgr.dev/chainguard/prometheus-config-reloader
tag: latest
pullPolicy: IfNotPresent
## Additional configmap-reload container arguments
extraArgs: {}
## Additional configmap-reload volume directories
extraVolumeDirs: []
## Additional configmap-reload mounts
extraConfigmapMounts: []
# - name: prometheus-alerts
# mountPath: /etc/alerts.d
# subPath: ""
# configMap: prometheus-alerts
# readOnly: true
## configmap-reload resource requests and limits
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
resources: {}
## configmap-reload container securityContext
containerSecurityContext: {}
## If false, the configmap-reload container will not be deployed
enabled: false
## configmap-reload container name
name: configmap-reload
## configmap-reload container image
repository: cgr.dev/chainguard/prometheus-config-reloader
tag: latest
pullPolicy: IfNotPresent
## Additional configmap-reload container arguments
extraArgs: {}
## Additional configmap-reload volume directories
extraVolumeDirs: []
## Additional configmap-reload mounts
extraConfigmapMounts: []
# - name: prometheus-alerts
# mountPath: /etc/alerts.d
# subPath: ""
# configMap: prometheus-alerts
# readOnly: true
## configmap-reload resource requests and limits
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
resources: {}
# node-exporter must be disabled if there is an existing daemonset: https://guide.kubecost.com/hc/en-us/articles/4407601830679-Troubleshoot-Install#a-name-node-exporter-a-issue-failedscheduling-kubecost-prometheus-node-exporter
## If false, node-exporter will not be installed.
## This is disabled by default in Kubecost 2.0, though it can be enabled as needed.
enabled: false
## Provide a full name override for node exporter.
# fullnameOverride: ""
## If true, node-exporter pods share the host network namespace
hostNetwork: true
## If true, node-exporter pods share the host PID namespace
hostPID: true
## node-exporter dns policy
dnsPolicy: ClusterFirstWithHostNet
## node-exporter container name
name: node-exporter
## node-exporter container image
repository: prom/node-exporter
tag: v1.8.2
pullPolicy: IfNotPresent
## node-exporter priorityClassName
priorityClassName: ""
## Custom Update Strategy
type: RollingUpdate
## Additional node-exporter container arguments
extraArgs: {}
## Additional node-exporter hostPath mounts
extraHostPathMounts: []
# - name: textfile-dir
# mountPath: /srv/txt_collector
# hostPath: /var/lib/node-exporter
# readOnly: true
# mountPropagation: HostToContainer
extraConfigmapMounts: []
# - name: certs-configmap
# mountPath: /prometheus
# configMap: certs-configmap
# readOnly: true
## Set a custom affinity for node-exporter
# affinity:
## Node tolerations for node-exporter scheduling to nodes with taints
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
## Node labels for node-exporter pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
nodeSelector: {}
## Annotations to be added to node-exporter pods
podAnnotations: {}
## Annotations to be added to the node-exporter DaemonSet
deploymentAnnotations: {}
## Labels to be added to node-exporter pods
labels: {}
## PodDisruptionBudget settings
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
enabled: false
maxUnavailable: 1
## node-exporter resource limits & requests
## Ref: https://kubernetes.io/docs/user-guide/compute-resources/
resources: {}
# limits:
# cpu: 200m
# memory: 50Mi
# requests:
# cpu: 100m
# memory: 30Mi
## Security context to be added to node-exporter pods
securityContext: {}
# runAsUser: 0
prometheus.io/scrape: "true"
labels: {}
# Exposed as a headless service:
# https://kubernetes.io/docs/concepts/services-networking/service/#headless-services
clusterIP: None
## List of IP addresses at which the node-exporter service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
externalIPs: []
hostPort: 9100
loadBalancerIP: ""
loadBalancerSourceRanges: []
servicePort: 9100
type: ClusterIP
# Install Prometheus Push Gateway.
## If false, pushgateway will not be installed
enabled: false
## Provide a full name override for Prometheus push gateway.
# fullnameOverride: ""
## Use an alternate scheduler, e.g. "stork".
## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
# schedulerName:
## pushgateway container name
name: pushgateway
## pushgateway container image
repository: prom/pushgateway
tag: v1.9.0
pullPolicy: IfNotPresent
## pushgateway priorityClassName
priorityClassName: ""
## Additional pushgateway container arguments
## for example: persistence.file: /data/pushgateway.data
extraArgs: {}
## If true, pushgateway Ingress will be created
enabled: false
## pushgateway Ingress annotations
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: 'true'
## pushgateway Ingress hostnames with optional path
## Must be provided if Ingress is enabled
hosts: []
# - pushgateway.domain.com
# - domain.com/pushgateway
## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
extraPaths: []
# - path: /*
# backend:
# serviceName: ssl-redirect
# servicePort: use-annotation
## pushgateway Ingress TLS configuration
## Secrets must be manually created in the namespace
tls: []
# - secretName: prometheus-alerts-tls
# hosts:
# - pushgateway.domain.com
## Node tolerations for pushgateway scheduling to nodes with taints
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
## Node labels for pushgateway pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
nodeSelector: {}
## Annotations to be added to pushgateway pods
podAnnotations: {}
replicaCount: 1
## PodDisruptionBudget settings
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
enabled: false
maxUnavailable: 1
## pushgateway resource requests and limits
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
resources: {}
# limits:
# cpu: 10m
# memory: 32Mi
# requests:
# cpu: 10m
# memory: 32Mi
## Security context to be added to push-gateway pods
runAsUser: 1001
runAsNonRoot: true
prometheus.io/probe: pushgateway
labels: {}
clusterIP: ""
## List of IP addresses at which the pushgateway service is available
## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
externalIPs: []
loadBalancerIP: ""
loadBalancerSourceRanges: []
servicePort: 9091
type: ClusterIP
type: Recreate
rollingUpdate: null
## If true, pushgateway will create/use a Persistent Volume Claim
## If false, use emptyDir
enabled: true
## pushgateway data Persistent Volume access modes
## Must match those of existing PV or dynamic provisioner
## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
- ReadWriteOnce
## pushgateway data Persistent Volume Claim annotations
annotations: {}
## pushgateway data Persistent Volume existing claim name
## Requires pushgateway.persistentVolume.enabled: true
## If defined, PVC must be created manually before volume will be bound
existingClaim: ""
## pushgateway data Persistent Volume mount root path
mountPath: /data
## pushgateway data Persistent Volume size
size: 2Gi
## pushgateway data Persistent Volume Storage Class
## If defined, storageClassName: <storageClass>
## If set to "-", storageClassName: "", which disables dynamic provisioning
## If undefined (the default) or set to null, no storageClassName spec is
## set, choosing the default provisioner. (gp2 on AWS, standard on
## GKE, AWS & OpenStack)
# storageClass: "-"
## pushgateway data Persistent Volume Binding Mode
## If defined, volumeBindingMode: <volumeBindingMode>
## If undefined (the default) or set to null, no volumeBindingMode spec is
## set, choosing the default mode.
# volumeBindingMode: ""
## Subdirectory of pushgateway data Persistent Volume to mount
## Useful if the volume's root directory is not empty
subPath: ""
## Alerts configuration
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
alerting_rules.yml: {}
# groups:
# - name: Instances
# rules:
# - alert: InstanceDown
# expr: up == 0
# for: 5m
# labels:
# severity: page
# annotations:
# description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.'
# summary: 'Instance {{ $labels.instance }} down'
## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use alerting_rules.yml
alerts: {}
## Records configuration
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
recording_rules.yml: {}
## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml
- /etc/config/recording_rules.yml
- /etc/config/alerting_rules.yml
## The two files below are DEPRECATED and will be removed from this default values file
- /etc/config/rules
- /etc/config/alerts
- job_name: prometheus
- targets:
- localhost:9090
# A scrape configuration for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server, node)
# and services to allow each to use different authentication configs.
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.
- job_name: 'kubernetes-nodes-cadvisor'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. The line
# below disables certificate verification; comment it out to re-enable it.
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
- role: node
# This configuration works only on kubelet 1.7.3+, because the scrape
# endpoints for cAdvisor have changed. If you are using an older version,
# change the replacement to
# replacement: /api/v1/nodes/$1:4194/proxy/metrics
# more info here https://github.com/coreos/prometheus-operator/issues/633
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
# Allow-list of metric names retained from the cAdvisor scrape: with the
# `keep` action, series whose name does not match the regex are dropped.
# Each name is listed exactly once; repeating a name inside the alternation
# does not change what matches, it only makes the list harder to review.
- source_labels: [__name__]
regex: (container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_network_receive_errors_total|container_network_transmit_errors_total|container_network_receive_packets_dropped_total|container_network_transmit_packets_dropped_total|container_memory_usage_bytes|container_cpu_cfs_throttled_periods_total|container_cpu_cfs_periods_total|container_fs_usage_bytes|container_fs_limit_bytes|container_fs_inodes_free|container_fs_inodes_total|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_spec_cpu_shares|container_spec_memory_limit_bytes|container_fs_reads_bytes_total|container_fs_writes_bytes_total|cadvisor_version_info|kubecost_pv_info)
action: keep
- source_labels: [container]
target_label: container_name
regex: (.+)
action: replace
- source_labels: [pod]
target_label: pod_name
regex: (.+)
action: replace
# A scrape configuration for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server, node)
# and services to allow each to use different authentication configs.
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.
- job_name: 'kubernetes-nodes'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. Verification
# is currently disabled by the line below; remove that line (or set it to
# false) to re-enable certificate verification.
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
- role: node
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/$1/proxy/metrics
- source_labels: [__name__]
regex: (kubelet_volume_stats_used_bytes) # this metric is in alpha
action: keep
# Scrape config for service endpoints.
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
- job_name: 'kubernetes-service-endpoints'
- role: endpoints
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_endpoints_name]
action: keep
regex: (.*node-exporter|kubecost-network-costs)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
- source_labels: [__meta_kubernetes_pod_node_name]
action: replace
target_label: kubernetes_node
# Allow-list of metric names retained from this job: with the `keep` action,
# series whose name does not match the regex are dropped.
# The regex must stay on a single line. A plain scalar that wraps onto a
# second line is folded with a space, which would split the alternative
# `node_disk_writes_completed` and stop it from matching.
- source_labels: [__name__]
regex: (container_cpu_allocation|container_cpu_usage_seconds_total|container_fs_limit_bytes|container_fs_writes_bytes_total|container_gpu_allocation|container_memory_allocation_bytes|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|DCGM_FI_DEV_GPU_UTIL|deployment_match_labels|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_number_ready|kube_deployment_spec_replicas|kube_deployment_status_replicas|kube_deployment_status_replicas_available|kube_job_status_failed|kube_namespace_annotations|kube_namespace_labels|kube_node_info|kube_node_labels|kube_node_status_allocatable|kube_node_status_allocatable_cpu_cores|kube_node_status_allocatable_memory_bytes|kube_node_status_capacity|kube_node_status_capacity_cpu_cores|kube_node_status_capacity_memory_bytes|kube_node_status_condition|kube_persistentvolume_capacity_bytes|kube_persistentvolume_status_phase|kube_persistentvolumeclaim_info|kube_persistentvolumeclaim_resource_requests_storage_bytes|kube_pod_container_info|kube_pod_container_resource_limits|kube_pod_container_resource_limits_cpu_cores|kube_pod_container_resource_limits_memory_bytes|kube_pod_container_resource_requests|kube_pod_container_resource_requests_cpu_cores|kube_pod_container_resource_requests_memory_bytes|kube_pod_container_status_restarts_total|kube_pod_container_status_running|kube_pod_container_status_terminated_reason|kube_pod_labels|kube_pod_owner|kube_pod_status_phase|kube_replicaset_owner|kube_statefulset_replicas|kube_statefulset_status_replicas|kubecost_cluster_info|kubecost_cluster_management_cost|kubecost_cluster_memory_working_set_bytes|kubecost_load_balancer_cost|kubecost_network_internet_egress_cost|kubecost_network_region_egress_cost|kubecost_network_zone_egress_cost|kubecost_node_is_spot|kubecost_pod_network_egress_bytes_total|node_cpu_hourly_cost|node_cpu_seconds_total|node_disk_reads_completed|node_disk_reads_completed_total|node_disk_writes_completed|node_disk_writes_completed_total|node_filesystem_device_error|node_gpu_count|node_gpu_hourly_cost|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_network_transmit_bytes_total|node_ram_hourly_cost|node_total_hourly_cost|pod_pvc_allocation|pv_hourly_cost|service_selector_labels|statefulSet_match_labels|kubecost_pv_info|up)
action: keep
# prometheus.yml: # Sample block -- enable if using an in cluster durable store.
# remote_write:
# - url: "http://pgprometheus-adapter:9201/write"
# write_relabel_configs:
# - source_labels: [__name__]
# regex: 'container_.*_allocation|container_.*_allocation_bytes|.*_hourly_cost|kube_pod_container_resource_requests{resource="memory", unit="byte"}|container_memory_working_set_bytes|kube_pod_container_resource_requests{resource="cpu", unit="core"}|kube_pod_container_resource_requests|pod_pvc_allocation|kube_namespace_labels|kube_pod_labels'
# action: keep
# queue_config:
# max_samples_per_send: 1000
# remote_read:
# - url: "http://pgprometheus-adapter:9201/read"
- name: CPU
- expr: sum(rate(container_cpu_usage_seconds_total{container!=""}[5m]))
record: cluster:cpu_usage:rate5m
- expr: rate(container_cpu_usage_seconds_total{container!=""}[5m])
record: cluster:cpu_usage_nosum:rate5m
- expr: avg(irate(container_cpu_usage_seconds_total{container!="POD", container!=""}[5m])) by (container,pod,namespace)
record: kubecost_container_cpu_usage_irate
- expr: sum(container_memory_working_set_bytes{container!="POD",container!=""}) by (container,pod,namespace)
record: kubecost_container_memory_working_set_bytes
- expr: sum(container_memory_working_set_bytes{container!="POD",container!=""})
record: kubecost_cluster_memory_working_set_bytes
- name: Savings
- expr: sum(avg(kube_pod_owner{owner_kind!="DaemonSet"}) by (pod) * sum(container_cpu_allocation) by (pod))
record: kubecost_savings_cpu_allocation
daemonset: "false"
- expr: sum(avg(kube_pod_owner{owner_kind="DaemonSet"}) by (pod) * sum(container_cpu_allocation) by (pod)) / sum(kube_node_info)
record: kubecost_savings_cpu_allocation
daemonset: "true"
- expr: sum(avg(kube_pod_owner{owner_kind!="DaemonSet"}) by (pod) * sum(container_memory_allocation_bytes) by (pod))
record: kubecost_savings_memory_allocation_bytes
daemonset: "false"
- expr: sum(avg(kube_pod_owner{owner_kind="DaemonSet"}) by (pod) * sum(container_memory_allocation_bytes) by (pod)) / sum(kube_node_info)
record: kubecost_savings_memory_allocation_bytes
daemonset: "true"
# Adds option to add alert_relabel_configs to avoid duplicate alerts in alertmanager
# useful in H/A prometheus with different external labels but the same alerts
# alert_relabel_configs:
# - source_labels: [dc]
# regex: (.+)\d+
# target_label: dc
## Enable creation of NetworkPolicy resources.
enabled: false
## Optional daemonset to more accurately attribute network costs to the correct workload
## https://docs.kubecost.com/install-and-configure/advanced-configuration/network-costs-configuration
enabled: false
repository: gcr.io/kubecost1/kubecost-network-costs
tag: v0.17.6
imagePullPolicy: IfNotPresent
type: RollingUpdate
# For existing Prometheus Installs, use the serviceMonitor: or prometheusScrape below.
# the below setting annotates the networkCost service endpoints for each of the network-costs pods.
# The Service is annotated with prometheus.io/scrape: "true" to automatically get picked up by the prometheus config.
# NOTE: Setting this option to true and leaving the above extraScrapeConfig "job_name: kubecost-networking" configured will cause the
# NOTE: pods to be scraped twice.
prometheusScrape: false
# Traffic Logging will enable logging the top 5 destinations for each source
# every 30 minutes.
trafficLogging: true
logLevel: info
# Port will set both the containerPort and hostPort to this value.
# These must be identical due to network-costs being run on hostNetwork
port: 3001
# this daemonset can use significant resources on large clusters: https://guide.kubecost.com/hc/en-us/articles/4407595973527-Network-Traffic-Cost-Allocation
limits: # remove the limits by setting cpu: null
cpu: 500m # can be less, will depend on cluster size
# memory: it is not recommended to set a memory limit
cpu: 50m
memory: 20Mi
extraArgs: []
# Configuration for traffic destinations, including specific classification
# for IPs and CIDR blocks. This configuration will act as an override to the
# automatic classification provided by network-costs.
# In Zone contains a list of address/range that will be
# classified as in zone.
# Loopback Addresses in "IANA IPv4 Special-Purpose Address Registry"
- ""
# IPv4 Link Local Address Space
- ""
# Private Address Ranges in RFC-1918
- "" # Remove this entry if using Multi-AZ Kubernetes
- ""
- ""
# In Region contains a list of address/range that will be
# classified as in region. This is synonymous with cross
# zone traffic, where the regions between source and destinations
# are the same, but the zone is different.
in-region: []
# Cross Region contains a list of address/range that will be
# classified as non-internet egress from one region to another.
cross-region: []
# Internet contains a list of address/range that will be
# classified as internet traffic. This is synonymous with traffic
# that cannot be classified within the cluster.
# NOTE: Internet classification filters are executed _after_
# NOTE: direct-classification, but before in-zone, in-region,
# NOTE: and cross-region.
internet: []
# Direct Classification specifically maps an ip address or range
# to a region (required) and/or zone (optional). This classification
# takes priority over in-zone, in-region, and cross-region configurations.
direct-classification: []
# - region: "us-east1"
# zone: "us-east1-c"
# ips:
# - ""
# google-cloud-services: when set to true, enables labeling traffic metrics with google cloud
# service endpoints
google-cloud-services: true
# amazon-web-services: when set to true, enables labeling traffic metrics with amazon web service
# endpoints.
amazon-web-services: true
# azure-cloud-services: when set to true, enables labeling traffic metrics with azure cloud service
# endpoints
azure-cloud-services: true
# user defined services provide a way to define custom service endpoints which will label traffic metrics
# falling within the defined address range.
# services:
# - service: "test-service-1"
# ips:
# - ""
# - service: "test-service-2"
# ips:
# - ""
# - ""
## Node tolerations for server scheduling to nodes with taints
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
tolerations: []
# - key: "key"
# operator: "Equal|Exists"
# value: "value"
# effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)"
affinity: {}
annotations: {}
labels: {}
## PriorityClassName
## Ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass
priorityClassName: ""
## PodMonitor
## Allows scraping of network metrics from a dedicated prometheus operator setup
enabled: false
additionalLabels: {}
# match the default extraScrapeConfig
additionalLabels: {}
nodeSelector: {}
annotations: {}
healthCheckProbes: {}
# readinessProbe:
# tcpSocket:
# port: 3001
# initialDelaySeconds: 5
# periodSeconds: 10
# failureThreshold: 5
# livenessProbe:
# tcpSocket:
# port: 3001
# initialDelaySeconds: 5
# periodSeconds: 10
# failureThreshold: 5
additionalSecurityContext: {}
# readOnlyRootFilesystem: true
## Kubecost Deployment Configuration
## Used for HA mode in Business & Enterprise tier
replicas: 1
# deploymentStrategy:
# rollingUpdate:
# maxSurge: 1
# maxUnavailable: 1
# type: RollingUpdate
labels: {}
annotations: {}
## Kubecost Forecasting forecasts future cost patterns based on historical
## patterns observed by Kubecost.
enabled: true
# fullImageName overrides the default image construction logic. The exact
# image provided (registry, image, tag) will be used for the forecasting
# container.
# Example: fullImageName: gcr.io/kubecost1/forecasting:v0.0.1
fullImageName: gcr.io/kubecost1/kubecost-modeling:v0.1.16
imagePullPolicy: IfNotPresent
# Resource specification block for the forecasting container.
cpu: 200m
memory: 300Mi
cpu: 1500m
memory: 1Gi
# Set environment variables for the forecasting container as key/value pairs.
# -t is the worker timeout which primarily affects model training time;
# if it is not high enough, training workers may die mid training
"GUNICORN_CMD_ARGS": "--log-level info -t 1200"
# Define a priority class for the forecasting Deployment.
enabled: false
name: ""
# Define a nodeSelector for the forecasting Deployment.
nodeSelector: {}
# Define tolerations for the forecasting Deployment.
tolerations: []
# Define Pod affinity for the forecasting Deployment.
affinity: {}
# Define a readiness probe for the forecasting container
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
# Define a liveness probe for the forecasting container.
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
## The Kubecost Aggregator is the primary query backend for Kubecost
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/federated-etl/aggregator
# deployMethod determines how Aggregator is deployed. Current options are
# "singlepod" (within cost-analyzer Pod), "statefulset" (separate
# StatefulSet), and "disabled". Only use "disabled" if this is a secondary
# Federated ETL cluster which does not need to answer queries.
deployMethod: singlepod
# fullImageName overrides the default image construction logic. The exact
# image provided (registry, image, tag) will be used for aggregator.
# fullImageName:
imagePullPolicy: IfNotPresent
# For legacy configuration support, `enabled: true` overrides deployMethod
# and causes `deployMethod: "statefulset"`
enabled: false
# Replicas sets the number of Aggregator replicas. It only has an effect if
# `deployMethod: "statefulset"`
replicas: 1
logLevel: info
# stagingEmptyDirSizeLimit changes how large the "staging"
# /var/configs/waterfowl emptyDir is. It only takes effect in StatefulSet
# configurations of Aggregator, other configurations are unaffected.
# It should be set to approximately 8x the size of the largest bingen file in
# object storage. For example, if your largest bingen file is a daily
# Allocation file with size 300MiB, this value should be set to approximately
# 2400Mi. In most environments, the default should suffice.
stagingEmptyDirSizeLimit: 2Gi
# This is the number of partitions the datastore is split into for copying.
# The higher this number, the lower the RAM usage, but the longer it takes for
# new data to show in the Kubecost UI.
# Set to 0 for max partitioning (minimum possible RAM usage, but the slowest).
# The default of 25 is sufficient for 95%+ of users. This should only be modified
# after consulting with Kubecost's support team.
numDBCopyPartitions: 25
# How many threads the read database is configured with (i.e. Kubecost API /
# UI queries). If increasing this value, it is recommended to increase the
# aggregator's memory requests & limits.
# default: 1
dbReadThreads: 1
# How many threads the write database is configured with (i.e. ingestion of
# new data from S3). If increasing this value, it is recommended to increase
# the aggregator's memory requests & limits.
# default: 1
dbWriteThreads: 1
# How many threads to use when ingesting Asset/Allocation/CloudCost data
# from the federated store bucket. In most cases the default is sufficient,
# but can be increased if trying to backfill historical data.
# default: 1
dbConcurrentIngestionCount: 1
# Memory limit applied to read database and write database connections. The
# default of "no limit" is appropriate when first establishing a baseline of
# resource usage required. It is eventually recommended to set these values
# such that dbMemoryLimit + dbWriteMemoryLimit < the total memory available
# to the aggregator pod.
# default: 0GB is no limit
dbMemoryLimit: 0GB
dbWriteMemoryLimit: 0GB
# How much data to ingest from the federated store bucket, and how much data
# to keep in the DB before rolling the data off.
# Note: If increasing this value to backfill historical data, it will take
# time to gradually ingest and process those historical ETL files. Consider
# also increasing the resources available to the aggregator as well as the
# refresh and concurrency env vars.
# default: 91
etlDailyStoreDurationDays: 91
# How much hourly data to ingest from the federated store bucket, and how much
# to keep in the DB before rolling the data off.
# In high scale environments setting this to `0` can improve performance if hourly
# resolution is not a requirement.
# default: 49
etlHourlyStoreDurationHours: 49
# How much container resource usage data to retain in the DB, in terms of days.
# In high scale environments setting this to `0` can improve performance if hourly
# resolution is not a requirement.
# default: 1
containerResourceUsageRetentionDays: 1
# Trim memory on close, only change if advised by Kubecost support.
dbTrimMemoryOnClose: true
storageClass: "" # default storage class
storageRequest: 1Gi
storageClass: "" # default storage class
storageRequest: 128Gi
resources: {}
# requests:
# cpu: 1000m
# memory: 1Gi
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
## Set additional environment variables for the aggregator pod
# extraEnv:
# value: "some_value"
## Add a priority class to the aggregator pod
# priority:
# enabled: false
# name: ""
## Optional - add extra ports to the aggregator container. For kubecost development purposes only - not recommended for users.
# extraPorts: []
# - name: debug
# port: 40000
# targetPort: 40000
# containerPort: 40000
## Define a securityContext for the aggregator pod. This will take highest precedence.
# securityContext: {}
## Define the container-level security context for the aggregator pod. This will take highest precedence.
# containerSecurityContext: {}
## Provide a Service Account name for aggregator.
# serviceAccountName: ""
## Define a nodeSelector for the aggregator pod
# nodeSelector: {}
## Define tolerations for the aggregator pod
# tolerations: []
## Define Pod affinity for the aggregator pod
# affinity: {}
## Define extra volumes for the aggregator pod
# extraVolumes: []
## Define extra volumemounts for the aggregator pod
# extraVolumeMounts: []
## Creates a new container/pod to retrieve CloudCost data. By default it uses
## the same serviceaccount as the cost-analyzer pod. A custom serviceaccount
## can be specified.
# The cloudCost component of Aggregator depends on
# kubecostAggregator.deployMethod:
# deployMethod = "singlepod" -> cloudCost is run as container inside cost-analyzer
# deployMethod = "statefulset" -> cloudCost is run as single-replica Deployment
resources: {}
# requests:
# cpu: 1000m
# memory: 1Gi
# refreshRateHours:
# queryWindowDays:
# runWindowDays:
# serviceAccountName:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
## Add a nodeSelector for aggregator cloud costs
# nodeSelector: {}
## Tolerations for the aggregator cloud costs
# tolerations: []
## Affinity for the aggregator cloud costs
# affinity: {}
## ServiceAccount for the aggregator cloud costs
# serviceAccountName: ""
## Define environment variables for cloud cost
# env: {}
## Define extra volumes for the cloud cost pod
# extraVolumes: []
## Define extra volumemounts for the cloud cost pod
# extraVolumeMounts: []
## Configure the Collections service for aggregator.
# collections:
# cache:
# enabled: false
# Jaeger is an optional container attached to wherever the Aggregator
# container is running. It is used for performance investigation. Enable if
# Kubecost Support asks.
enabled: false
image: jaegertracing/all-in-one
imageVersion: latest
# containerSecurityContext:
labels: {}
## Kubecost Multi-cluster Diagnostics (beta)
## A single view into the health of all agent clusters. Each agent cluster sends
## its diagnostic data to a storage bucket. Future versions may include
## repairing & alerting from the primary.
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/multi-cluster-diagnostics
enabled: true
## The primary aggregates all diagnostic data and handles API requests. It's
## also responsible for deleting diagnostic data (on disk & bucket) beyond
## retention. When in readonly mode it does not push its own diagnostic data
## to the bucket.
enabled: false
retention: "7d"
readonly: false
## How frequently to run & push diagnostics. Defaults to 5 minutes.
pollingInterval: "300s"
## Creates a new Diagnostic file in the bucket for every run.
keepDiagnosticHistory: false
## Pushes the cluster's Kubecost Helm Values to the bucket once upon startup.
## This may contain sensitive information and is roughly 30kb per cluster.
collectHelmValues: false
## By default, the Multi-cluster Diagnostics service runs within the
## cost-model container in the cost-analyzer pod. For higher availability, it
## can be run as a separate deployment.
enabled: false
cpu: "10m"
memory: "20Mi"
env: {}
labels: {}
securityContext: {}
containerSecurityContext: {}
nodeSelector: {}
tolerations: []
affinity: {}
## Provide a full name override for the diagnostics Deployment.
# diagnosticsFullnameOverride: ""
# Kubecost Cluster Controller for Right Sizing and Cluster Turndown
enabled: false
repository: gcr.io/kubecost1/cluster-controller
tag: v0.16.9
imagePullPolicy: IfNotPresent
## PriorityClassName
## Ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass
priorityClassName: ""
# Set custom tolerations for the cluster controller.
tolerations: []
# this configures the Kubecost Cluster Turndown action
# for more details, see documentation at https://github.com/kubecost/cluster-turndown/tree/develop?tab=readme-ov-file#setting-a-turndown-schedule
clusterTurndown: []
# - name: my-schedule
# start: "2024-02-09T00:00:00Z"
# end: "2024-02-09T12:00:00Z"
# repeat: daily
# - name: my-schedule2
# start: "2024-02-09T00:00:00Z"
# end: "2024-02-09T01:00:00Z"
# repeat: weekly
# this configures the Kubecost Namespace Turndown action
# for more details, see documentation at https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/savings/savings-actions#namespace-turndown
# - name: my-ns-turndown-action
# dryRun: false
# schedule: "0 0 * * *"
# type: Scheduled
# targetObjs:
# - namespace
# keepPatterns:
# - ignorednamespace
# keepLabels:
# turndown: ignore
# params:
# minNamespaceAge: 4h
# this configures the Kubecost Cluster Sizing action
# for more details, see documentation at https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/savings/savings-actions#cluster-sizing
# startTime: '2024-01-02T15:04:05Z'
# frequencyMinutes: 1440
# lastCompleted: ''
# recommendationParams:
# window: 48h
# architecture: ''
# targetUtilization: 0.8
# minNodeCount: 1
# allowSharedCore: false
# allowCostIncrease: false
# recommendationType: ''
# This configures the Kubecost Continuous Request Sizing Action
# Using this configuration overrides annotation-based configuration of
# Continuous Request Sizing. Annotation configuration will be ignored while
# this configuration method is present in the cluster.
# For more details, see documentation at https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/savings/savings-actions#automated-request-sizing
# Workloads can be selected by an _exact_ key (namespace, controllerKind,
# controllerName). This will only match a single controller. The cluster
# ID is currently irrelevant because Cluster Controller can only modify
# workloads within the cluster it is running in.
# workloads:
# - clusterID: cluster-one
# namespace: my-namespace
# controllerKind: deployment
# controllerName: my-controller
# An alternative to exact key selection is filter selection. The filters
# are syntactically identical to Kubecost's "v2" filters [1] but only
# support a small set of filter fields, those being:
# - namespace
# - controllerKind
# - controllerName
# - label
# - annotation
# If multiple filters are listed, they will be ORed together at the top
# level.
# See the examples below.
# [1] https://docs.kubecost.com/apis/filters-api
# filterConfig:
# - filter: |
# namespace:"abc"+controllerKind:"deployment"
# - filter: |
# controllerName:"abc123"+controllerKind:"daemonset"
# - filter: |
# namespace:"foo"+controllerKind!:"statefulset"
# - filter: |
# namespace:"bar","baz"
# schedule:
# start: "2024-01-30T15:04:05Z"
# frequencyMinutes: 5
# recommendationQueryWindow: "48h"
# lastModified: ''
# targetUtilizationCPU: 0.8 # results in a cpu request setting that is 20% higher than the max seen over last 48h
# targetUtilizationMemory: 0.8 # results in a RAM request setting that is 20% higher than the max seen over last 48h
# If true, will cause all (supported) workloads to have their requests
# automatically right-sized on a regular basis.
defaultResizeAll: false
# fqdn: kubecost-cluster-controller.kubecost.svc.cluster.local:9731
enabled: true
# Kubecost bug report feature: Logs access/collection limited to .Release.Namespace
# Ref: http://docs.kubecost.com/bug-report
logCollection: true
# Basic frontend analytics
productAnalytics: true
# Report Javascript errors
errorReporting: true
valuesReporting: true
# googleAnalyticsTag allows you to embed your Google Global Site Tag to track usage of Kubecost.
# googleAnalyticsTag is only included in our Enterprise offering.
# googleAnalyticsTag: G-XXXXXXXXX
serviceMonitor: # the kubecost included prometheus uses scrapeConfigs and does not support service monitors. The following options assume an existing prometheus that supports serviceMonitors.
enabled: false
interval: 1m
scrapeTimeout: 10s
additionalLabels: {}
metricRelabelings: []
relabelings: []
enabled: false
interval: 1m
scrapeTimeout: 10s
additionalLabels: {}
metricRelabelings: []
relabelings: []
enabled: false
interval: 1m
scrapeTimeout: 10s
additionalLabels: {}
metricRelabelings: []
- action: replace
- __meta_kubernetes_namespace
targetLabel: namespace
enabled: false
additionalLabels: {}
supportNFS: false
# initChownDataImage ensures all Kubecost filepath permissions on PV or local storage are set up correctly.
initChownDataImage: "busybox" # Supports a fully qualified Docker image, e.g. registry.hub.docker.com/library/busybox:latest
resources: {}
# requests:
# cpu: "50m"
# memory: "20Mi"
# namespace_datasources: kubecost # override the default namespace here
# namespace_dashboards: kubecost # override the default namespace here
create: true
create: true
name: ""
## Provide a full name override for the Grafana Deployment.
# fullnameOverride: ""
## Provide a name override for the Grafana Deployment.
# nameOverride: ""
## Configure grafana datasources
## ref: http://docs.grafana.org/administration/provisioning/#datasources
# datasources:
# datasources.yaml:
# apiVersion: 1
# datasources:
# - name: prometheus-kubecost
# type: prometheus
# url: http://kubecost-prometheus-server.kubecost.svc.cluster.local
# access: proxy
# isDefault: false
# jsonData:
# httpMethod: POST
# prometheusType: Prometheus
# prometheusVersion: 2.35.0
# timeInterval: 1m
## Number of replicas for the Grafana deployment
replicas: 1
## Deployment strategy for the Grafana deployment
deploymentStrategy: RollingUpdate
## Readiness probe for the Grafana deployment
path: /api/health
port: 3000
## Liveness probe for the Grafana deployment
path: /api/health
port: 3000
initialDelaySeconds: 60
timeoutSeconds: 30
failureThreshold: 10
## Container image settings for the Grafana deployment
repository: cgr.dev/chainguard/grafana
tag: latest
pullPolicy: IfNotPresent
## Optionally specify an array of imagePullSecrets.
## Secrets must be manually created in the namespace.
# pullSecrets:
# - myRegistryKeySecretName
## Pod-level security context for the Grafana deployment. It is recommended to let the global defaults take effect.
securityContext: {}
# runAsUser: 472
# fsGroup: 472
## PriorityClassName for the Grafana deployment
priorityClassName: ""
## Container image settings for Grafana initContainer used to download dashboards. Will only be used when dashboards are present.
repository: curlimages/curl
tag: latest
pullPolicy: IfNotPresent
## Pod Annotations for the Grafana deployment
podAnnotations: {}
## Deployment annotations for the Grafana deployment
annotations: {}
## Expose the Grafana service to be accessed from outside the cluster (LoadBalancer service).
## or access it from within the cluster (ClusterIP service). Set the service type and the port to serve it.
type: ClusterIP
port: 80
annotations: {}
labels: {}
## This template is not needed and is not supported.
## It is here for backwards compatibility.
## Kubecost exposes grafana by default with the
## top level ingress template under /grafana/
enabled: false
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
labels: {}
path: /
pathType: Prefix
- chart-example.local
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
## Resource requests and limits for the Grafana deployment
## (empty by default; the commented block below shows the expected shape)
resources: {}
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
## Node labels for pod assignment of the Grafana deployment
nodeSelector: {}
## Tolerations for pod assignment of the Grafana deployment
tolerations: []
## Affinity for pod assignment of the Grafana deployment
affinity: {}
## Enable persistence for the Grafana deployment using Persistent Volume Claims (disabled by default)
enabled: false
# storageClassName: default
# accessModes:
# - ReadWriteOnce
# size: 10Gi
# annotations: {}
# subPath: ""
# existingClaim:
## Admin user for Grafana
adminUser: admin
## Admin password for Grafana
## NOTE(review): this default is stored in plain text in this file. Override it at
## install time rather than deploying the value shown here.
adminPassword: strongpassword
## Use an alternate scheduler for the Grafana deployment
# schedulerName:
## Extra environment variables that will be passed on to the Grafana deployment pods
env: {}
## The name of a secret for Grafana in the same Kubernetes namespace which contains values to be added to the environment
## This can be useful for auth tokens, etc.
envFromSecret: ""
## Additional Grafana server secret mounts
## Defines additional mounts with secrets. Secrets must be manually created in the namespace.
## (empty by default; an example entry is shown commented out below)
extraSecretMounts: []
# - name: secret-files
# mountPath: /etc/secrets
# secretName: grafana-secret-files
# readOnly: true
## List of Grafana plugins (empty by default; example entries are shown commented out below)
plugins: []
# - digrich-bubblechart-panel
# - grafana-clock-panel
## Grafana dashboard providers
## ref: http://docs.grafana.org/administration/provisioning/#dashboards
## `path` must be /var/lib/grafana/dashboards/<provider_name>
dashboardProviders: {}
# dashboardproviders.yaml:
# apiVersion: 1
# providers:
# - name: 'default'
# orgId: 1
# folder: ''
# type: file
# disableDeletion: false
# editable: true
# options:
# path: /var/lib/grafana/dashboards/default
## Configure Grafana dashboards to import
## NOTE: To use dashboards you must also enable/configure dashboardProviders
## ref: https://grafana.com/dashboards
## Dashboards are grouped per provider; use the provider name as the key.
dashboards: {}
# default:
# prometheus-stats:
# gnetId: 3662
# revision: 2
# datasource: Prometheus
## Reference to an external Grafana ConfigMap per provider. Use the provider name as the key and the ConfigMap name as the value.
## A provider's dashboards must be defined either by external ConfigMaps or in values.yaml, not in both.
## ConfigMap data example:
## data:
## example-dashboard.json: |
dashboardsConfigMaps: {}
# default: ""
## LDAP Authentication for Grafana can be enabled with the following values on grafana.ini
## NOTE: Grafana will fail to start if the value for ldap.toml is invalid
# auth.ldap:
# enabled: true
# allow_sign_up: true
# config_file: /etc/grafana/ldap.toml
## Grafana's LDAP configuration
## Templated by the template in _helpers.tpl
## NOTE: To enable, grafana.ini must be configured with auth.ldap.enabled
## ref: http://docs.grafana.org/installation/configuration/#auth-ldap
## ref: http://docs.grafana.org/installation/ldap/#configuration
# (LDAP) `existingSecret` is a reference to an existing secret containing the ldap configuration
# for Grafana in a key `ldap-toml`.
existingSecret: ""
# `config` is the content of `ldap.toml` that will be stored in the created secret
config: ""
# config: |-
# verbose_logging = true
# [[servers]]
# host = "my-ldap-server"
# port = 636
# use_ssl = true
# start_tls = false
# ssl_skip_verify = false
# bind_dn = "uid=%s,ou=users,dc=myorg,dc=com"
## Grafana's SMTP configuration
## NOTE: To enable, grafana.ini must be configured with smtp.enabled
## ref: http://docs.grafana.org/installation/configuration/#smtp
# (SMTP) `existingSecret` is a reference to an existing secret containing the smtp configuration
# for Grafana in keys `user` and `password`.
existingSecret: ""
## Grafana sidecars that collect the ConfigMaps with the specified label and store the included files in the respective folders
## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards
repository: cgr.dev/chainguard/k8s-sidecar
tag: latest
pullPolicy: IfNotPresent
resources: {}
enabled: true
# label that the configmaps with dashboards are marked with
label: grafana_dashboard
labelValue: "1"
annotations: {}
# set sidecar ERROR_THROTTLE_SLEEP env var from default 5s to 0s -> fixes https://github.com/kubecost/cost-analyzer-helm-chart/issues/877
error_throttle_sleep: 0
folder: /tmp/dashboards
# dataSourceFilename: foo.yml # If you need to change the name of the datasource file
enabled: false
error_throttle_sleep: 0
# label that the configmaps with datasources are marked with
label: grafana_datasource
## Grafana's primary configuration
## NOTE: values in map will be converted to ini format
## ref: http://docs.grafana.org/installation/configuration/
## For Grafana to be accessible, add the path to root_url. For example, if you run kubecost at www.foo.com:9090/kubecost,
## set root_url to "%(protocol)s://%(domain)s:%(http_port)s/kubecost/grafana". No change is necessary here if kubecost runs at a root URL
serve_from_sub_path: false # Set to false on Grafana v10+
root_url: "%(protocol)s://%(domain)s:%(http_port)s/grafana"
## Filesystem locations (presumably the grafana.ini [paths] settings; confirm)
data: /var/lib/grafana/data
logs: /var/log/grafana
plugins: /var/lib/grafana/plugins
provisioning: /etc/grafana/provisioning
check_for_updates: true
mode: console
url: https://grafana.net
## NOTE(review): the three keys below look like anonymous-access settings
## (grafana.ini [auth.anonymous]); confirm against the chart template.
enabled: true
org_role: Editor
org_name: Main Org.
## Service account creation and annotations (see the inline note on `create`)
create: true # Set this to false if you're bringing your own service account.
annotations: {}
# name: kc-test
## AWSStore settings (disabled by default via useAwsStore)
useAwsStore: false
imageNameAndVersion: gcr.io/kubecost1/awsstore:latest # Name and version of the container image for AWSStore.
createServiceAccount: false
## PriorityClassName
## Ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass
priorityClassName: ""
# Use a custom nodeSelector for AWSStore.
nodeSelector: {}
# kubernetes.io/arch: amd64
## Annotations for the AWSStore ServiceAccount.
annotations: {}
## Federated ETL Architecture
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/federated-etl
## If true, installs the minimal set of components required for a Federated ETL cluster.
agentOnly: false
## If true, push ETL data to the federated storage bucket
federatedCluster: false
## If true, this cluster will be able to read from the federated-store but will
## not write to it. This is useful in situations when you want to deploy a
## primary cluster, but don't want the primary cluster's ETL data to be
## pushed to the bucket
readOnlyPrimary: false
## If true, changes the directory of the S3 backup to the Federated combined store.
## Commonly used when transitioning from Thanos to Federated ETL architecture.
redirectS3Backup: false
## If true, will query metrics from a central PromQL DB (e.g. Amazon Managed
## Prometheus)
useMultiClusterDB: false
## Kubecost Admission Controller (beta feature)
## To use this feature, ensure you have run the `create-admission-controller.sh`
## script. This generates a k8s secret with TLS keys/certificates and a
## corresponding CA bundle.
enabled: false
secretName: webhook-server-tls
## NOTE(review): `${CA_BUNDLE}` looks like a placeholder for the CA bundle mentioned
## above; confirm what substitutes it before enabling this feature.
caBundle: ${CA_BUNDLE}
# Enables or disables the Cost Event Audit pipeline, which tracks recent changes at cluster level
# and provides an estimated cost impact via the Kubecost Predict API.
# It is disabled by default to avoid problems in high-scale environments.
enabled: false
## Disable updates to kubecost from the frontend UI and via POST request
## This feature is considered beta; enterprise users should use teams:
## https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/teams
# readonly: false
# # These configs can also be set from the Settings page in the Kubecost product
# # UI. Values in this block override config changes in the Settings UI on pod
# # restart
# kubecostProductConfigs:
# # An optional list of cluster definitions that can be added for frontend
# # access. The local cluster is *always* included by default, so this list is
# # for non-local clusters.
# clusters:
# - name: "Cluster A"
# address: http://cluster-a.kubecost.com:9090
# # Optional authentication credentials - only basic auth is currently supported.
# auth:
# type: basic
# # Secret name should be a secret formatted based on: https://github.com/kubecost/docs/blob/main/ingress-examples.md
# secretName: cluster-a-auth
# # Or pass auth directly as base64 encoded user:pass
# data: YWRtaW46YWRtaW4=
# # Or user and pass directly
# user: admin
# pass: admin
# - name: "Cluster B"
# address: http://cluster-b.kubecost.com:9090
# # Enabling customPricesEnabled and defaultModelPricing instructs Kubecost to
# # use these custom monthly resource prices when reporting node costs. Note,
# # that the below configuration is for the monthly cost of the resource.
# # Kubecost considers there to be 730 hours in a month. Also note, that these
# # configurations will have no effect on metrics emitted such as
# # `node_ram_hourly_cost` or `node_cpu_hourly_cost`.
# # Ref: https://docs.kubecost.com/install-and-configure/install/provider-installations/air-gapped
# customPricesEnabled: false
# defaultModelPricing:
# enabled: true
# CPU: "28.0"
# spotCPU: "4.86"
# RAM: "3.09"
# spotRAM: "0.65"
# GPU: "693.50"
# spotGPU: "225.0"
# storage: "0.04"
# zoneNetworkEgress: "0.01"
# regionNetworkEgress: "0.01"
# internetNetworkEgress: "0.12"
# # The cluster profile represents a predefined set of parameters to use when calculating savings.
# # Possible values are: [ development, production, high-availability ]
# clusterProfile: production
# spotLabel: lifecycle
# spotLabelValue: Ec2Spot
# gpuLabel: gpu
# gpuLabelValue: true
# alibabaServiceKeyName: ""
# alibabaServiceKeyPassword: ""
# awsServiceKeyName: ACCESSKEYID
# awsServiceKeyPassword: fakepassword # Only use if your values.yaml are stored encrypted. Otherwise provide an existing secret via serviceKeySecretName
# awsSpotDataRegion: us-east-1
# awsSpotDataBucket: spot-data-feed-s3-bucket
# awsSpotDataPrefix: dev
# athenaProjectID: "530337586277" # The AWS AccountID where the Athena CUR is. Generally your masterpayer account
# athenaBucketName: "s3://aws-athena-query-results-530337586277-us-east-1"
# athenaRegion: us-east-1
# athenaDatabase: athenacurcfn_athena_test1
# athenaTable: "athena_test1"
# athenaWorkgroup: "primary" # The default workgroup in AWS is 'primary'
# masterPayerARN: ""
# projectID: "123456789" # Also known as AccountID on AWS -- the current account/project that this instance of Kubecost is deployed on.
# gcpSecretName: gcp-secret # Name of a secret representing the gcp service key
# gcpSecretKeyName: compute-viewer-kubecost-key.json # Name of the secret's key containing the gcp service key
# bigQueryBillingDataDataset: billing_data.gcp_billing_export_v1_01AC9F_74CF1D_5565A2
# labelMappingConfigs: # names of k8s labels or annotations used to designate different allocation concepts
# enabled: true
# owner_label: "owner"
# team_label: "team"
# department_label: "dept"
# product_label: "product"
# environment_label: "env"
# namespace_external_label: "kubernetes_namespace" # external labels/tags are used to map external cloud costs to kubernetes concepts
# cluster_external_label: "kubernetes_cluster"
# controller_external_label: "kubernetes_controller"
# product_external_label: "kubernetes_label_app"
# service_external_label: "kubernetes_service"
# deployment_external_label: "kubernetes_deployment"
# owner_external_label: "kubernetes_label_owner"
# team_external_label: "kubernetes_label_team"
# environment_external_label: "kubernetes_label_env"
# department_external_label: "kubernetes_label_department"
# statefulset_external_label: "kubernetes_statefulset"
# daemonset_external_label: "kubernetes_daemonset"
# pod_external_label: "kubernetes_pod"
# grafanaURL: ""
# # Provide a mapping from Account ID to a readable Account Name in a key/value object. Provide Account IDs as they are displayed in CloudCost
# # as the 'key' and the Account Name associated with it as the 'value'
# cloudAccountMapping:
# clusterName: "" # clusterName is the default context name in settings.
# clusterAccountID: "" # Manually set Account property for assets
# currencyCode: "USD" # official support for USD, AUD, BRL, CAD, CHF, CNY, DKK, EUR, GBP, IDR, INR, JPY, NOK, PLN, SEK
# azureBillingRegion: US # Represents 2-letter region code, e.g. West Europe = NL, Canada = CA. ref: https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
# azureSubscriptionID: 0bd50fdf-c923-4e1e-850c-196dd3dcc5d3
# azureClientID: f2ef6f7d-71fb-47c8-b766-8d63a19db017
# azureTenantID: 72faf3ff-7a3f-4597-b0d9-7b0b201bb23a
# azureClientPassword: fake key # Only use if your values.yaml are stored encrypted. Otherwise provide an existing secret via serviceKeySecretName
# azureOfferDurableID: "MS-AZR-0003p"
# discount: "" # percentage discount applied to compute
# negotiatedDiscount: "" # custom negotiated cloud provider discount
# standardDiscount: "" # custom negotiated cloud provider discount, applied to all incoming asset compute costs in a federated environment. Overrides negotiatedDiscount on any cluster in the federated environment.
# defaultIdle: false
# serviceKeySecretName: "" # Use an existing AWS or Azure secret with format as in aws-service-key-secret.yaml or azure-service-key-secret.yaml. Leave blank if using createServiceKeySecret
# createServiceKeySecret: true # Creates a secret representing your cloud service key based on data in values.yaml. If you are storing unencrypted values, add a secret manually
# sharedNamespaces: "" # namespaces with shared workloads, example value: "kube-system\,ingress-nginx\,kubecost\,monitoring"
# sharedOverhead: "" # value representing a fixed external cost per month to be distributed among aggregations.
# shareTenancyCosts: true # enable or disable sharing costs such as cluster management fees (defaults to "true" on Settings page)
# metricsConfigs: # configuration for metrics emitted by Kubecost
# disabledMetrics: [] # list of metrics that Kubecost will not emit. Note that disabling metrics can lead to unexpected behavior in the cost-model.
# productKey: # Apply enterprise product license
# enabled: false
# key: ""
# secretname: productkeysecret # Reference an existing k8s secret created from a file named productkey.json of format { "key": "enterprise-key-here" }. If the secretname is specified, a configmap with the key will not be created.
# mountPath: "/some/custom/path/productkey.json" # (use instead of secretname) Declare the path at which the product key file is mounted (eg. by a secrets provisioner). The file must be of format { "key": "enterprise-key-here" }.
# # The following block enables the use of a custom SMTP server which overrides Kubecost's built-in, external SMTP server for alerts and reports
# smtp:
# config: |
# {
# "sender_email": "",
# "host": "",
# "port": 587,
# "authentication": true,
# "username": "",
# "password": "",
# "secure": true
# }
# secretname: smtpconfigsecret # Reference an existing k8s secret created from a file named smtp.json of format specified by config above. If the secretname is specified, a configmap with the key will not be created.
# mountPath: "/some/custom/path/smtp.json" # (use instead of secretname) Declare the path at which the SMTP config file is mounted (eg. by a secrets provisioner). The file must be of format specified by config above.
# carbonEstimates: false # Enables Kubecost beta carbon estimation endpoints /assets/carbon and /allocations/carbon
# The below options to hide UI elements are only supported in Enterprise
# hideDiagnostics: false # useful if the primary is not monitored. Supported in limited environments.
# hideOrphanedResources: false # OrphanedResources works on the primary-cluster's cloud-provider only.
# hideKubecostActions: false
# hideReservedInstances: false
# hideSpotCommander: false
# hideUnclaimedVolumes: false
# hideCloudIntegrationsUI: false
# hideBellIcon: false
# hideTeams: false
# savingsRecommendationsAllowLists: # Define select list of instance types to be evaluated in computing Savings Recommendations
# AWS: []
# GCP: []
# Azure: []
## Specify an existing Kubernetes Secret holding the cloud integration information. This Secret must contain
## a key with name `cloud-integration.json` and the contents must be in a specific format. It is expected
## to exist in the release Namespace. This is mutually exclusive with cloudIntegrationJSON where only one must be defined.
# cloudIntegrationSecret: "cloud-integration"
## Specify the cloud integration information in JSON form if pointing to an existing Secret is not desired or you'd rather
## define the cloud integration information directly in the values file. This will result in a new Secret being created
## named `cloud-integration` in the release Namespace. It is mutually exclusive with the cloudIntegrationSecret where only one must be defined.
# cloudIntegrationJSON: |-
# {
# "aws": [
# {
# "athenaBucketName": "s3://AWS_cloud_integration_athenaBucketName",
# "athenaRegion": "AWS_cloud_integration_athenaRegion",
# "athenaDatabase": "AWS_cloud_integration_athenaDatabase",
# "athenaTable": "AWS_cloud_integration_athenaTable",
# "projectID": "AWS_cloud_integration_athena_projectID",
# "serviceKeyName": "AWS_cloud_integration_athena_serviceKeyName",
# "serviceKeySecret": "AWS_cloud_integration_athena_serviceKeySecret"
# }
# ],
# "azure": [
# {
# "azureSubscriptionID": "my-subscription-id",
# "azureStorageAccount": "my-storage-account",
# "azureStorageAccessKey": "my-storage-access-key",
# "azureStorageContainer": "my-storage-container"
# }
# ],
# "gcp": [
# {
# "projectID": "my-project-id",
# "billingDataDataset": "detailedbilling.my-billing-dataset",
# "key": {
# "type": "service_account",
# "project_id": "my-project-id",
# "private_key_id": "my-private-key-id",
# "private_key": "my-pem-encoded-private-key",
# "client_email": "my-service-account-name@my-project-id.iam.gserviceaccount.com",
# "client_id": "my-client-id",
# "auth_uri": "auth-uri",
# "token_uri": "token-uri",
# "auth_provider_x509_cert_url": "my-x509-provider-cert",
# "client_x509_cert_url": "my-x509-cert-url"
# }
# }
# ]
# }
# ingestPodUID: false # Enables using UIDs to uniquely ID pods. This requires either Kubecost's replicated KSM metrics, or KSM v2.1.0+. This may impact performance, and changes the default cost-model allocation behavior.
# regionOverrides: "region1,region2,region3" # list of regions which will override default costmodel provider regions
# Explicit names of various ConfigMaps to use. If not set, a default will apply.
# pricingConfigmapName: ""
# productConfigmapName: ""
# smtpConfigmapName: ""
# -- Array of extra K8s manifests to deploy
## Note: Supports use of custom Helm templates
## Empty by default; the commented entries that follow are examples.
extraObjects: []
# Cloud Billing Integration:
# - apiVersion: v1
# kind: Secret
# metadata:
# name: cloud-integration
# namespace: kubecost
# type: Opaque
# data:
# cloud-integration.json: BASE64_SECRET
# Istio:
# - apiVersion: networking.istio.io/v1alpha3
# kind: VirtualService
# metadata:
# name: my-virtualservice
# spec:
# hosts:
# - kubecost.myorg.com
# gateways:
# - my-gateway
# http:
# - route:
# - destination:
# host: kubecost.kubecost.svc.cluster.local
# port:
# number: 80