# rancher-partner-charts/charts/kubecost/cost-analyzer/2.5.2/values.yaml
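#
# A minimal install sketch for reference; the release name and namespace are
# assumptions, and the repo below is Kubecost's upstream Helm repo (adjust if
# installing from rancher-partner-charts):
#   helm install kubecost cost-analyzer \
#     --repo https://kubecost.github.io/cost-analyzer/ \
#     --namespace kubecost --create-namespace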

global:
# zone: cluster.local (use only if your DNS server doesn't live in the same zone as kubecost)
prometheus:
    enabled: true # Kubecost depends on Prometheus data; it is not optional. When enabled: false, Prometheus will not be installed and you must configure your own Prometheus to scrape Kubecost, as well as provide the FQDN below. Warning: before changing this setting, read about the risks at https://docs.kubecost.com/install-and-configure/install/custom-prom
    fqdn: http://cost-analyzer-prometheus-server.default.svc # Example address of a Prometheus server to connect to. Include the protocol (http:// or https://). Ignored if enabled: true.
insecureSkipVerify: false # If true, kubecost will not check the TLS cert of prometheus
# queryServiceBasicAuthSecretName: dbsecret # kubectl create secret generic dbsecret -n kubecost --from-file=USERNAME --from-file=PASSWORD
# queryServiceBearerTokenSecretName: mcdbsecret # kubectl create secret generic mcdbsecret -n kubecost --from-file=TOKEN
kubeRBACProxy: false # If true, kubecost will use kube-rbac-proxy to authenticate with in cluster Prometheus for openshift
grafana:
enabled: true # If false, Grafana will not be installed
    domainName: cost-analyzer-grafana.default.svc # Example Grafana domain. Ignored if enabled: true.
scheme: "http" # http or https, for the domain name above.
proxy: true # If true, the kubecost frontend will route to your grafana through its service endpoint
# fqdn: cost-analyzer-grafana.default.svc
# Enable only when you are using GCP Marketplace ENT listing. Learn more at https://console.cloud.google.com/marketplace/product/kubecost-public/kubecost-ent
gcpstore:
enabled: false
# Google Cloud Managed Service for Prometheus
gmp:
    # Remember to set these parameters when installing the Kubecost Helm chart with `global.gmp.enabled=true` if you want to use GMP self-deployed collection (recommended) to utilize the Kubecost scrape configs.
# If enabling GMP, it is highly recommended to utilize Google's distribution of Prometheus.
# Learn more at https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-unmanaged
# --set prometheus.server.image.repository="gke.gcr.io/prometheus-engine/prometheus" \
# --set prometheus.server.image.tag="v2.35.0-gmp.2-gke.0"
enabled: false # If true, kubecost will be configured to use GMP Prometheus image and query from Google Cloud Managed Service for Prometheus.
    prometheusServerEndpoint: http://localhost:8085/ # The Prometheus service endpoint used by Kubecost. The calls are forwarded through the GMP Prom proxy sidecar to the GMP database.
gmpProxy:
enabled: false
      image: gke.gcr.io/prometheus-engine/frontend:v0.4.1-gke.0 # GMP Prometheus proxy image that serves as an endpoint to query metrics from GMP
imagePullPolicy: IfNotPresent
name: gmp-proxy
port: 8085
projectId: YOUR_PROJECT_ID # example GCP project ID
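  # A hedged sketch of enabling GMP at install time, combining the flags noted
  # above (the release name and namespace are assumptions):
  #   helm upgrade --install kubecost cost-analyzer \
  #     --repo https://kubecost.github.io/cost-analyzer/ \
  #     --namespace kubecost --create-namespace \
  #     --set global.gmp.enabled=true \
  #     --set prometheus.server.image.repository="gke.gcr.io/prometheus-engine/prometheus" \
  #     --set prometheus.server.image.tag="v2.35.0-gmp.2-gke.0"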
# Amazon Managed Service for Prometheus
amp:
enabled: false # If true, kubecost will be configured to remote_write and query from Amazon Managed Service for Prometheus.
    prometheusServerEndpoint: http://localhost:8005/workspaces/<workspaceId>/ # The Prometheus service endpoint used by Kubecost. The calls are forwarded through the SigV4Proxy sidecar to the AMP workspace.
remoteWriteService: https://aps-workspaces.us-west-2.amazonaws.com/workspaces/<workspaceId>/api/v1/remote_write # The remote_write endpoint for the AMP workspace.
sigv4:
region: us-west-2
# access_key: ACCESS_KEY # AWS Access key
# secret_key: SECRET_KEY # AWS Secret key
# role_arn: ROLE_ARN # AWS role arn
# profile: PROFILE # AWS profile
# Mimir Proxy to help Kubecost to query metrics from multi-tenant Grafana Mimir.
# Set `global.mimirProxy.enabled=true` and `global.prometheus.enabled=false` to enable Mimir Proxy.
# You also need to set `global.prometheus.fqdn=http://kubecost-cost-analyzer-mimir-proxy.kubecost.svc:8085/prometheus`
  # or `global.prometheus.fqdn=http://{{ template "cost-analyzer.fullname" . }}-mimir-proxy.{{ .Release.Namespace }}.svc:8085/prometheus`
# Learn more at https://grafana.com/docs/mimir/latest/operators-guide/secure/authentication-and-authorization/#without-an-authenticating-reverse-proxy
mimirProxy:
enabled: false
## Annotations to be added to the Mimir Proxy deployment template
annotations: {}
name: mimir-proxy
image: nginxinc/nginx-unprivileged
port: 8085
mimirEndpoint: $mimir_endpoint # Your Mimir query endpoint. If your Mimir query endpoint is http://example.com/prometheus, replace $mimir_endpoint with http://example.com/
orgIdentifier: $your_tenant_ID # Your Grafana Mimir tenant ID
# basicAuth:
# username: user
# password: pwd
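  # A sketch combining the three settings described above to route Kubecost
  # queries through the Mimir proxy (release name and namespace assumed):
  #   --set global.mimirProxy.enabled=true \
  #   --set global.prometheus.enabled=false \
  #   --set global.prometheus.fqdn=http://kubecost-cost-analyzer-mimir-proxy.kubecost.svc:8085/prometheus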
## Azure Monitor Managed Service for Prometheus
## Ref: https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/prometheus-remote-write-virtual-machines
ammsp:
enabled: false
prometheusServerEndpoint: http://localhost:8081/
remoteWriteService: $<AMMSP_METRICS_INGESTION_ENDPOINT>
queryEndpoint: $<AMMSP_QUERY_ENDPOINT>
aadAuthProxy:
enabled: false
# per https://github.com/Azure/aad-auth-proxy/releases/tag/0.1.0-main-04-10-2024-7067ac84
image: $<IMAGE> # Example: mcr.microsoft.com/azuremonitor/auth-proxy/prod/aad-auth-proxy/images/aad-auth-proxy:0.1.0-main-04-10-2024-7067ac84
imagePullPolicy: IfNotPresent
name: aad-auth-proxy
port: 8081
audience: https://prometheus.monitor.azure.com/.default
identityType: userAssigned
aadClientId: $<AZURE_MANAGED_IDENTITY_CLIENT_ID>
aadTenantId: $<AZURE_MANAGED_IDENTITY_TENANT_ID>
## Kubecost Alerting
## Ref: http://docs.kubecost.com/alerts
notifications:
# alertConfigs:
# frontendUrl: http://localhost:9090 # Optional
# globalSlackWebhookUrl: https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX # Optional
# globalMsTeamsWebhookUrl: https://xxxxx.webhook.office.com/webhookb2/XXXXXXXXXXXXXXXXXXXXXXXX/IncomingWebhook/XXXXXXXXXXXXXXXXXXXXXXXX # Optional
# globalAlertEmails:
# - recipient@example.com
# - additionalRecipient@example.com
# globalEmailSubject: Custom Subject
# alerts:
# # Daily namespace budget alert on namespace `kubecost`
# - type: budget # supported: budget, recurringUpdate
# threshold: 50 # optional, required for budget alerts
# window: daily # or 1d
# aggregation: namespace
# filter: kubecost
# ownerContact: # optional, overrides globalAlertEmails default
# - owner@example.com
# - owner2@example.com
# slackWebhookUrl: https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX # Optional
# msTeamsWebhookUrl: https://xxxxx.webhook.office.com/webhookb2/XXXXXXXXXXXXXXXXXXXXXXXX/IncomingWebhook/XXXXXXXXXXXXXXXXXXXXXXXX # Optional
# # Daily cluster budget alert on cluster `cluster-one`
# - type: budget
# threshold: 200.8 # optional, required for budget alerts
# window: daily # or 1d
# aggregation: cluster
# filter: cluster-one # does not accept csv
# # Recurring weekly update (weeklyUpdate alert)
# - type: recurringUpdate
# window: weekly # or 7d
# aggregation: namespace
# filter: '*'
# # Recurring weekly namespace update on kubecost namespace
# - type: recurringUpdate
# window: weekly # or 7d
# aggregation: namespace
# filter: kubecost
# # Spend Change Alert
# - type: spendChange # change relative to moving avg
# relativeThreshold: 0.20 # Proportional change relative to baseline. Must be greater than -1 (can be negative)
# window: 1d # accepts d, h
# baselineWindow: 30d # previous window, offset by window
# aggregation: namespace
# filter: kubecost, default # accepts csv
# # Health Score Alert
# - type: health # Alerts when health score changes by a threshold
# window: 10m
# threshold: 5 # Send Alert if health scores changes by 5 or more
# # Kubecost Health Diagnostic
# - type: diagnostic # Alerts when kubecost is unable to compute costs - ie: Prometheus unreachable
# window: 10m
alertmanager: # Supply an alertmanager FQDN to receive notifications from the app.
enabled: false # If true, allow kubecost to write to your alertmanager
fqdn: http://cost-analyzer-prometheus-server.default.svc # example fqdn. Ignored if prometheus.enabled: true
## Kubecost Saved Reports
## Ref: http://docs.kubecost.com/saved-reports
savedReports:
enabled: false # If true, overwrites report parameters set through UI
reports:
- title: "Example Saved Report 0"
window: "today"
aggregateBy: "namespace"
chartDisplay: "category"
idle: "separate"
rate: "cumulative"
accumulate: false # daily resolution
filters: # Ref: https://docs.kubecost.com/apis/filters-api
- key: "cluster" # Ref: https://docs.kubecost.com/apis/filters-api#allocation-apis-request-sizing-v2-api
operator: ":" # Ref: https://docs.kubecost.com/apis/filters-api#filter-operators
value: "dev"
- title: "Example Saved Report 1"
window: "month"
aggregateBy: "controllerKind"
chartDisplay: "category"
idle: "share"
rate: "monthly"
accumulate: false
filters: # Ref: https://docs.kubecost.com/apis/filters-api
- key: "namespace" # Ref: https://docs.kubecost.com/apis/filters-api#allocation-apis-request-sizing-v2-api
operator: "!:" # Ref: https://docs.kubecost.com/apis/filters-api#filter-operators
value: "kubecost"
- title: "Example Saved Report 2"
window: "2020-11-11T00:00:00Z,2020-12-09T23:59:59Z"
aggregateBy: "service"
chartDisplay: "category"
idle: "hide"
rate: "daily"
accumulate: true # entire window resolution
filters: [] # if no filters, specify empty array
assetReports:
enabled: false # If true, overwrites report parameters set through UI
reports:
- title: "Example Asset Report 0"
window: "today"
aggregateBy: "type"
accumulate: false # daily resolution
filters:
- property: "cluster"
value: "cluster-one"
cloudCostReports:
enabled: false # If true, overwrites report parameters set through UI
reports:
- title: "Cloud Cost Report 0"
window: "today"
aggregateBy: "service"
accumulate: false # daily resolution
# filters:
# - property: "service"
# value: "service1" # corresponds to a value to filter cloud cost aggregate by service data on.
podAnnotations: {}
# iam.amazonaws.com/role: role-arn
# Annotations to be added for all controllers (StatefulSets, Deployments, DaemonSets)
annotations: {}
# iam.amazonaws.com/role: role-arn
# Applies these labels to all Deployments, StatefulSets, DaemonSets, and their pod templates.
additionalLabels: {}
securityContext:
runAsNonRoot: true
seccompProfile:
type: RuntimeDefault
fsGroup: 1001
runAsGroup: 1001
runAsUser: 1001
fsGroupChangePolicy: OnRootMismatch
containerSecurityContext:
allowPrivilegeEscalation: false
privileged: false
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
# Installs custom CA certificates onto Kubecost pods
updateCaTrust:
enabled: false # Set to true to enable the init container for updating CA trust
# Security context settings for the init container.
securityContext:
runAsUser: 0
runAsGroup: 0
runAsNonRoot: false
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
seccompProfile:
type: RuntimeDefault
caCertsSecret: ca-certs-secret # The name of the Secret containing custom CA certificates to mount to the cost-model container.
# caCertsConfig: ca-certs-config # The name of the ConfigMap containing the CA trust configuration.
resources: {} # Resource requests and limits for the init container.
caCertsMountPath: /etc/pki/ca-trust/source/anchors # The path where the custom CA certificates will be mounted in the init container
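    # An example of creating the Secret referenced by caCertsSecret above; the
    # file name/key "myRootCA.crt" is an assumption, substitute your own cert:
    #   kubectl create secret generic ca-certs-secret -n kubecost \
    #     --from-file=myRootCA.crt=./myRootCA.crt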
# Platforms is a higher-level abstraction for platform-specific values and settings.
platforms:
# Deploying to OpenShift (OCP) requires enabling this option.
openshift:
enabled: false # Deploy Kubecost to OpenShift.
route:
enabled: false # Create an OpenShift Route.
annotations: {} # Add annotations to the Route.
# host: kubecost.apps.okd4.example.com # Add a custom host for your Route.
      # OPTIONAL. The following configs are only to be enabled when using a Prometheus instance already installed in the cluster.
createMonitoringClusterRoleBinding: false # Create a ClusterRoleBinding to grant the Kubecost serviceaccount access to query Prometheus.
createMonitoringResourceReaderRoleBinding: false # Create a Role and Role Binding to allow Prometheus to list and watch Kubecost resources.
monitoringServiceAccountName: prometheus-k8s # Name of the Prometheus serviceaccount to bind to the Resource Reader Role Binding.
monitoringServiceAccountNamespace: openshift-monitoring # Namespace of the Prometheus serviceaccount to bind to the Resource Reader Role Binding.
# Create Security Context Constraint resources for the DaemonSets requiring additional privileges.
scc:
nodeExporter: false # Creates an SCC for Prometheus Node Exporter. This requires Node Exporter be enabled.
networkCosts: false # Creates an SCC for Kubecost network-costs. This requires network-costs be enabled.
# When OpenShift is enabled, the following securityContext will be applied to all resources unless they define their own.
securityContext:
runAsNonRoot: true
seccompProfile:
type: RuntimeDefault
# Set options for deploying with CI/CD tools like Argo CD.
cicd:
    enabled: false # Set to true when using CI/CD tools like Argo CD to enable the configuration options below.
skipSanityChecks: false # If true, skip all sanity/existence checks for resources like Secrets.
## Kubecost Integrations
## Ref: https://docs.kubecost.com/integrations
integrations:
turbonomic:
enabled: false # Set to true to enable the Turbonomic integration
clientId: "" # Client ID generated from the OAuth Client created
clientSecret: "" # Client Secret generated from the OAuth Client created
role: "" # Role that the OAuth Client was created with (e.g. ADMINISTRATOR, SITE_ADMIN, etc.)
host: "" # URL to your turbonomic API. EG: https://turbonomic.example.com/
insecureClient: false # Do not verify certificate
postgres:
enabled: false
runInterval: "12h" # How frequently to run the integration.
databaseHost: "" # REQUIRED. ex: my.postgres.database.azure.com
databasePort: "" # REQUIRED. ex: 5432
databaseName: "" # REQUIRED. ex: postgres
databaseUser: "" # REQUIRED. ex: myusername
databasePassword: "" # REQUIRED. ex: mypassword
databaseSecretName: "" # OPTIONAL. Specify your own k8s secret containing the above credentials. Must have key "creds.json".
## Configure what Postgres table to write to, and what parameters to pass
## when querying Kubecost's APIs. Ensure all parameters are enclosed in
## quotes. Ref: https://docs.kubecost.com/apis/apis-overview
queryConfigs:
allocations: []
# - databaseTable: "kubecost_allocation_data"
# window: "7d"
# aggregate: "namespace"
# idle: "true"
# shareIdle: "true"
# shareNamespaces: "kubecost,kube-system"
# shareLabels: ""
# - databaseTable: "kubecost_allocation_data_by_cluster"
# window: "10d"
# aggregate: "cluster"
# idle: "true"
# shareIdle: "false"
# shareNamespaces: ""
# shareLabels: ""
assets: []
# - databaseTable: "kubecost_assets_data"
# window: "7d"
# aggregate: "cluster"
cloudCosts: []
# - databaseTable: "kubecost_cloudcosts_data"
# window: "7d"
# aggregate: "service"
## Provide a name override for the chart.
# nameOverride: ""
## Provide a full name override option for the chart.
# fullnameOverride: ""
## Provide additional labels for the chart.
# chartLabels:
# app.kubernetes.io/name: kubecost-cost-analyzer
## This flag is only required for users upgrading to a new version of Kubecost.
## The flag is used to ensure users are aware of important
## (potentially breaking) changes included in the new version.
##
upgrade:
toV2: false
# generated at http://kubecost.com/install, used for alerts tracking and free trials
kubecostToken: # ""
# Advanced pipeline for custom prices, enterprise key required
pricingCsv:
enabled: false
location:
provider: "AWS"
region: "us-east-1"
URI: s3://kc-csv-test/pricing_schema.csv # a valid file URI
csvAccessCredentials: pricing-schema-access-secret
## Kubecost SAML (enterprise key required)
## Ref: https://docs.kubecost.com/install-and-configure/advanced-configuration/user-management-saml
saml:
enabled: false
# secretName: ""
# metadataSecretName: "" # One of metadataSecretName or idpMetadataURL must be set. Defaults to idpMetadataURL if set.
# idpMetadataURL: ""
# appRootURL: ""
# authTimeout: 1440 # Number of minutes the JWT will be valid
# redirectURL: "" # Callback URL redirected to after logout
# audienceURI: "" # Usually the same as the appRootURL. Optionally any string uniquely identifying kubecost to your SAML IDP.
# nameIDFormat: "" # If your SAML provider requires a specific nameid format
# isGLUUProvider: false # An additional URL parameter must be appended for GLUU providers
# encryptionCertSecret: "" # K8s secret storing the x509 certificate used to encrypt an Okta SAML response
# decryptionKeySecret: "" # K8s secret storing the private key associated with the encryptionCertSecret
# authSecret: "" # Value of SAML secret used to issue tokens, will be autogenerated as random string if not provided
# authSecretName: "" # Name of K8s secret where the authSecret will be stored. Defaults to "kubecost-saml-secret" if not provided.
rbac:
enabled: false
# groups:
# - name: admin
# enabled: false # If admin is disabled, all SAML users will be able to make configuration changes to the Kubecost frontend
# assertionName: ""
# assertionValues:
# - "admin"
# - "superusers"
# - name: readonly
# enabled: false # If readonly is disabled, all users authorized on SAML will default to readonly
# assertionName: ""
# assertionValues:
# - "readonly"
# - name: editor
# enabled: true # If editor is enabled, editors will be allowed to edit reports/alerts scoped to them, and act as readers otherwise. Users will never default to editor.
# assertionName: ""
# assertionValues:
# - "editor"
## Kubecost OIDC (enterprise key required)
## Ref: https://docs.kubecost.com/install-and-configure/advanced-configuration/user-management-oidc
oidc:
enabled: false
clientID: "" # Application client_id parameter obtained from provider. Used to make requests to server.
clientSecret: "" # Application/client client_secret parameter obtained from provider. Used to make requests to server.
secretName: "kubecost-oidc-secret" # K8s secret where clientsecret will be stored
existingCustomSecret:
enabled: false
name: "" # Name of an existing clientSecret. Overrides the usage of oidc.clientSecret and oidc.secretName.
authURL: "" # Authorization endpoint for your identity provider
loginRedirectURL: "" # Kubecost URL endpoint which handles auth flow
discoveryURL: "" # Your identity provider's endpoint sharing OIDC configuration
skipOnlineTokenValidation: false # If true, validate JWT claims locally
useClientSecretPost: false # If true, only use client_secret_post method. Otherwise attempt to send the secret in both the header and the body.
hostedDomain: "" # Optional, blocks access to the auth domain specified in the hd claim of the provider ID token
rbac:
enabled: false
# groups:
# - name: admin # Admins have permissions to edit Kubecost settings and save reports
# enabled: false
# claimName: "roles" # Kubecost matches this string against the JWT's payload key containing RBAC info (this value is unique across identity providers)
# claimValues: # Kubecost matches these strings with the roles created in your identity provider
# - "admin"
# - "superusers"
# - name: readonly # Readonly users do not have permissions to edit Kubecost settings or save reports.
# enabled: false
# claimName: "roles"
# claimValues:
# - "readonly"
# - name: editor # Editors have permissions to edit reports/alerts and act as readers otherwise
# enabled: false
# claimName: "roles"
# claimValues:
# - "editor"
## Adds the HTTP_PROXY, HTTPS_PROXY, and NO_PROXY environment variables to all
## containers. Typically used in environments that have firewall rules which
## prevent kubecost from accessing cloud provider resources.
## Ref: https://www.oreilly.com/library/view/security-with-go/9781788627917/5ea6a02b-3d96-44b1-ad3c-6ab60fcbbe4f.xhtml
##
systemProxy:
enabled: false
httpProxyUrl: ""
httpsProxyUrl: ""
noProxy: ""
# imagePullSecrets:
# - name: "image-pull-secret"
# imageVersion uses the base image name (image:) but overrides the version
# pulled. It should be avoided. If non-default behavior is needed, use
# fullImageName for the relevant component.
# imageVersion:
kubecostFrontend:
enabled: true
deployMethod: singlepod # haMode or singlepod - haMode is currently only supported with Enterprise tier
haReplicas: 2 # only used with haMode
image: "gcr.io/kubecost1/frontend"
imagePullPolicy: IfNotPresent
# fullImageName overrides the default image construction logic. The exact
# image provided (registry, image, tag) will be used for the frontend.
# fullImageName:
# extraEnv:
# - name: NGINX_ENTRYPOINT_WORKER_PROCESSES_AUTOTUNE
# value: "1"
# securityContext:
# readOnlyRootFilesystem: true
resources:
requests:
cpu: "10m"
memory: "55Mi"
deploymentStrategy: {}
readinessProbe:
enabled: true
initialDelaySeconds: 1
periodSeconds: 5
failureThreshold: 6
livenessProbe:
enabled: true
initialDelaySeconds: 1
periodSeconds: 5
failureThreshold: 6
ipv6:
enabled: true # disable if the cluster does not support ipv6
# timeoutSeconds: 600 # should be rarely used, but can be increased if needed
# allow customizing nginx-conf server block
# extraServerConfig: |-
# proxy_busy_buffers_size 512k;
# proxy_buffers 4 512k;
# proxy_buffer_size 256k;
# large_client_header_buffers 4 64k;
# hideDiagnostics: false # useful if the primary is not monitored. Supported in limited environments.
# hideOrphanedResources: false # OrphanedResources works on the primary-cluster's cloud-provider only.
# set to true to set all upstreams to use <service>.<namespace>.svc.cluster.local instead of just <service>.<namespace>
useDefaultFqdn: false
# api:
# fqdn: kubecost-api.kubecost.svc.cluster.local:9001
# model:
# fqdn: kubecost-model.kubecost.svc.cluster.local:9003
# forecasting:
  #   fqdn: kubecost-forecasting.kubecost.svc.cluster.local:5000
# aggregator:
# fqdn: kubecost-aggregator.kubecost.svc.cluster.local:9004
# cloudCost:
# fqdn: kubecost-cloud-cost.kubecost.svc.cluster.local:9005
# multiClusterDiagnostics:
# fqdn: kubecost-multi-diag.kubecost.svc.cluster.local:9007
# clusterController:
# fqdn: cluster-controller.kubecost.svc.cluster.local:9731
# Kubecost Metrics deploys a separate pod which will emit Kubernetes-specific metrics required
# by the cost-model. This pod is designed to remain active and decoupled from the cost-model itself.
# However, disabling this service/pod deployment will cause the cost-model to emit the metrics instead.
kubecostMetrics:
# emitPodAnnotations: false
# emitNamespaceAnnotations: false
# emitKsmV1Metrics: true # emit all KSM metrics in KSM v1.
# emitKsmV1MetricsOnly: false # emit only the KSM metrics missing from KSM v2. Advanced users only.
sigV4Proxy:
image: public.ecr.aws/aws-observability/aws-sigv4-proxy:latest
imagePullPolicy: IfNotPresent
name: aps
port: 8005
region: us-west-2 # The AWS region
  host: aps-workspaces.us-west-2.amazonaws.com # The hostname for the AMP service.
# role_arn: arn:aws:iam::<account>:role/role-name # The AWS IAM role to assume.
extraEnv: # Pass extra env variables to sigV4Proxy
# - name: AWS_ACCESS_KEY_ID
# value: <access_key>
# - name: AWS_SECRET_ACCESS_KEY
# value: <secret_key>
resources: {}
kubecostModel:
image: "gcr.io/kubecost1/cost-model"
imagePullPolicy: IfNotPresent
# fullImageName overrides the default image construction logic. The exact
# image provided (registry, image, tag) will be used for cost-model.
# fullImageName:
# Log level for the cost model container. Options are "trace", "debug", "info", "warn", "error", "fatal", "panic"
logLevel: info
# securityContext:
# readOnlyRootFilesystem: true
  # The total number of days of data the daily ETL pipeline will build.
  # Set to 0 to disable daily ETL (not recommended).
  etlDailyStoreDurationDays: 91
  # The total number of hours of data the hourly ETL pipeline will build.
  # Set to 0 to disable hourly ETL (recommended for large environments).
  # Must be < the Prometheus server retention, otherwise empty data may
  # overwrite known-good data.
  etlHourlyStoreDurationHours: 49
# For deploying kubecost in a cluster that does not self-monitor
etlReadOnlyMode: false
## The name of the Secret containing a bucket config for Federated storage.
## The contents should be stored under a key named federated-store.yaml.
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/long-term-storage-configuration
# federatedStorageConfigSecret: federated-store
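  ## An example of creating that Secret from a local file named
  ## federated-store.yaml (the secret name matches the value above):
  ##   kubectl create secret generic federated-store -n kubecost \
  ##     --from-file=federated-store.yaml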
  ## Federated storage config can be supplied via a secret or the yaml block
  ## below. When using the block below, only a single provider is supported;
  ## the others are shown for example purposes.
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/long-term-storage-configuration
# federatedStorageConfig: |-
# # AWS EXAMPLE
# type: S3
# config:
# bucket: kubecost-federated-storage-bucket
# endpoint: s3.amazonaws.com
# region: us-east-1
  #   # Best practice is to use pod identities to access AWS resources. Otherwise, it is possible to use an access_key and secret_key.
# access_key: "<your-access-key>"
# secret_key: "<your-secret-key>"
# # AZURE EXAMPLE
# type: AZURE
# config:
# storage_account: ""
# storage_account_key: ""
# container: ""
# max_retries: 0
# # GCP EXAMPLE
# type: GCS
# config:
# bucket: kubecost-federated-storage-bucket
# service_account: |-
# {
# "type": "service_account",
# "project_id": "...",
# "private_key_id": "...",
# "private_key": "...",
# "client_email": "...",
# "client_id": "...",
# "auth_uri": "https://accounts.google.com/o/oauth2/auth",
# "token_uri": "https://oauth2.googleapis.com/token",
# "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
# "client_x509_cert_url": ""
# }
# Installs Kubecost/OpenCost plugins
plugins:
enabled: false
install:
enabled: false
fullImageName: curlimages/curl:latest
securityContext:
allowPrivilegeEscalation: false
seccompProfile:
type: RuntimeDefault
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1001
folder: /opt/opencost/plugin
      # Leave this commented to always download the most recent version of plugins.
# version: <INSERT_SPECIFIC_PLUGINS_VERSION>
# the list of enabled plugins
enabledPlugins: []
# - datadog
# pre-existing secret for plugin configuration
existingCustomSecret:
enabled: false
name: "" # name of the secret containing plugin config
secretName: kubecost-plugin-secret
# uncomment this to define plugin configuration via the values file
# configs:
# datadog: |
# {
# "datadog_site": "<INSERT_DATADOG_SITE>",
# "datadog_api_key": "<INSERT_DATADOG_API_KEY>",
# "datadog_app_key": "<INSERT_DATADOG_APP_KEY>"
# }
allocation:
# Enables or disables adding node labels to allocation data (i.e. workloads).
# Defaults to "true" and starts with a sensible includeList for basics like
# topology (e.g. zone, region) and instance type labels.
# nodeLabels:
# enabled: true
# includeList: "node.kubernetes.io/instance-type,topology.kubernetes.io/region,topology.kubernetes.io/zone"
    # Enables or disables the ContainerStats pipeline, which provides support
    # for quantile-based queries such as request right-sizing recommendations.
    #
    # It was disabled by default before v2.2.0 to avoid problems in extremely
    # high-scale Thanos environments. If you are in a high-scale environment,
    # please monitor Kubecost logs, Thanos query logs, and Thanos load closely.
    # We hope to make major improvements at scale here soon!
    #
containerStatsEnabled: true # enabled by default as of v2.2.0
# max number of concurrent Prometheus queries
maxQueryConcurrency: 5
resources:
requests:
cpu: "200m"
memory: "55Mi"
# limits:
# cpu: "800m"
# memory: "256Mi"
readinessProbe:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
livenessProbe:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
extraArgs: []
# Optional. A list of extra environment variables to be added to the cost-model container.
# extraEnv:
# - name: LOG_FORMAT
# value: json
# # When false, Kubecost will not show Asset costs for local disks physically
# # attached to nodes (e.g. ephemeral storage). This needs to be applied to
# # each cluster monitored.
# - name: ASSET_INCLUDE_LOCAL_DISK_COST
# value: "true"
utcOffset: "+00:00"
extraPorts: []
## etlUtils is a utility typically used by Enterprise customers transitioning
## from v1 to v2 of Kubecost. It translates the data from the "/etl" dir of the
## bucket, to the "/federated" dir of the bucket.
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/federated-etl/thanos-migration-guide
##
etlUtils:
enabled: false
fullImageName: null
resources: {}
env: {}
nodeSelector: {}
tolerations: []
## Annotations to be added to etlutils deployment
annotations: {}
affinity: {}
# Basic Kubecost ingress, more examples available at https://docs.kubecost.com/install-and-configure/install/ingress-examples
ingress:
enabled: false
# className: nginx
labels:
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
annotations:
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
paths: ["/"] # There's no need to route specifically to the pods-- we have an nginx deployed that handles routing
pathType: ImplementationSpecific
hosts:
- cost-analyzer.local
tls: []
# - secretName: cost-analyzer-tls
# hosts:
# - cost-analyzer.local
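  # A minimal sketch of an enabled ingress; the class name and host below are
  # assumptions:
  #   ingress:
  #     enabled: true
  #     className: nginx
  #     hosts:
  #       - kubecost.example.com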
nodeSelector: {}
tolerations: []
affinity: {}
topologySpreadConstraints: []
priority:
enabled: false
name: ""
extraVolumes: []
extraVolumeMounts: []
# Define persistence volume for cost-analyzer, more information at https://docs.kubecost.com/install-and-configure/install/storage
persistentVolume:
size: 32Gi
enabled: true # Note that setting this to false means configurations will be wiped out on pod restart.
# storageClass: "-" #
# existingClaim: kubecost-cost-analyzer # a claim in the same namespace as kubecost
labels: {}
annotations: {}
service:
type: ClusterIP
port: 9090
targetPort: 9090
nodePort: {}
labels: {}
annotations: {}
# loadBalancerSourceRanges: []
sessionAffinity:
    enabled: false # When set to `true`, ensures connections from a client are passed to the same Pod each time. Set this if you have enabled authentication through OIDC or SAML integration.
timeoutSeconds: 10800
prometheus:
## Provide a full name override for Prometheus.
# fullnameOverride: ""
## Provide a name override for Prometheus.
# nameOverride: ""
rbac:
create: true # Create the RBAC resources for Prometheus.
serviceAccounts:
alertmanager:
create: true
name:
nodeExporter:
create: true
name:
server:
create: true
name:
## Prometheus server ServiceAccount annotations.
## Can be used for AWS IRSA annotations when using Remote Write mode with Amazon Managed Prometheus.
annotations: {}
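      # For example, for AMP Remote Write via IRSA (the role ARN is a placeholder):
      #   annotations:
      #     eks.amazonaws.com/role-arn: arn:aws:iam::<account-id>:role/<amp-irsa-role>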
## Specify an existing ConfigMap to be used by Prometheus when using self-signed certificates.
##
# selfsignedCertConfigMapName: ""
imagePullSecrets:
extraScrapeConfigs: |
- job_name: kubecost
honor_labels: true
scrape_interval: 1m
scrape_timeout: 60s
metrics_path: /metrics
scheme: http
dns_sd_configs:
- names:
- {{ template "cost-analyzer.serviceName" . }}
type: 'A'
port: 9003
- job_name: kubecost-networking
kubernetes_sd_configs:
- role: pod
relabel_configs:
        # Scrape only the targets matching the following metadata
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance]
action: keep
regex: kubecost
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
action: keep
regex: network-costs
- job_name: kubecost-aggregator
scrape_interval: 1m
scrape_timeout: 60s
metrics_path: /metrics
scheme: http
dns_sd_configs:
- names:
- {{ template "aggregator.serviceName" . }}
type: 'A'
{{- if or .Values.saml.enabled .Values.oidc.enabled }}
port: 9008
{{- else }}
port: 9004
{{- end }}
    ## Enables scraping of NVIDIA GPU metrics via dcgm-exporter. Scrapes all
    ## endpoints which contain "dcgm-exporter" in the labels "app",
    ## "app.kubernetes.io/component", or "app.kubernetes.io/name" with a
    ## case-insensitive match. The label must be present on the K8s service endpoints, not just the pods.
## Refs:
## https://github.com/NVIDIA/gpu-operator/blob/d4316a415bbd684ce8416a88042305fc1a093aa4/assets/state-dcgm-exporter/0600_service.yaml#L7
## https://github.com/NVIDIA/dcgm-exporter/blob/54fd1ca137c66511a87a720390613680b9bdabdd/deployment/templates/service.yaml#L23
- job_name: kubecost-dcgm-exporter
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_app, __meta_kubernetes_pod_label_app_kubernetes_io_component, __meta_kubernetes_pod_label_app_kubernetes_io_name]
action: keep
          regex: (?i).*dcgm-exporter.*
server:
# If clusterIDConfigmap is defined, instead use user-generated configmap with key CLUSTER_ID
# to use as unique cluster ID in kubecost cost-analyzer deployment.
# This overrides the cluster_id set in prometheus.server.global.external_labels.
# NOTE: This does not affect the external_labels set in prometheus config.
# clusterIDConfigmap: cluster-id-configmap
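    # An example of creating such a ConfigMap; the name matches the commented
    # value above, and the cluster ID string is an assumption:
    #   kubectl create configmap cluster-id-configmap -n kubecost \
    #     --from-literal=CLUSTER_ID=my-cluster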
## Provide a full name override for the Prometheus server.
# fullnameOverride: ""
enabled: true
name: server
sidecarContainers:
strategy:
type: Recreate
rollingUpdate: null
image:
repository: quay.io/prometheus/prometheus
tag: v2.55.1
pullPolicy: IfNotPresent
priorityClassName: ""
prefixURL: ""
baseURL: ""
env: []
extraFlags:
- web.enable-lifecycle
configPath: /etc/config/prometheus.yml
global:
scrape_interval: 1m
scrape_timeout: 60s
evaluation_interval: 1m
external_labels:
cluster_id: cluster-one # Each cluster should have a unique ID
remoteWrite: {}
remoteRead: {}
extraArgs:
query.max-concurrency: 1
query.max-samples: 100000000
extraInitContainers: []
extraVolumeMounts: []
extraVolumes: []
extraHostPathMounts: []
extraConfigmapMounts: []
extraSecretMounts: []
configMapOverrideName: ""
ingress:
enabled: false
# className: nginx
annotations: {}
extraLabels: {}
hosts: []
pathType: "Prefix"
extraPaths: []
tls: []
# strategy:
# type: Recreate
tolerations: []
nodeSelector: {}
affinity: {}
podDisruptionBudget:
enabled: false
maxUnavailable: 1
# schedulerName:
persistentVolume:
enabled: true
accessModes:
- ReadWriteOnce
annotations: {}
existingClaim: ""
mountPath: /data
size: 32Gi
# storageClass: "-"
# volumeBindingMode: ""
subPath: ""
emptyDir:
sizeLimit: ""
podAnnotations: {}
annotations: {}
podLabels: {}
alertmanagers: []
replicaCount: 1
statefulSet:
enabled: false
annotations: {}
labels: {}
podManagementPolicy: OrderedReady
headless:
annotations: {}
labels: {}
servicePort: 80
readinessProbeInitialDelay: 5
readinessProbeTimeout: 3
readinessProbeFailureThreshold: 3
readinessProbeSuccessThreshold: 1
livenessProbeInitialDelay: 5
livenessProbeTimeout: 3
livenessProbeFailureThreshold: 3
livenessProbeSuccessThreshold: 1
resources: {}
verticalAutoscaler:
enabled: false
## Optional. Defaults to "Auto" if not specified.
# updateMode: "Auto"
## Mandatory. Without, VPA will not be created.
# containerPolicies:
# - containerName: 'prometheus-server'
securityContext: {}
containerSecurityContext: {}
service:
annotations: {}
labels: {}
clusterIP: ""
externalIPs: []
loadBalancerIP: ""
loadBalancerSourceRanges: []
servicePort: 80
sessionAffinity: None
type: ClusterIP
gRPC:
enabled: false
servicePort: 10901
statefulsetReplica:
enabled: false
replica: 0
terminationGracePeriodSeconds: 300
## Prometheus data retention period (default if not specified is 97 hours)
##
## Kubecost builds up its own persistent store of metric data on the
## filesystem (usually a PV) and, when using ETL Backup and/or Federated
## ETL, in more durable object storage like S3 or GCS. Kubecost's data
## retention is _not_ tied to the configured Prometheus retention.
##
## For data durability, we recommend using ETL Backup instead of relying on
## Prometheus retention.
##
## Lower retention values will affect Prometheus by reducing resource
## consumption and increasing stability. It _must not_ be set below or equal
## to kubecostModel.etlHourlyStoreDurationHours, otherwise empty data sets
## may overwrite good data sets. For now, it must also be >= 49h for Daily
## ETL stability.
##
## "ETL Rebuild" and "ETL Repair" is only possible on data available within
## this retention window. This is an extremely rare operation.
##
## If you want maximum security in the event of a Kubecost agent
## (cost-model) outage, increase this value. The current default of 97h is
## intended to balance Prometheus stability and resource consumption
## against the event of an outage in Kubecost which would necessitate a
## version change. 4 days should provide enough time for most users to
## notice a problem and initiate corrective action.
retention: 97h
    # retentionSize should be set significantly greater than the storage used over the number of hours set in etlHourlyStoreDurationHours.
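    # A sketch, assuming the upstream Prometheus chart's size format applies:
    #   retentionSize: 50GB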
# Install Prometheus Alert Manager
alertmanager:
enabled: false
## Provide a full name override for Prometheus alertmanager.
# fullnameOverride: ""
strategy:
type: Recreate
rollingUpdate: null
name: alertmanager
image:
repository: quay.io/prometheus/alertmanager
tag: v0.27.0
pullPolicy: IfNotPresent
priorityClassName: ""
extraArgs: {}
prefixURL: ""
baseURL: "http://localhost:9093"
extraEnv: {}
extraSecretMounts: []
configMapOverrideName: ""
configFromSecret: ""
configFileName: alertmanager.yml
ingress:
enabled: false
annotations: {}
extraLabels: {}
hosts: []
extraPaths: []
tls: []
# strategy:
# type: Recreate
tolerations: []
nodeSelector: {}
affinity: {}
podDisruptionBudget:
enabled: false
maxUnavailable: 1
# schedulerName:
persistentVolume:
enabled: true
accessModes:
- ReadWriteOnce
annotations: {}
existingClaim: ""
mountPath: /data
size: 2Gi
# storageClass: "-"
# volumeBindingMode: ""
subPath: ""
podAnnotations: {}
annotations: {}
podLabels: {}
replicaCount: 1
statefulSet:
enabled: false
annotations: {}
podManagementPolicy: OrderedReady
headless:
annotations: {}
labels: {}
# enableMeshPeer : true
servicePort: 80
resources: {}
securityContext:
runAsUser: 1001
runAsNonRoot: true
runAsGroup: 1001
fsGroup: 1001
service:
annotations: {}
labels: {}
clusterIP: ""
# enableMeshPeer : true
externalIPs: []
loadBalancerIP: ""
loadBalancerSourceRanges: []
servicePort: 80
# nodePort: 30000
sessionAffinity: None
type: ClusterIP
alertmanagerFiles:
alertmanager.yml:
global: {}
receivers:
- name: default-receiver
route:
group_wait: 10s
group_interval: 5m
receiver: default-receiver
repeat_interval: 3h
## Monitors ConfigMap changes and POSTs to a URL
configmapReload:
prometheus:
enabled: false
name: configmap-reload
image:
repository: quay.io/prometheus-operator/prometheus-config-reloader
tag: v0.78.2
pullPolicy: IfNotPresent
extraArgs: {}
extraVolumeDirs: []
extraConfigmapMounts: []
resources: {}
containerSecurityContext: {}
alertmanager:
enabled: false
name: configmap-reload
image:
repository: quay.io/prometheus-operator/prometheus-config-reloader
tag: v0.78.2
pullPolicy: IfNotPresent
extraArgs: {}
extraVolumeDirs: []
extraConfigmapMounts: []
resources: {}
nodeExporter:
## If false, node-exporter will not be installed.
## This is disabled by default in Kubecost 2.0, though it can be enabled as needed.
##
enabled: false
## Provide a full name override for node exporter.
# fullnameOverride: ""
hostNetwork: true
hostPID: true
dnsPolicy: ClusterFirstWithHostNet
name: node-exporter
image:
repository: prom/node-exporter
tag: v1.8.2
pullPolicy: IfNotPresent
priorityClassName: ""
updateStrategy:
type: RollingUpdate
extraArgs: {}
extraHostPathMounts: []
extraConfigmapMounts: []
# affinity:
tolerations: []
nodeSelector: {}
podAnnotations: {}
annotations: {}
pod:
labels: {}
podDisruptionBudget:
enabled: false
maxUnavailable: 1
resources: {}
securityContext: {}
service:
annotations:
prometheus.io/scrape: "true"
labels: {}
clusterIP: None
externalIPs: []
hostPort: 9100
loadBalancerIP: ""
loadBalancerSourceRanges: []
servicePort: 9100
type: ClusterIP
serverFiles:
## Alerts configuration
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
alerting_rules.yml: {}
## Records configuration
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
recording_rules.yml: {}
prometheus.yml:
rule_files:
- /etc/config/recording_rules.yml
- /etc/config/alerting_rules.yml
scrape_configs:
- job_name: prometheus
static_configs:
- targets:
- localhost:9090
# A scrape configuration for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server, node)
# and services to allow each to use different authentication configs.
#
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.
- job_name: 'kubernetes-nodes-cadvisor'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
# This configuration will work only on kubelet 1.7.3+
# As the scrape endpoints for cAdvisor have changed
# if you are using older version you need to change the replacement to
# replacement: /api/v1/nodes/$1:4194/proxy/metrics
# more info here https://github.com/coreos/prometheus-operator/issues/633
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
metric_relabel_configs:
- source_labels: [__name__]
regex: (container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_network_receive_errors_total|container_network_transmit_errors_total|container_network_receive_packets_dropped_total|container_network_transmit_packets_dropped_total|container_memory_usage_bytes|container_cpu_cfs_throttled_periods_total|container_cpu_cfs_periods_total|container_fs_usage_bytes|container_fs_limit_bytes|container_cpu_cfs_periods_total|container_fs_inodes_free|container_fs_inodes_total|container_fs_usage_bytes|container_fs_limit_bytes|container_cpu_cfs_throttled_periods_total|container_cpu_cfs_periods_total|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_fs_inodes_free|container_fs_inodes_total|container_fs_usage_bytes|container_fs_limit_bytes|container_spec_cpu_shares|container_spec_memory_limit_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_fs_reads_bytes_total|container_network_receive_bytes_total|container_fs_writes_bytes_total|container_fs_reads_bytes_total|cadvisor_version_info|kubecost_pv_info)
action: keep
- source_labels: [container]
target_label: container_name
regex: (.+)
action: replace
- source_labels: [pod]
target_label: pod_name
regex: (.+)
action: replace
# A scrape configuration for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server, node)
# and services to allow each to use different authentication configs.
#
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.
- job_name: 'kubernetes-nodes'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/$1/proxy/metrics
metric_relabel_configs:
- source_labels: [__name__]
regex: (kubelet_volume_stats_used_bytes) # this metric is in alpha
action: keep
# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_endpoints_name]
action: keep
regex: (.*node-exporter|kubecost-network-costs)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
- source_labels: [__meta_kubernetes_pod_node_name]
action: replace
target_label: kubernetes_node
metric_relabel_configs:
- source_labels: [__name__]
regex: (container_cpu_allocation|container_cpu_usage_seconds_total|container_fs_limit_bytes|container_fs_writes_bytes_total|container_gpu_allocation|container_memory_allocation_bytes|container_memory_usage_bytes|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_transmit_bytes_total|DCGM_FI_DEV_GPU_UTIL|deployment_match_labels|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_number_ready|kube_deployment_spec_replicas|kube_deployment_status_replicas|kube_deployment_status_replicas_available|kube_job_status_failed|kube_namespace_annotations|kube_namespace_labels|kube_node_info|kube_node_labels|kube_node_status_allocatable|kube_node_status_allocatable_cpu_cores|kube_node_status_allocatable_memory_bytes|kube_node_status_capacity|kube_node_status_capacity_cpu_cores|kube_node_status_capacity_memory_bytes|kube_node_status_condition|kube_persistentvolume_capacity_bytes|kube_persistentvolume_status_phase|kube_persistentvolumeclaim_info|kube_persistentvolumeclaim_resource_requests_storage_bytes|kube_pod_container_info|kube_pod_container_resource_limits|kube_pod_container_resource_limits_cpu_cores|kube_pod_container_resource_limits_memory_bytes|kube_pod_container_resource_requests|kube_pod_container_resource_requests_cpu_cores|kube_pod_container_resource_requests_memory_bytes|kube_pod_container_status_restarts_total|kube_pod_container_status_running|kube_pod_container_status_terminated_reason|kube_pod_labels|kube_pod_owner|kube_pod_status_phase|kube_replicaset_owner|kube_statefulset_replicas|kube_statefulset_status_replicas|kubecost_cluster_info|kubecost_cluster_management_cost|kubecost_cluster_memory_working_set_bytes|kubecost_load_balancer_cost|kubecost_network_internet_egress_cost|kubecost_network_region_egress_cost|kubecost_network_zone_egress_cost|kubecost_node_is_spot|kubecost_pod_network_egress_bytes_total|node_cpu_hourly_cost|node_cpu_seconds_total|node_disk_reads_completed|node_disk_reads_completed_total|node_disk_writes_completed|node_disk_writes_completed_total|node_filesystem_device_error|node_gpu_count|node_gpu_hourly_cost|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_network_transmit_bytes_total|node_ram_hourly_cost|node_total_hourly_cost|pod_pvc_allocation|pv_hourly_cost|service_selector_labels|statefulSet_match_labels|kubecost_pv_info|up)
action: keep
rules:
groups:
- name: CPU
rules:
- expr: sum(rate(container_cpu_usage_seconds_total{container!=""}[5m]))
record: cluster:cpu_usage:rate5m
- expr: rate(container_cpu_usage_seconds_total{container!=""}[5m])
record: cluster:cpu_usage_nosum:rate5m
- expr: avg(irate(container_cpu_usage_seconds_total{container!="POD", container!=""}[5m])) by (container,pod,namespace)
record: kubecost_container_cpu_usage_irate
- expr: sum(container_memory_working_set_bytes{container!="POD",container!=""}) by (container,pod,namespace)
record: kubecost_container_memory_working_set_bytes
- expr: sum(container_memory_working_set_bytes{container!="POD",container!=""})
record: kubecost_cluster_memory_working_set_bytes
- name: Savings
rules:
- expr: sum(avg(kube_pod_owner{owner_kind!="DaemonSet"}) by (pod) * sum(container_cpu_allocation) by (pod))
record: kubecost_savings_cpu_allocation
labels:
daemonset: "false"
- expr: sum(avg(kube_pod_owner{owner_kind="DaemonSet"}) by (pod) * sum(container_cpu_allocation) by (pod)) / sum(kube_node_info)
record: kubecost_savings_cpu_allocation
labels:
daemonset: "true"
- expr: sum(avg(kube_pod_owner{owner_kind!="DaemonSet"}) by (pod) * sum(container_memory_allocation_bytes) by (pod))
record: kubecost_savings_memory_allocation_bytes
labels:
daemonset: "false"
- expr: sum(avg(kube_pod_owner{owner_kind="DaemonSet"}) by (pod) * sum(container_memory_allocation_bytes) by (pod)) / sum(kube_node_info)
record: kubecost_savings_memory_allocation_bytes
labels:
daemonset: "true"
    # Adds the option to set alert_relabel_configs, to avoid duplicate alerts in Alertmanager.
    # Useful in H/A Prometheus setups with different external labels but the same alerts.
alertRelabelConfigs:
# alert_relabel_configs:
# - source_labels: [dc]
# regex: (.+)\d+
# target_label: dc
networkPolicy:
enabled: false
## Optional daemonset to more accurately attribute network costs to the correct workload
## https://docs.kubecost.com/install-and-configure/advanced-configuration/network-costs-configuration
networkCosts:
enabled: false
image:
repository: gcr.io/kubecost1/kubecost-network-costs
tag: v0.17.6
imagePullPolicy: IfNotPresent
updateStrategy:
type: RollingUpdate
  # For existing Prometheus installs, use the serviceMonitor: or prometheusScrape below.
  # The below setting annotates the networkCosts service endpoints for each of the network-costs pods.
  # The Service is annotated with prometheus.io/scrape: "true" to automatically get picked up by the Prometheus config.
  # NOTE: Setting this option to true and leaving the above extraScrapeConfig "job_name: kubecost-networking"
  # NOTE: configured will cause the pods to be scraped twice.
prometheusScrape: false
# Traffic Logging will enable logging the top 5 destinations for each source
# every 30 minutes.
trafficLogging: true
# Log level for the network cost containers. Options are "trace", "debug", "info", "warn", "error", "fatal", "panic"
logLevel: info
# Port will set both the containerPort and hostPort to this value.
# These must be identical due to network-costs being run on hostNetwork
port: 3001
# this daemonset can use significant resources on large clusters: https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/cost-allocation/network-allocation
resources:
limits: # remove the limits by setting cpu: null
cpu: 500m # can be less, will depend on cluster size
# memory: it is not recommended to set a memory limit
requests:
cpu: 50m
memory: 20Mi
extraArgs: []
config:
# Configuration for traffic destinations, including specific classification
# for IPs and CIDR blocks. This configuration will act as an override to the
# automatic classification provided by network-costs.
destinations:
      # In Zone contains a list of addresses/ranges that will be
      # classified as in-zone.
in-zone:
# Loopback Addresses in "IANA IPv4 Special-Purpose Address Registry"
- "127.0.0.0/8"
# IPv4 Link Local Address Space
- "169.254.0.0/16"
# Private Address Ranges in RFC-1918
- "10.0.0.0/8" # Remove this entry if using Multi-AZ Kubernetes
- "172.16.0.0/12"
- "192.168.0.0/16"
      # In Region contains a list of addresses/ranges that will be
      # classified as in-region. This is synonymous with cross-zone
      # traffic, where the source and destination regions are the
      # same, but the zones differ.
in-region: []
      # Cross Region contains a list of addresses/ranges that will be
      # classified as non-internet egress from one region to another.
cross-region: []
      # Internet contains a list of addresses/ranges that will be
      # classified as internet traffic. This is synonymous with traffic
      # that cannot be classified within the cluster.
      # NOTE: Internet classification filters are executed _after_
      # NOTE: direct-classification, but before in-zone, in-region,
      # NOTE: and cross-region.
internet: []
      # Direct Classification specifically maps an IP address or range
      # to a region (required) and/or zone (optional). This classification
      # takes priority over in-zone, in-region, and cross-region configurations.
direct-classification: []
# - region: "us-east1"
# zone: "us-east1-c"
# ips:
# - "10.0.0.0/24"
services:
# google-cloud-services: when set to true, enables labeling traffic metrics with google cloud
# service endpoints
google-cloud-services: true
# amazon-web-services: when set to true, enables labeling traffic metrics with amazon web service
# endpoints.
amazon-web-services: true
# azure-cloud-services: when set to true, enables labeling traffic metrics with azure cloud service
# endpoints
azure-cloud-services: true
      # User-defined services provide a way to define custom service endpoints, which will label
      # traffic metrics falling within the defined address range.
# services:
# - service: "test-service-1"
# ips:
# - "19.1.1.2"
# - service: "test-service-2"
# ips:
# - "15.128.15.2"
# - "20.0.0.0/8"
tolerations: []
affinity: {}
service:
annotations: {}
labels: {}
priorityClassName: ""
podMonitor:
enabled: false
additionalLabels: {}
additionalLabels: {}
nodeSelector: {}
# Annotations to be added to network cost daemonset template and pod template annotations
annotations: {}
healthCheckProbes: {}
additionalSecurityContext: {}
## Kubecost Deployment Configuration
## Used for HA mode in Business & Enterprise tier
##
kubecostDeployment:
replicas: 1
labels: {}
annotations: {}
## Kubecost Forecasting forecasts future cost patterns based on historical
## patterns observed by Kubecost.
forecasting:
enabled: true
# fullImageName overrides the default image construction logic. The exact
# image provided (registry, image, tag) will be used for the forecasting
# container.
fullImageName: gcr.io/kubecost1/kubecost-modeling:v0.1.19
imagePullPolicy: IfNotPresent
# Resource specification block for the forecasting container.
resources:
requests:
cpu: 200m
memory: 300Mi
limits:
cpu: 1500m
memory: 1Gi
# Set environment variables for the forecasting container as key/value pairs.
env:
    # -t is the worker timeout, which primarily affects model training time;
    # if it is not high enough, training workers may die mid-training.
"GUNICORN_CMD_ARGS": "--log-level info -t 1200"
priority:
enabled: false
name: ""
nodeSelector: {}
tolerations: []
annotations: {}
affinity: {}
readinessProbe:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
livenessProbe:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
## The Kubecost Aggregator is the primary query backend for Kubecost
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/federated-etl/aggregator
##
kubecostAggregator:
# deployMethod determines how Aggregator is deployed. Current options are
# "singlepod" (within cost-analyzer Pod) "statefulset" (separate
# StatefulSet), and "disabled". Only use "disabled" if this is a secondary
# Federated ETL cluster which does not need to answer queries.
deployMethod: singlepod
# fullImageName overrides the default image construction logic. The exact
# image provided (registry, image, tag) will be used for aggregator.
# fullImageName:
imagePullPolicy: IfNotPresent
  # For legacy configuration support, `enabled: true` overrides deployMethod
  # and is equivalent to setting `deployMethod: "statefulset"`
enabled: false
# Replicas sets the number of Aggregator replicas. It only has an effect if
# `deployMethod: "statefulset"`
replicas: 1
# Log level for the aggregator container. Options are "trace", "debug", "info", "warn", "error", "fatal", "panic"
logLevel: info
# stagingEmptyDirSizeLimit changes how large the "staging"
  # /var/configs/waterfowl emptyDir is. It only takes effect in StatefulSet
  # configurations of Aggregator; other configurations are unaffected.
#
# It should be set to approximately 8x the size of the largest bingen file in
# object storage. For example, if your largest bingen file is a daily
# Allocation file with size 300MiB, this value should be set to approximately
# 2400Mi. In most environments, the default should suffice.
stagingEmptyDirSizeLimit: 2Gi
  # This is the number of partitions the datastore is split into for copying.
  # The higher this number, the lower the RAM usage, but the longer it takes
  # for new data to show in the Kubecost UI.
  # Set to 0 for max partitioning (minimum possible RAM usage, but the slowest).
  # The default of 25 is sufficient for 95%+ of users and should only be
  # modified after consulting with Kubecost's support team.
numDBCopyPartitions: 25
# How many threads the read database is configured with (i.e. Kubecost API /
# UI queries). If increasing this value, it is recommended to increase the
# aggregator's memory requests & limits.
# default: 1
dbReadThreads: 1
# How many threads the write database is configured with (i.e. ingestion of
# new data from S3). If increasing this value, it is recommended to increase
# the aggregator's memory requests & limits.
# default: 1
dbWriteThreads: 1
# How many threads to use when ingesting Asset/Allocation/CloudCost data
# from the federated store bucket. In most cases the default is sufficient,
# but can be increased if trying to backfill historical data.
# default: 1
dbConcurrentIngestionCount: 1
  # Memory limit applied to the read database and write database connections.
  # The default of "no limit" is appropriate when first establishing a baseline
  # of required resource usage. It is eventually recommended to set these
  # values such that dbMemoryLimit + dbWriteMemoryLimit is less than the total
  # memory available to the aggregator pod.
  # default: 0GB is no limit
dbMemoryLimit: 0GB
dbWriteMemoryLimit: 0GB
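  # Example (illustrative, not a recommendation): for an aggregator pod with a
  # 16Gi memory limit, a starting point that leaves headroom for non-database
  # memory use might be:
  # dbMemoryLimit: 6GB
  # dbWriteMemoryLimit: 6GB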
# How much data to ingest from the federated store bucket, and how much data
# to keep in the DB before rolling the data off.
#
# Note: If increasing this value to backfill historical data, it will take
# time to gradually ingest and process those historical ETL files. Consider
# also increasing the resources available to the aggregator as well as the
# refresh and concurrency env vars.
#
# default: 91
etlDailyStoreDurationDays: 91
# How much hourly data to ingest from the federated store bucket, and how much
# to keep in the DB before rolling the data off.
#
# In high scale environments setting this to `0` can improve performance if hourly
# resolution is not a requirement.
#
# default: 49
etlHourlyStoreDurationHours: 49
  # How many days of container resource usage data to retain in the DB.
  #
  # In high scale environments setting this to `0` can improve performance if
  # container-level resource usage data is not a requirement.
  #
  # default: 1
containerResourceUsageRetentionDays: 1
  # Trim memory on close; only change if advised by Kubecost support.
dbTrimMemoryOnClose: true
persistentConfigsStorage:
storageClass: "" # default storage class
storageRequest: 1Gi
aggregatorDbStorage:
storageClass: "" # default storage class
storageRequest: 128Gi
resources: {}
# requests:
# cpu: 1000m
# memory: 1Gi
readinessProbe:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
## Set additional environment variables for the aggregator pod
# extraEnv:
# - name: SOME_VARIABLE
# value: "some_value"
## Add a priority class to the aggregator pod
# priority:
# enabled: false
# name: ""
## Optional - add extra ports to the aggregator container. For kubecost development purposes only - not recommended for users.
# extraPorts: []
# - name: debug
# port: 40000
# targetPort: 40000
# containerPort: 40000
## Define a securityContext for the aggregator pod. This will take highest precedence.
# securityContext: {}
## Define the container-level security context for the aggregator pod. This will take highest precedence.
# containerSecurityContext: {}
## Provide a Service Account name for aggregator.
# serviceAccountName: ""
## Define a nodeSelector for the aggregator pod
# nodeSelector: {}
## Define tolerations for the aggregator pod
# tolerations: []
## Annotations to be added for aggregator deployment or statefulset
# annotations: {}
## Define Pod affinity for the aggregator pod
# affinity: {}
## Define extra volumes for the aggregator pod
# extraVolumes: []
## Define extra volumemounts for the aggregator pod
# extraVolumeMounts: []
## Creates a new container/pod to retrieve CloudCost data. By default it uses
## the same serviceaccount as the cost-analyzer pod. A custom serviceaccount
## can be specified.
cloudCost:
# The cloudCost component of Aggregator depends on
# kubecostAggregator.deployMethod:
# kA.dM = "singlepod" -> cloudCost is run as container inside cost-analyzer
# kA.dM = "statefulset" -> cloudCost is run as single-replica Deployment
resources: {}
# requests:
# cpu: 1000m
# memory: 1Gi
# refreshRateHours:
# queryWindowDays:
# runWindowDays:
# serviceAccountName:
readinessProbe:
enabled: true
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 200
## Add a nodeSelector for aggregator cloud costs
# nodeSelector: {}
## Tolerations for the aggregator cloud costs
# tolerations: []
## Affinity for the aggregator cloud costs
# affinity: {}
## ServiceAccount for the aggregator cloud costs
# serviceAccountName: ""
## Define environment variables for cloud cost
# env: {}
## Define extra volumes for the cloud cost pod
# extraVolumes: []
## Define extra volumemounts for the cloud cost pod
# extraVolumeMounts: []
## Configure the Collections service for aggregator.
# collections:
# cache:
# enabled: false
# Jaeger is an optional container attached to wherever the Aggregator
# container is running. It is used for performance investigation. Enable if
# Kubecost Support asks.
jaeger:
enabled: false
image: jaegertracing/all-in-one
imageVersion: latest
service:
labels: {}
## Kubecost Multi-cluster Diagnostics (beta)
## A single view into the health of all agent clusters. Each agent cluster sends
## its diagnostic data to a storage bucket. Future versions may include
## repairing & alerting from the primary.
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/multi-cluster-diagnostics
##
diagnostics:
enabled: true
## The primary aggregates all diagnostic data and handles API requests. It's
## also responsible for deleting diagnostic data (on disk & bucket) beyond
## retention. When in readonly mode it does not push its own diagnostic data
## to the bucket.
primary:
enabled: false
retention: "7d"
readonly: false
## How frequently to run & push diagnostics. Defaults to 5 minutes.
pollingInterval: "300s"
## Creates a new Diagnostic file in the bucket for every run.
keepDiagnosticHistory: false
## Pushes the cluster's Kubecost Helm Values to the bucket once upon startup.
## This may contain sensitive information and is roughly 30kb per cluster.
collectHelmValues: false
## By default, the Multi-cluster Diagnostics service runs within the
## cost-model container in the cost-analyzer pod. For higher availability, it
## can be run as a separate deployment.
deployment:
enabled: false
resources:
requests:
cpu: "10m"
memory: "20Mi"
env: {}
labels: {}
securityContext: {}
containerSecurityContext: {}
nodeSelector: {}
tolerations: []
## Annotations to be added for diagnostics Deployment.
annotations: {}
affinity: {}
## Provide a full name override for the diagnostics Deployment.
# diagnosticsFullnameOverride: ""
# Kubecost Cluster Controller for Right Sizing and Cluster Turndown
clusterController:
enabled: false
image:
repository: gcr.io/kubecost1/cluster-controller
tag: v0.16.11
imagePullPolicy: IfNotPresent
priorityClassName: ""
tolerations: []
## Annotations to be added for cluster controller template
annotations: {}
resources: {}
affinity: {}
nodeSelector: {}
actionConfigs:
# this configures the Kubecost Cluster Turndown action
# for more details, see documentation at https://github.com/kubecost/cluster-turndown/tree/develop?tab=readme-ov-file#setting-a-turndown-schedule
clusterTurndown: []
# - name: my-schedule
# start: "2024-02-09T00:00:00Z"
# end: "2024-02-09T12:00:00Z"
# repeat: daily
# - name: my-schedule2
# start: "2024-02-09T00:00:00Z"
# end: "2024-02-09T01:00:00Z"
# repeat: weekly
# this configures the Kubecost Namespace Turndown action
# for more details, see documentation at https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/savings/savings-actions#namespace-turndown
namespaceTurndown:
# - name: my-ns-turndown-action
# dryRun: false
# schedule: "0 0 * * *"
# type: Scheduled
# targetObjs:
# - namespace
# keepPatterns:
# - ignorednamespace
# keepLabels:
# turndown: ignore
# params:
# minNamespaceAge: 4h
# this configures the Kubecost Cluster Sizing action
# for more details, see documentation at https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/savings/savings-actions#cluster-sizing
clusterRightsize:
# startTime: '2024-01-02T15:04:05Z'
# frequencyMinutes: 1440
# lastCompleted: ''
# recommendationParams:
# window: 48h
# architecture: ''
# targetUtilization: 0.8
# minNodeCount: 1
# allowSharedCore: false
# allowCostIncrease: false
# recommendationType: ''
# This configures the Kubecost Continuous Request Sizing Action
#
# Using this configuration overrides annotation-based configuration of
# Continuous Request Sizing. Annotation configuration will be ignored while
# this configuration method is present in the cluster.
#
# For more details, see documentation at https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/savings/savings-actions#automated-request-sizing
containerRightsize:
# Workloads can be selected by an _exact_ key (namespace, controllerKind,
# controllerName). This will only match a single controller. The cluster
      # ID is currently irrelevant because Cluster Controller can only modify
# workloads within the cluster it is running in.
# workloads:
# - clusterID: cluster-one
# namespace: my-namespace
# controllerKind: deployment
# controllerName: my-controller
# An alternative to exact key selection is filter selection. The filters
# are syntactically identical to Kubecost's "v2" filters [1] but only
# support a small set of filter fields, those being:
# - namespace
# - controllerKind
# - controllerName
# - label
# - annotation
#
# If multiple filters are listed, they will be ORed together at the top
# level.
#
# See the examples below.
#
# [1] https://docs.kubecost.com/apis/filters-api
# filterConfig:
# - filter: |
# namespace:"abc"+controllerKind:"deployment"
# - filter: |
# controllerName:"abc123"+controllerKind:"daemonset"
# - filter: |
# namespace:"foo"+controllerKind!:"statefulset"
# - filter: |
# namespace:"bar","baz"
# schedule:
# start: "2024-01-30T15:04:05Z"
# frequencyMinutes: 5
# recommendationQueryWindow: "48h"
# lastModified: ''
# targetUtilizationCPU: 0.8 # results in a cpu request setting that is 20% higher than the max seen over last 48h
# targetUtilizationMemory: 0.8 # results in a RAM request setting that is 20% higher than the max seen over last 48h
kubescaler:
    # If true, will cause all (supported) workloads to have their requests
    # automatically right-sized on a regular basis.
defaultResizeAll: false
# fqdn: kubecost-cluster-controller.kubecost.svc.cluster.local:9731
namespaceTurndown:
rbac:
enabled: true
reporting:
# Kubecost bug report feature: Logs access/collection limited to .Release.Namespace
# Ref: http://docs.kubecost.com/bug-report
logCollection: true
# Basic frontend analytics
productAnalytics: true
# Report Javascript errors
errorReporting: true
valuesReporting: true
# googleAnalyticsTag allows you to embed your Google Global Site Tag to track usage of Kubecost.
# googleAnalyticsTag is only included in our Enterprise offering.
# googleAnalyticsTag: G-XXXXXXXXX
serviceMonitor: # The Prometheus bundled with Kubecost uses scrape configs and does not support ServiceMonitors. The following options assume an existing Prometheus installation that supports ServiceMonitors.
enabled: false
interval: 1m
scrapeTimeout: 10s
additionalLabels: {}
metricRelabelings: []
relabelings: []
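  # Example (illustrative): if your Prometheus Operator selects ServiceMonitors
  # by a Helm release label, a matching label might be:
  # additionalLabels:
  #   release: prometheus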
networkCosts:
enabled: false
interval: 1m
scrapeTimeout: 10s
additionalLabels: {}
metricRelabelings: []
relabelings: []
aggregatorMetrics:
enabled: false
interval: 1m
scrapeTimeout: 10s
additionalLabels: {}
metricRelabelings: []
relabelings:
- action: replace
sourceLabels:
- __meta_kubernetes_namespace
targetLabel: namespace
prometheusRule:
enabled: false
additionalLabels: {}
supportNFS: false
# initChownDataImage ensures all Kubecost filepath permissions on PV or local storage are set up correctly.
initChownDataImage: "busybox" # Supports a fully qualified Docker image, e.g. registry.hub.docker.com/library/busybox:latest
initChownData:
resources: {}
## Kubecost's Bundled Grafana
## You can access it by visiting http://kubecost.me.com/grafana/
## Ref: https://docs.kubecost.com/install-and-configure/advanced-configuration/custom-grafana
grafana:
# namespace_datasources: kubecost # override the default namespace here
# namespace_dashboards: kubecost # override the default namespace here
rbac:
create: true
serviceAccount:
create: true
name: ""
## Provide a full name override for the Grafana Deployment.
# fullnameOverride: ""
## Provide a name override for the Grafana Deployment.
# nameOverride: ""
## Configure grafana datasources
## ref: http://docs.grafana.org/administration/provisioning/#datasources
##
# datasources:
# datasources.yaml:
# apiVersion: 1
# datasources:
# - name: prometheus-kubecost
# type: prometheus
# url: http://kubecost-prometheus-server.kubecost.svc.cluster.local
# access: proxy
# isDefault: false
# jsonData:
# httpMethod: POST
# prometheusType: Prometheus
# prometheusVersion: 2.35.0
# timeInterval: 1m
replicas: 1
deploymentStrategy: RollingUpdate
readinessProbe:
httpGet:
path: /api/health
port: 3000
livenessProbe:
httpGet:
path: /api/health
port: 3000
initialDelaySeconds: 60
timeoutSeconds: 30
failureThreshold: 10
image:
repository: grafana/grafana
tag: 11.3.1
pullPolicy: IfNotPresent
# pullSecrets:
securityContext: {}
priorityClassName: ""
## Container image settings for Grafana initContainer used to download dashboards. Will only be used when dashboards are present.
downloadDashboardsImage:
repository: curlimages/curl
tag: latest
pullPolicy: IfNotPresent
podAnnotations: {}
annotations: {}
service:
type: ClusterIP
port: 80
annotations: {}
labels: {}
resources: {}
nodeSelector: {}
tolerations: []
affinity: {}
persistence:
enabled: false
# storageClassName: default
# accessModes:
# - ReadWriteOnce
# size: 10Gi
# annotations: {}
# subPath: ""
# existingClaim:
adminUser: admin
adminPassword: strongpassword
# schedulerName:
env: {}
envFromSecret: ""
extraSecretMounts: []
plugins: []
dashboardProviders: {}
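  # Example (illustrative) file-based provider, following the upstream Grafana
  # chart convention, for loading dashboards from a local path:
  # dashboardProviders:
  #   dashboardproviders.yaml:
  #     apiVersion: 1
  #     providers:
  #       - name: default
  #         orgId: 1
  #         folder: ""
  #         type: file
  #         disableDeletion: false
  #         options:
  #           path: /var/lib/grafana/dashboards/default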
dashboards: {}
dashboardsConfigMaps: {}
  ## Grafana sidecars that collect the ConfigMaps with the specified label and store the included files in the respective folders
  ## Requires at least Grafana 5 to work and can't be used together with the parameters dashboardProviders, datasources, and dashboards
sidecar:
image:
repository: ghcr.io/kiwigrid/k8s-sidecar
tag: 1.28.1
pullPolicy: IfNotPresent
resources: {}
dashboards:
enabled: true
# label that the configmaps with dashboards are marked with
label: grafana_dashboard
labelValue: "1"
# set sidecar ERROR_THROTTLE_SLEEP env var from default 5s to 0s -> fixes https://github.com/kubecost/cost-analyzer-helm-chart/issues/877
annotations: {}
error_throttle_sleep: 0
folder: /tmp/dashboards
datasources:
# dataSourceFilename: foo.yml # If you need to change the name of the datasource file
enabled: false
error_throttle_sleep: 0
# label that the configmaps with datasources are marked with
label: grafana_datasource
## Grafana's primary configuration
## NOTE: values in map will be converted to ini format
## ref: http://docs.grafana.org/installation/configuration/
##
## For grafana to be accessible, add the path to root_url. For example, if you run kubecost at www.foo.com:9090/kubecost
## set root_url to "%(protocol)s://%(domain)s:%(http_port)s/kubecost/grafana". No change is necessary here if kubecost runs at a root URL
grafana.ini:
server:
serve_from_sub_path: false # Set to false on Grafana v10+
root_url: "%(protocol)s://%(domain)s:%(http_port)s/grafana"
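      # Example (illustrative), per the note above, when Kubecost is served at
      # a /kubecost subpath:
      # root_url: "%(protocol)s://%(domain)s:%(http_port)s/kubecost/grafana"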
paths:
data: /var/lib/grafana/data
logs: /var/log/grafana
plugins: /var/lib/grafana/plugins
provisioning: /etc/grafana/provisioning
analytics:
check_for_updates: true
log:
mode: console
grafana_net:
url: https://grafana.net
auth.anonymous:
enabled: true
org_role: Editor
org_name: Main Org.
serviceAccount:
create: true # Set this to false if you're bringing your own service account.
annotations: {}
awsstore:
useAwsStore: false
imageNameAndVersion: gcr.io/kubecost1/awsstore:latest
createServiceAccount: false
priorityClassName: ""
nodeSelector: {}
annotations: {}
## Federated ETL Architecture
## Ref: https://docs.kubecost.com/install-and-configure/install/multi-cluster/federated-etl
##
federatedETL:
## If true, installs the minimal set of components required for a Federated ETL cluster.
agentOnly: false
## If true, push ETL data to the federated storage bucket
federatedCluster: false
## If true, this cluster will be able to read from the federated-store but will
  ## not write to it. This is useful in situations where you want to deploy a
## primary cluster, but don't want the primary cluster's ETL data to be
## pushed to the bucket
readOnlyPrimary: false
## If true, changes the dir of S3 backup to the Federated combined store.
## Commonly used when transitioning from Thanos to Federated ETL architecture.
redirectS3Backup: false
## If true, will query metrics from a central PromQL DB (e.g. Amazon Managed
## Prometheus)
useMultiClusterDB: false
## Kubecost Admission Controller (beta feature)
## To use this feature, ensure you have run the `create-admission-controller.sh`
## script. This generates a k8s secret with TLS keys/certificates and a
## corresponding CA bundle.
##
kubecostAdmissionController:
enabled: false
secretName: webhook-server-tls
caBundle: ${CA_BUNDLE}
# Enables or disables the Cost Event Audit pipeline, which tracks recent changes at cluster level
# and provides an estimated cost impact via the Kubecost Predict API.
#
# It is disabled by default to avoid problems in high-scale environments.
costEventsAudit:
enabled: false
## Disable updates to kubecost from the frontend UI and via POST request
## This feature is considered beta; enterprise users should use teams:
## https://docs.kubecost.com/using-kubecost/navigating-the-kubecost-ui/teams
# readonly: false
# # These configs can also be set from the Settings page in the Kubecost product
# # UI. Values in this block override config changes in the Settings UI on pod
# # restart
# kubecostProductConfigs:
# # An optional list of cluster definitions that can be added for frontend
# # access. The local cluster is *always* included by default, so this list is
# # for non-local clusters.
# clusters:
# - name: "Cluster A"
# address: http://cluster-a.kubecost.com:9090
# # Optional authentication credentials - only basic auth is currently supported.
# auth:
# type: basic
# # Secret name should be a secret formatted based on: https://github.com/kubecost/poc-common-configurations/tree/main/ingress-examples
# secretName: cluster-a-auth
# # Or pass auth directly as base64 encoded user:pass
# data: YWRtaW46YWRtaW4=
# # Or user and pass directly
# user: admin
# pass: admin
# - name: "Cluster B"
# address: http://cluster-b.kubecost.com:9090
#   # Enabling customPricesEnabled and defaultModelPricing instructs Kubecost to
#   # use these custom monthly resource prices when reporting node costs. Note
#   # that the below configuration is for the monthly cost of the resource.
#   # Kubecost considers there to be 730 hours in a month. Also note that these
#   # configurations will have no effect on metrics emitted such as
#   # `node_ram_hourly_cost` or `node_cpu_hourly_cost`.
# # Ref: https://docs.kubecost.com/install-and-configure/install/provider-installations/air-gapped
# customPricesEnabled: false
# defaultModelPricing:
# enabled: true
# CPU: "28.0"
# spotCPU: "4.86"
# RAM: "3.09"
# spotRAM: "0.65"
# GPU: "693.50"
# spotGPU: "225.0"
# storage: "0.04"
# zoneNetworkEgress: "0.01"
# regionNetworkEgress: "0.01"
# internetNetworkEgress: "0.12"
# # The cluster profile represents a predefined set of parameters to use when calculating savings.
# # Possible values are: [ development, production, high-availability ]
# clusterProfile: production
# spotLabel: lifecycle
# spotLabelValue: Ec2Spot
# gpuLabel: gpu
# gpuLabelValue: true
# alibabaServiceKeyName: ""
# alibabaServiceKeyPassword: ""
# awsServiceKeyName: ""
# awsServiceKeyPassword: ""
# awsSpotDataRegion: us-east-1
# awsSpotDataBucket: spot-data-feed-s3-bucket
# awsSpotDataPrefix: dev
#   athenaProjectID: "530337586277" # The AWS AccountID where the Athena CUR is. Generally your master payer account
# athenaBucketName: "s3://aws-athena-query-results-530337586277-us-east-1"
# athenaRegion: us-east-1
# athenaDatabase: athenacurcfn_athena_test1
# athenaTable: "athena_test1"
# athenaWorkgroup: "primary" # The default workgroup in AWS is 'primary'
# masterPayerARN: ""
# projectID: "123456789" # Also known as AccountID on AWS -- the current account/project that this instance of Kubecost is deployed on.
# gcpSecretName: gcp-secret # Name of a secret representing the gcp service key
# gcpSecretKeyName: compute-viewer-kubecost-key.json # Name of the secret's key containing the gcp service key
# bigQueryBillingDataDataset: billing_data.gcp_billing_export_v1_01AC9F_74CF1D_5565A2
# labelMappingConfigs: # names of k8s labels or annotations used to designate different allocation concepts
# enabled: true
# owner_label: "owner"
# team_label: "team"
# department_label: "dept"
# product_label: "product"
# environment_label: "env"
# namespace_external_label: "kubernetes_namespace" # external labels/tags are used to map external cloud costs to kubernetes concepts
# cluster_external_label: "kubernetes_cluster"
# controller_external_label: "kubernetes_controller"
# product_external_label: "kubernetes_label_app"
# service_external_label: "kubernetes_service"
# deployment_external_label: "kubernetes_deployment"
# owner_external_label: "kubernetes_label_owner"
# team_external_label: "kubernetes_label_team"
# environment_external_label: "kubernetes_label_env"
# department_external_label: "kubernetes_label_department"
# statefulset_external_label: "kubernetes_statefulset"
# daemonset_external_label: "kubernetes_daemonset"
# pod_external_label: "kubernetes_pod"
# grafanaURL: ""
# # Provide a mapping from Account ID to a readable Account Name in a key/value object. Provide Account IDs as they are displayed in CloudCost
# # as the 'key' and the Account Name associated with it as the 'value'
# cloudAccountMapping:
# EXAMPLE_ACCOUNT_ID: EXAMPLE_ACCOUNT_NAME
# clusterName: "" # clusterName is the default context name in settings.
# clusterAccountID: "" # Manually set Account property for assets
# currencyCode: "USD" # official support for USD, AUD, BRL, CAD, CHF, CNY, DKK, EUR, GBP, IDR, INR, JPY, NOK, PLN, SEK
# azureBillingRegion: US # Represents 2-letter region code, e.g. West Europe = NL, Canada = CA. ref: https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
# azureSubscriptionID: 0bd50fdf-c923-4e1e-850c-196dd3dcc5d3
# azureClientID: f2ef6f7d-71fb-47c8-b766-8d63a19db017
# azureTenantID: 72faf3ff-7a3f-4597-b0d9-7b0b201bb23a
#   azureClientPassword: fake key # Only use if your values.yaml is stored encrypted. Otherwise provide an existing secret via serviceKeySecretName
# azureOfferDurableID: "MS-AZR-0003p"
# discount: "" # percentage discount applied to compute
# negotiatedDiscount: "" # custom negotiated cloud provider discount
# standardDiscount: "" # custom negotiated cloud provider discount, applied to all incoming asset compute costs in a federated environment. Overrides negotiatedDiscount on any cluster in the federated environment.
# defaultIdle: false
# serviceKeySecretName: "" # Use an existing AWS or Azure secret with format as in aws-service-key-secret.yaml or azure-service-key-secret.yaml. Leave blank if using createServiceKeySecret
# createServiceKeySecret: true # Creates a secret representing your cloud service key based on data in values.yaml. If you are storing unencrypted values, add a secret manually
# sharedNamespaces: "" # namespaces with shared workloads, example value: "kube-system\,ingress-nginx\,kubecost\,monitoring"
# sharedOverhead: "" # value representing a fixed external cost per month to be distributed among aggregations.
# shareTenancyCosts: true # enable or disable sharing costs such as cluster management fees (defaults to "true" on Settings page)
# metricsConfigs: # configuration for metrics emitted by Kubecost
# disabledMetrics: [] # list of metrics that Kubecost will not emit. Note that disabling metrics can lead to unexpected behavior in the cost-model.
# productKey: # Apply enterprise product license
# enabled: false
# key: ""
# secretname: productkeysecret # Reference an existing k8s secret created from a file named productkey.json of format { "key": "enterprise-key-here" }. If the secretname is specified, a configmap with the key will not be created.
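#     # Example (illustrative): kubectl create secret generic productkeysecret -n kubecost --from-file=productkey.json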
#     mountPath: "/some/custom/path/productkey.json" # (use instead of secretname) Declare the path at which the product key file is mounted (e.g. by a secrets provisioner). The file must be of format { "key": "enterprise-key-here" }.
# # The following block enables the use of a custom SMTP server which overrides Kubecost's built-in, external SMTP server for alerts and reports
# smtp:
# config: |
# {
# "sender_email": "",
# "host": "",
# "port": 587,
# "authentication": true,
# "username": "",
# "password": "",
# "secure": true
# }
# secretname: smtpconfigsecret # Reference an existing k8s secret created from a file named smtp.json of format specified by config above. If the secretname is specified, a configmap with the key will not be created.
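#     # Example (illustrative): kubectl create secret generic smtpconfigsecret -n kubecost --from-file=smtp.json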
#     mountPath: "/some/custom/path/smtp.json" # (use instead of secretname) Declare the path at which the SMTP config file is mounted (e.g. by a secrets provisioner). The file must be of format specified by config above.
# carbonEstimates: false # Enables Kubecost beta carbon estimation endpoints /assets/carbon and /allocations/carbon
# The below options to hide UI elements are only supported in Enterprise
# hideDiagnostics: false # useful if the primary is not monitored. Supported in limited environments.
# hideOrphanedResources: false # OrphanedResources works on the primary-cluster's cloud-provider only.
# hideKubecostActions: false
# hideReservedInstances: false
# hideSpotCommander: false
# hideUnclaimedVolumes: false
# hideCloudIntegrationsUI: false
# hideBellIcon: false
# hideTeams: false
# savingsRecommendationsAllowLists: # Define select list of instance types to be evaluated in computing Savings Recommendations
# AWS: []
# GCP: []
# Azure: []
## Specify an existing Kubernetes Secret holding the cloud integration information. This Secret must contain
## a key with name `cloud-integration.json` and the contents must be in a specific format. It is expected
## to exist in the release Namespace. This is mutually exclusive with cloudIntegrationJSON; only one may be defined.
# cloudIntegrationSecret: "cloud-integration"
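## Example (illustrative): kubectl create secret generic cloud-integration -n kubecost --from-file=cloud-integration.json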
## Specify the cloud integration information in JSON form if pointing to an existing Secret is not desired or you'd rather
## define the cloud integration information directly in the values file. This will result in a new Secret being created
## named `cloud-integration` in the release Namespace. It is mutually exclusive with cloudIntegrationSecret; only one may be defined.
# cloudIntegrationJSON: |-
# {
# "aws": [
# {
# "athenaBucketName": "s3://AWS_cloud_integration_athenaBucketName",
# "athenaRegion": "AWS_cloud_integration_athenaRegion",
# "athenaDatabase": "AWS_cloud_integration_athenaDatabase",
#         "athenaTable": "AWS_cloud_integration_athenaTable",
# "projectID": "AWS_cloud_integration_athena_projectID",
# "serviceKeyName": "AWS_cloud_integration_athena_serviceKeyName",
# "serviceKeySecret": "AWS_cloud_integration_athena_serviceKeySecret"
# }
# ],
# "azure": [
# {
# "azureSubscriptionID": "my-subscription-id",
# "azureStorageAccount": "my-storage-account",
# "azureStorageAccessKey": "my-storage-access-key",
# "azureStorageContainer": "my-storage-container"
# }
# ],
# "gcp": [
# {
# "projectID": "my-project-id",
# "billingDataDataset": "detailedbilling.my-billing-dataset",
# "key": {
# "type": "service_account",
# "project_id": "my-project-id",
# "private_key_id": "my-private-key-id",
# "private_key": "my-pem-encoded-private-key",
# "client_email": "my-service-account-name@my-project-id.iam.gserviceaccount.com",
# "client_id": "my-client-id",
# "auth_uri": "auth-uri",
# "token_uri": "token-uri",
# "auth_provider_x509_cert_url": "my-x509-provider-cert",
# "client_x509_cert_url": "my-x509-cert-url"
# }
# }
# ]
# }
# ingestPodUID: false # Enables using UIDs to uniquely ID pods. This requires either Kubecost's replicated KSM metrics, or KSM v2.1.0+. This may impact performance, and changes the default cost-model allocation behavior.
# regionOverrides: "region1,region2,region3" # list of regions which will override default costmodel provider regions
# Explicit names of various ConfigMaps to use. If not set, a default will apply.
# pricingConfigmapName: ""
# productConfigmapName: ""
# smtpConfigmapName: ""
# -- Array of extra K8s manifests to deploy
## Note: Supports use of custom Helm templates
extraObjects: []
# Cloud Billing Integration:
# - apiVersion: v1
# kind: Secret
# metadata:
# name: cloud-integration
# namespace: kubecost
# type: Opaque
# data:
# cloud-integration.json: BASE64_SECRET
# Istio:
# - apiVersion: networking.istio.io/v1alpha3
# kind: VirtualService
# metadata:
# name: my-virtualservice
# spec:
# hosts:
# - kubecost.myorg.com
# gateways:
# - my-gateway
# http:
# - route:
# - destination:
# host: kubecost.kubecost.svc.cluster.local
# port:
# number: 80
# -- Optional override for the image used for the basic health test container
# basicHealth:
# fullImageName: alpine/k8s:1.26.9