From 11198e55547f1215df4c8a50f5e5bd10bd99dda2 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Mon, 30 Mar 2026 17:58:14 -0700 Subject: [PATCH 1/9] Integrate OpenTelemetry Weaver into Cortex (Distributor POC) Schema-driven telemetry: define metrics/spans in YAML, generate Go code and documentation from the schema using OpenTelemetry Weaver. - Add telemetry/registry/ with Distributor metric and span definitions - Add Jinja2 templates for Go code generation and markdown docs - Generate pkg/distributor/telemetry_gen.go with metric registration - Refactor distributor.go New() to use generated registerDistributorMetrics() - Add Makefile targets: telemetry-check, telemetry-generate, check-telemetry - Add CI steps for schema validation and generated code freshness - Add Weaver binary to build-image Dockerfile - Add Rego naming policy for Cortex metric conventions Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- .github/workflows/test-build-deploy.yml | 4 + Makefile | 16 +- build-image/Dockerfile | 10 + docs/telemetry/cortex_distributor.md | 35 ++ pkg/distributor/distributor.go | 130 +----- pkg/distributor/telemetry_gen.go | 203 ++++++++++ telemetry/manifest.yaml | 4 + telemetry/policies/naming.rego | 20 + telemetry/registry/distributor/metrics.yaml | 382 ++++++++++++++++++ telemetry/registry/distributor/spans.yaml | 153 +++++++ telemetry/templates/registry/go/metrics.go.j2 | 152 +++++++ telemetry/templates/registry/go/weaver.yaml | 65 +++ .../templates/registry/markdown/metrics.md.j2 | 15 + .../templates/registry/markdown/weaver.yaml | 5 + 14 files changed, 1076 insertions(+), 118 deletions(-) create mode 100644 docs/telemetry/cortex_distributor.md create mode 100644 pkg/distributor/telemetry_gen.go create mode 100644 telemetry/manifest.yaml create mode 100644 telemetry/policies/naming.rego create mode 100644 telemetry/registry/distributor/metrics.yaml create mode 100644 telemetry/registry/distributor/spans.yaml create mode 100644 telemetry/templates/registry/go/metrics.go.j2 create mode 100644 telemetry/templates/registry/go/weaver.yaml create mode 100644 telemetry/templates/registry/markdown/metrics.md.j2 create mode 100644 telemetry/templates/registry/markdown/weaver.yaml diff --git a/.github/workflows/test-build-deploy.yml b/.github/workflows/test-build-deploy.yml index 05f708b350f..fde198490fa 100644 --- a/.github/workflows/test-build-deploy.yml +++ b/.github/workflows/test-build-deploy.yml @@ -40,6 +40,10 @@ jobs: run: make BUILD_IN_CONTAINER=false check-protos - name: Check Modernize run: make BUILD_IN_CONTAINER=false check-modernize + - name: Check Telemetry Schema + run: make BUILD_IN_CONTAINER=false telemetry-check + - name: Check Generated Telemetry Code + run: make BUILD_IN_CONTAINER=false check-telemetry test: strategy: diff --git a/Makefile b/Makefile index f612c173592..5df68c476fd 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ # WARNING: do not commit to a repository! -include Makefile.local -.PHONY: all test cover clean images protos exes dist doc clean-doc check-doc push-multiarch-build-image +.PHONY: all test cover clean images protos exes dist doc clean-doc check-doc push-multiarch-build-image telemetry-check telemetry-generate check-telemetry .DEFAULT_GOAL := all # Version number @@ -126,7 +126,7 @@ GOVOLUMES= -v $(shell pwd)/.cache:/go/cache:delegated,z \ -v $(shell pwd)/.pkg:/go/pkg:delegated,z \ -v $(shell pwd):/go/src/github.com/cortexproject/cortex:delegated,z -exes $(EXES) protos $(PROTO_GOS) lint test cover shell mod-check check-protos doc modernize: build-image/$(UPTODATE) +exes $(EXES) protos $(PROTO_GOS) lint test cover shell mod-check check-protos doc modernize telemetry-check telemetry-generate check-telemetry: build-image/$(UPTODATE) @mkdir -p $(shell pwd)/.pkg @mkdir -p $(shell pwd)/.cache @echo @@ -238,6 +238,18 @@ mod-check: GO111MODULE=on go mod vendor @git diff --exit-code -- go.sum go.mod vendor/ +# Telemetry schema validation and code generation (requires weaver CLI). +telemetry-check: + weaver registry check -r telemetry/registry + +telemetry-generate: + weaver registry generate -r telemetry/registry -t telemetry/templates go pkg/distributor/ + weaver registry generate -r telemetry/registry -t telemetry/templates markdown docs/telemetry/ + +check-telemetry: telemetry-generate + @git diff --exit-code -- pkg/distributor/telemetry_gen.go docs/telemetry/ || \ + (echo "Generated telemetry code is out of date. Run 'make telemetry-generate' and commit the results." && false) + check-protos: clean-protos protos @git diff --exit-code -- $(PROTO_GOS) diff --git a/build-image/Dockerfile b/build-image/Dockerfile index 4d455535eee..39ba4d4d39d 100644 --- a/build-image/Dockerfile +++ b/build-image/Dockerfile @@ -27,6 +27,16 @@ RUN go install github.com/client9/misspell/cmd/misspell@v0.3.4 &&\ go install github.com/campoy/embedmd@v1.0.0 &&\ rm -rf /go/pkg /go/src /root/.cache +# Install OpenTelemetry Weaver for telemetry schema validation and code generation. +ENV WEAVER_VERSION=0.16.2 +RUN GOARCH=$(go env GOARCH) && \ + URL="https://github.com/open-telemetry/weaver/releases/download/v${WEAVER_VERSION}/weaver-x86_64-unknown-linux-gnu.tar.gz" && \ + if [ "$GOARCH" = "arm64" ]; then \ + URL="https://github.com/open-telemetry/weaver/releases/download/v${WEAVER_VERSION}/weaver-aarch64-unknown-linux-gnu.tar.gz"; \ + fi && \ + curl -fsSL "${URL}" | tar xz -C /usr/bin weaver && \ + chmod +x /usr/bin/weaver + COPY build.sh / ENV GOCACHE=/go/cache ENTRYPOINT ["/build.sh"] diff --git a/docs/telemetry/cortex_distributor.md b/docs/telemetry/cortex_distributor.md new file mode 100644 index 00000000000..374b9cb5b73 --- /dev/null +++ b/docs/telemetry/cortex_distributor.md @@ -0,0 +1,35 @@ + + + +# Distributor Telemetry Reference + +This document is auto-generated from the telemetry schema defined in +`telemetry/registry/distributor/`. Do not edit manually. + +## Metrics + +| Name | Type | Unit | Description | Labels | +|------|------|------|-------------|--------| +| `cortex_distributor_deduped_samples_total` | counter | {sample} | The total number of deduplicated samples. | `user`, `cluster` | +| `cortex_distributor_exemplars_in_total` | counter | {exemplar} | The total number of exemplars that have come in to the distributor, including rejected or deduped exemplars. | `user` | +| `cortex_distributor_inflight_client_requests` | gauge | {request} | Current number of inflight client requests in distributor. | - | +| `cortex_distributor_inflight_push_requests` | gauge | {request} | Current number of inflight push requests in distributor. | - | +| `cortex_distributor_ingester_append_failures_total` | counter | {append} | The total number of failed batch appends sent to ingesters. | `ingester`, `type`, `status` | +| `cortex_distributor_ingester_appends_total` | counter | {append} | The total number of batch appends sent to ingesters. | `ingester`, `type` | +| `cortex_distributor_ingester_partial_data_queries_total` | counter | {query} | The total number of queries sent to ingesters that may have returned partial data. | - | +| `cortex_distributor_ingester_push_timeouts_total` | counter | {timeout} | The total number of push requests to ingesters that were canceled due to timeout. | - | +| `cortex_distributor_ingester_queries_total` | counter | {query} | The total number of queries sent to ingesters. | `ingester` | +| `cortex_distributor_ingester_query_failures_total` | counter | {query} | The total number of failed queries sent to ingesters. | `ingester` | +| `cortex_distributor_ingestion_rate_samples_per_second` | gauge | {sample}/s | Current ingestion rate in samples/sec that distributor is using to limit access. | - | +| `cortex_distributor_instance_limits` | gauge | {limit} | Instance limits used by this distributor. | `limit` | +| `cortex_distributor_latest_seen_sample_timestamp_seconds` | gauge | s | Unix timestamp of latest received sample per user. | `user` | +| `cortex_distributor_metadata_in_total` | counter | {metadata} | The total number of metadata that have come in to the distributor, including rejected. | `user` | +| `cortex_distributor_non_ha_samples_received_total` | counter | {sample} | The total number of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels. | `user` | +| `cortex_distributor_query_duration_seconds` | histogram | s | Time spent executing expression and exemplar queries. | `method`, `status_code` | +| `cortex_distributor_received_exemplars_total` | counter | {exemplar} | The total number of received exemplars, excluding rejected and deduped exemplars. | `user` | +| `cortex_distributor_received_metadata_total` | counter | {metadata} | The total number of received metadata, excluding rejected. | `user` | +| `cortex_distributor_received_samples_per_labelset_total` | counter | {sample} | The total number of received samples per label set, excluding rejected and deduped samples. | `user`, `type`, `labelset` | +| `cortex_distributor_received_samples_total` | counter | {sample} | The total number of received samples, excluding rejected and deduped samples. | `user`, `type` | +| `cortex_distributor_replication_factor` | gauge | {factor} | The configured replication factor. | - | +| `cortex_distributor_samples_in_total` | counter | {sample} | The total number of samples that have come in to the distributor, including rejected or deduped samples. | `user`, `type` | +| `cortex_labels_per_sample` | histogram | {label} | Number of labels per sample. | - | \ No newline at end of file diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 04f62fabbe6..3a32b47f3bd 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -323,106 +323,23 @@ func New(cfg Config, clientConfig ingester_client.Config, limits *validation.Ove HATracker: haTracker, ingestionRate: util_math.NewEWMARate(0.2, instanceIngestionRateTickInterval), - queryDuration: instrument.NewHistogramCollector(promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "cortex", - Name: "distributor_query_duration_seconds", - Help: "Time spent executing expression and exemplar queries.", - Buckets: []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 20, 30}, - }, []string{"method", "status_code"})), - receivedSamples: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_received_samples_total", - Help: "The total number of received samples, excluding rejected and deduped samples.", - }, []string{"user", "type"}), - receivedSamplesPerLabelSet: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_received_samples_per_labelset_total", - Help: "The total number of received samples per label set, excluding rejected and deduped samples.", - }, []string{"user", "type", "labelset"}), - receivedExemplars: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_received_exemplars_total", - Help: "The total number of received exemplars, excluding rejected and deduped exemplars.", - }, []string{"user"}), - receivedMetadata: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_received_metadata_total", - Help: "The total number of received metadata, excluding rejected.", - }, []string{"user"}), - incomingSamples: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_samples_in_total", - Help: "The total number of samples that have come in to the distributor, including rejected or deduped samples.", - }, []string{"user", "type"}), - incomingExemplars: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_exemplars_in_total", - Help: "The total number of exemplars that have come in to the distributor, including rejected or deduped exemplars.", - }, []string{"user"}), - incomingMetadata: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_metadata_in_total", - Help: "The total number of metadata that have come in to the distributor, including rejected.", - }, []string{"user"}), - nonHASamples: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_non_ha_samples_received_total", - Help: "The total number of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels.", - }, []string{"user"}), - dedupedSamples: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_deduped_samples_total", - Help: "The total number of deduplicated samples.", - }, []string{"user", "cluster"}), - labelsHistogram: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Namespace: "cortex", - Name: "labels_per_sample", - Help: "Number of labels per sample.", - Buckets: []float64{5, 10, 15, 20, 25}, - }), - ingesterAppends: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_ingester_appends_total", - Help: "The total number of batch appends sent to ingesters.", - }, []string{"ingester", "type"}), - ingesterAppendFailures: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_ingester_append_failures_total", - Help: "The total number of failed batch appends sent to ingesters.", - }, []string{"ingester", "type", "status"}), - ingesterQueries: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_ingester_queries_total", - Help: "The total number of queries sent to ingesters.", - }, []string{"ingester"}), - ingesterQueryFailures: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_ingester_query_failures_total", - Help: "The total number of failed queries sent to ingesters.", - }, []string{"ingester"}), - ingesterPartialDataQueries: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Namespace: "cortex", - Name: "distributor_ingester_partial_data_queries_total", - Help: "The total number of queries sent to ingesters that may have returned partial data.", - }), - replicationFactor: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "cortex", - Name: "distributor_replication_factor", - Help: "The configured replication factor.", - }), - latestSeenSampleTimestampPerUser: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ - Name: "cortex_distributor_latest_seen_sample_timestamp_seconds", - Help: "Unix timestamp of latest received sample per user.", - }, []string{"user"}), - distributorIngesterPushTimeout: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_distributor_ingester_push_timeouts_total", - Help: "The total number of push requests to ingesters that were canceled due to timeout.", - }), - validateMetrics: validation.NewValidateMetrics(reg), asyncExecutor: util.NewNoOpExecutor(), } + // Register all Distributor-owned metrics from the generated schema. + registerDistributorMetrics(d, reg, DistributorGaugeFuncs{ + CortexDistributorInflightPushRequestsFunc: func() float64 { + return float64(d.inflightPushRequests.Load()) + }, + CortexDistributorInflightClientRequestsFunc: func() float64 { + return float64(d.inflightClientRequests.Load()) + }, + CortexDistributorIngestionRateSamplesPerSecondFunc: func() float64 { + return d.ingestionRate.Rate() + }, + }) + d.labelSetTracker = labelset.NewLabelSetTracker() if cfg.NumPushWorkers > 0 { @@ -430,6 +347,7 @@ func New(cfg Config, clientConfig ingester_client.Config, limits *validation.Ove d.asyncExecutor = util.NewWorkerPool("distributor", cfg.NumPushWorkers, reg) } + // Instance limits metrics use ConstLabels and are kept hand-written. promauto.With(reg).NewGauge(prometheus.GaugeOpts{ Name: instanceLimitsMetric, Help: instanceLimitsMetricHelp, @@ -446,26 +364,6 @@ func New(cfg Config, clientConfig ingester_client.Config, limits *validation.Ove ConstLabels: map[string]string{limitLabel: "max_ingestion_rate"}, }).Set(cfg.InstanceLimits.MaxIngestionRate) - promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ - Name: "cortex_distributor_inflight_push_requests", - Help: "Current number of inflight push requests in distributor.", - }, func() float64 { - return float64(d.inflightPushRequests.Load()) - }) - - promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ - Name: "cortex_distributor_inflight_client_requests", - Help: "Current number of inflight client requests in distributor.", - }, func() float64 { - return float64(d.inflightClientRequests.Load()) - }) - promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ - Name: "cortex_distributor_ingestion_rate_samples_per_second", - Help: "Current ingestion rate in samples/sec that distributor is using to limit access.", - }, func() float64 { - return d.ingestionRate.Rate() - }) - d.replicationFactor.Set(float64(ingestersRing.ReplicationFactor())) d.activeUsers = users.NewActiveUsersCleanupWithDefaultValues(d.cleanupInactiveUser) diff --git a/pkg/distributor/telemetry_gen.go b/pkg/distributor/telemetry_gen.go new file mode 100644 index 00000000000..92a99223e2e --- /dev/null +++ b/pkg/distributor/telemetry_gen.go @@ -0,0 +1,203 @@ +// Code generated by OpenTelemetry Weaver. DO NOT EDIT. +// source: telemetry/registry/distributor/metrics.yaml + +package distributor + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/weaveworks/common/instrument" +) + +// Metric name constants. +const ( + MetricCortexDistributorDedupedSamplesTotal = "cortex_distributor_deduped_samples_total" + MetricCortexDistributorExemplarsInTotal = "cortex_distributor_exemplars_in_total" + MetricCortexDistributorInflightClientRequests = "cortex_distributor_inflight_client_requests" + MetricCortexDistributorInflightPushRequests = "cortex_distributor_inflight_push_requests" + MetricCortexDistributorIngesterAppendFailuresTotal = "cortex_distributor_ingester_append_failures_total" + MetricCortexDistributorIngesterAppendsTotal = "cortex_distributor_ingester_appends_total" + MetricCortexDistributorIngesterPartialDataQueriesTotal = "cortex_distributor_ingester_partial_data_queries_total" + MetricCortexDistributorIngesterPushTimeoutsTotal = "cortex_distributor_ingester_push_timeouts_total" + MetricCortexDistributorIngesterQueriesTotal = "cortex_distributor_ingester_queries_total" + MetricCortexDistributorIngesterQueryFailuresTotal = "cortex_distributor_ingester_query_failures_total" + MetricCortexDistributorIngestionRateSamplesPerSecond = "cortex_distributor_ingestion_rate_samples_per_second" + MetricCortexDistributorLatestSeenSampleTimestampSeconds = "cortex_distributor_latest_seen_sample_timestamp_seconds" + MetricCortexDistributorMetadataInTotal = "cortex_distributor_metadata_in_total" + MetricCortexDistributorNonHaSamplesReceivedTotal = "cortex_distributor_non_ha_samples_received_total" + MetricCortexDistributorQueryDurationSeconds = "cortex_distributor_query_duration_seconds" + MetricCortexDistributorReceivedExemplarsTotal = "cortex_distributor_received_exemplars_total" + MetricCortexDistributorReceivedMetadataTotal = "cortex_distributor_received_metadata_total" + MetricCortexDistributorReceivedSamplesPerLabelsetTotal = "cortex_distributor_received_samples_per_labelset_total" + MetricCortexDistributorReceivedSamplesTotal = "cortex_distributor_received_samples_total" + MetricCortexDistributorReplicationFactor = "cortex_distributor_replication_factor" + MetricCortexDistributorSamplesInTotal = "cortex_distributor_samples_in_total" + MetricCortexLabelsPerSample = "cortex_labels_per_sample" +) + +// DistributorGaugeFuncs provides callback functions for GaugeFunc metrics. +type DistributorGaugeFuncs struct { + // Current number of inflight client requests in distributor. + CortexDistributorInflightClientRequestsFunc func() float64 + // Current number of inflight push requests in distributor. + CortexDistributorInflightPushRequestsFunc func() float64 + // Current ingestion rate in samples/sec that distributor is using to limit access. + CortexDistributorIngestionRateSamplesPerSecondFunc func() float64 +} + +// registerDistributorMetrics creates and registers all Distributor-owned metrics, +// assigning them to the corresponding fields on the Distributor struct. +func registerDistributorMetrics(d *Distributor, reg prometheus.Registerer, gf DistributorGaugeFuncs) { + + // The total number of deduplicated samples. + d.dedupedSamples = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_deduped_samples_total", + Help: "The total number of deduplicated samples.", + }, []string{"user", "cluster"}) + + // The total number of exemplars that have come in to the distributor, including rejected or deduped exemplars. + d.incomingExemplars = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_exemplars_in_total", + Help: "The total number of exemplars that have come in to the distributor, including rejected or deduped exemplars.", + }, []string{"user"}) + + // Current number of inflight client requests in distributor. + promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ + Name: "cortex_distributor_inflight_client_requests", + Help: "Current number of inflight client requests in distributor.", + }, gf.CortexDistributorInflightClientRequestsFunc) + + // Current number of inflight push requests in distributor. + promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ + Name: "cortex_distributor_inflight_push_requests", + Help: "Current number of inflight push requests in distributor.", + }, gf.CortexDistributorInflightPushRequestsFunc) + + // The total number of failed batch appends sent to ingesters. + d.ingesterAppendFailures = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_ingester_append_failures_total", + Help: "The total number of failed batch appends sent to ingesters.", + }, []string{"ingester", "type", "status"}) + + // The total number of batch appends sent to ingesters. + d.ingesterAppends = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_ingester_appends_total", + Help: "The total number of batch appends sent to ingesters.", + }, []string{"ingester", "type"}) + + // The total number of queries sent to ingesters that may have returned partial data. + d.ingesterPartialDataQueries = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_ingester_partial_data_queries_total", + Help: "The total number of queries sent to ingesters that may have returned partial data.", + }) + + // The total number of push requests to ingesters that were canceled due to timeout. + d.distributorIngesterPushTimeout = promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "cortex_distributor_ingester_push_timeouts_total", + Help: "The total number of push requests to ingesters that were canceled due to timeout.", + }) + + // The total number of queries sent to ingesters. + d.ingesterQueries = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_ingester_queries_total", + Help: "The total number of queries sent to ingesters.", + }, []string{"ingester"}) + + // The total number of failed queries sent to ingesters. + d.ingesterQueryFailures = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_ingester_query_failures_total", + Help: "The total number of failed queries sent to ingesters.", + }, []string{"ingester"}) + + // Current ingestion rate in samples/sec that distributor is using to limit access. + promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ + Name: "cortex_distributor_ingestion_rate_samples_per_second", + Help: "Current ingestion rate in samples/sec that distributor is using to limit access.", + }, gf.CortexDistributorIngestionRateSamplesPerSecondFunc) + // cortex_distributor_instance_limits is excluded from code generation (uses ConstLabels). + + // Unix timestamp of latest received sample per user. + d.latestSeenSampleTimestampPerUser = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_distributor_latest_seen_sample_timestamp_seconds", + Help: "Unix timestamp of latest received sample per user.", + }, []string{"user"}) + + // The total number of metadata that have come in to the distributor, including rejected. + d.incomingMetadata = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_metadata_in_total", + Help: "The total number of metadata that have come in to the distributor, including rejected.", + }, []string{"user"}) + + // The total number of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels. + d.nonHASamples = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_non_ha_samples_received_total", + Help: "The total number of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels.", + }, []string{"user"}) + + // Time spent executing expression and exemplar queries. + d.queryDuration = instrument.NewHistogramCollector(promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "cortex", + Name: "distributor_query_duration_seconds", + Help: "Time spent executing expression and exemplar queries.", + Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 20, 30}, + }, []string{"method", "status_code"})) + + // The total number of received exemplars, excluding rejected and deduped exemplars. + d.receivedExemplars = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_received_exemplars_total", + Help: "The total number of received exemplars, excluding rejected and deduped exemplars.", + }, []string{"user"}) + + // The total number of received metadata, excluding rejected. + d.receivedMetadata = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_received_metadata_total", + Help: "The total number of received metadata, excluding rejected.", + }, []string{"user"}) + + // The total number of received samples per label set, excluding rejected and deduped samples. + d.receivedSamplesPerLabelSet = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_received_samples_per_labelset_total", + Help: "The total number of received samples per label set, excluding rejected and deduped samples.", + }, []string{"user", "type", "labelset"}) + + // The total number of received samples, excluding rejected and deduped samples. + d.receivedSamples = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_received_samples_total", + Help: "The total number of received samples, excluding rejected and deduped samples.", + }, []string{"user", "type"}) + + // The configured replication factor. + d.replicationFactor = promauto.With(reg).NewGauge(prometheus.GaugeOpts{ + Namespace: "cortex", + Name: "distributor_replication_factor", + Help: "The configured replication factor.", + }) + + // The total number of samples that have come in to the distributor, including rejected or deduped samples. + d.incomingSamples = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "distributor_samples_in_total", + Help: "The total number of samples that have come in to the distributor, including rejected or deduped samples.", + }, []string{"user", "type"}) + + // Number of labels per sample. + d.labelsHistogram = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Namespace: "cortex", + Name: "labels_per_sample", + Help: "Number of labels per sample.", + Buckets: []float64{5, 10, 15, 20, 25}, + }) +} \ No newline at end of file diff --git a/telemetry/manifest.yaml b/telemetry/manifest.yaml new file mode 100644 index 00000000000..c7f7048d96c --- /dev/null +++ b/telemetry/manifest.yaml @@ -0,0 +1,4 @@ +file_format: 1.2.0 +schema_url: https://github.com/cortexproject/cortex/telemetry +registries: + - registry diff --git a/telemetry/policies/naming.rego b/telemetry/policies/naming.rego new file mode 100644 index 00000000000..5dc3ec6098a --- /dev/null +++ b/telemetry/policies/naming.rego @@ -0,0 +1,20 @@ +package telemetry + +import rego.v1 + +# All Cortex distributor metrics must start with cortex_distributor_ or cortex_labels_. +deny contains msg if { + some group in input.groups + group.type == "metric" + not startswith(group.metric_name, "cortex_distributor_") + not startswith(group.metric_name, "cortex_labels_") + msg := sprintf("Metric %s does not follow Cortex naming convention (must start with cortex_distributor_ or cortex_labels_)", [group.metric_name]) +} + +# All metrics must have a brief description. +deny contains msg if { + some group in input.groups + group.type == "metric" + not group.brief + msg := sprintf("Metric %s is missing a brief description", [group.metric_name]) +} diff --git a/telemetry/registry/distributor/metrics.yaml b/telemetry/registry/distributor/metrics.yaml new file mode 100644 index 00000000000..27ad85d033b --- /dev/null +++ b/telemetry/registry/distributor/metrics.yaml @@ -0,0 +1,382 @@ +groups: + # -- Push path: incoming sample counters -- + + - id: metric.cortex.distributor.received_samples + type: metric + metric_name: cortex_distributor_received_samples_total + stability: stable + brief: "The total number of received samples, excluding rejected and deduped samples." + instrument: counter + unit: "{sample}" + attributes: + - id: user + type: string + stability: stable + brief: "Tenant ID." + requirement_level: required + examples: ["tenant-1"] + - id: type + type: + members: + - id: float + value: "float" + stability: stable + - id: histogram + value: "histogram" + stability: stable + - id: nhcb + value: "nhcb" + stability: stable + stability: stable + brief: "Sample metric type." + requirement_level: required + + - id: metric.cortex.distributor.received_samples_per_labelset + type: metric + metric_name: cortex_distributor_received_samples_per_labelset_total + stability: stable + brief: "The total number of received samples per label set, excluding rejected and deduped samples." + instrument: counter + unit: "{sample}" + attributes: + - id: user + type: string + stability: stable + brief: "Tenant ID." + requirement_level: required + examples: ["tenant-1"] + - id: type + type: string + stability: stable + brief: "Sample metric type." + requirement_level: required + examples: ["float", "histogram"] + - id: labelset + type: string + stability: stable + brief: "Label set string representation." + requirement_level: required + examples: ["{job=\"api\"}"] + + - id: metric.cortex.distributor.received_exemplars + type: metric + metric_name: cortex_distributor_received_exemplars_total + stability: stable + brief: "The total number of received exemplars, excluding rejected and deduped exemplars." + instrument: counter + unit: "{exemplar}" + attributes: + - id: user + type: string + stability: stable + brief: "Tenant ID." + requirement_level: required + examples: ["tenant-1"] + + - id: metric.cortex.distributor.received_metadata + type: metric + metric_name: cortex_distributor_received_metadata_total + stability: stable + brief: "The total number of received metadata, excluding rejected." + instrument: counter + unit: "{metadata}" + attributes: + - id: user + type: string + stability: stable + brief: "Tenant ID." + requirement_level: required + examples: ["tenant-1"] + + - id: metric.cortex.distributor.samples_in + type: metric + metric_name: cortex_distributor_samples_in_total + stability: stable + brief: "The total number of samples that have come in to the distributor, including rejected or deduped samples." + instrument: counter + unit: "{sample}" + attributes: + - id: user + type: string + stability: stable + brief: "Tenant ID." + requirement_level: required + examples: ["tenant-1"] + - id: type + type: string + stability: stable + brief: "Sample metric type." + requirement_level: required + examples: ["float", "histogram", "nhcb"] + + - id: metric.cortex.distributor.exemplars_in + type: metric + metric_name: cortex_distributor_exemplars_in_total + stability: stable + brief: "The total number of exemplars that have come in to the distributor, including rejected or deduped exemplars." + instrument: counter + unit: "{exemplar}" + attributes: + - id: user + type: string + stability: stable + brief: "Tenant ID." + requirement_level: required + examples: ["tenant-1"] + + - id: metric.cortex.distributor.metadata_in + type: metric + metric_name: cortex_distributor_metadata_in_total + stability: stable + brief: "The total number of metadata that have come in to the distributor, including rejected." + instrument: counter + unit: "{metadata}" + attributes: + - id: user + type: string + stability: stable + brief: "Tenant ID." + requirement_level: required + examples: ["tenant-1"] + + # -- HA deduplication counters -- + + - id: metric.cortex.distributor.non_ha_samples_received + type: metric + metric_name: cortex_distributor_non_ha_samples_received_total + stability: stable + brief: "The total number of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels." + instrument: counter + unit: "{sample}" + attributes: + - id: user + type: string + stability: stable + brief: "Tenant ID." + requirement_level: required + examples: ["tenant-1"] + + - id: metric.cortex.distributor.deduped_samples + type: metric + metric_name: cortex_distributor_deduped_samples_total + stability: stable + brief: "The total number of deduplicated samples." + instrument: counter + unit: "{sample}" + attributes: + - id: user + type: string + stability: stable + brief: "Tenant ID." + requirement_level: required + examples: ["tenant-1"] + - id: cluster + type: string + stability: stable + brief: "HA cluster identifier." + requirement_level: required + examples: ["cluster-a"] + + # -- Histograms -- + + - id: metric.cortex.labels_per_sample + type: metric + metric_name: cortex_labels_per_sample + stability: stable + brief: "Number of labels per sample." + instrument: histogram + unit: "{label}" + + - id: metric.cortex.distributor.query_duration_seconds + type: metric + metric_name: cortex_distributor_query_duration_seconds + stability: stable + brief: "Time spent executing expression and exemplar queries." + instrument: histogram + unit: s + attributes: + - id: method + type: string + stability: stable + brief: "Query method name." + requirement_level: required + examples: ["QueryStream", "QueryExemplars"] + - id: status_code + type: string + stability: stable + brief: "HTTP status code." + requirement_level: required + examples: ["200", "500"] + + # -- Ingester interaction counters -- + + - id: metric.cortex.distributor.ingester_appends + type: metric + metric_name: cortex_distributor_ingester_appends_total + stability: stable + brief: "The total number of batch appends sent to ingesters." + instrument: counter + unit: "{append}" + attributes: + - id: ingester + type: string + stability: stable + brief: "Ingester instance identifier." + requirement_level: required + examples: ["ingester-0"] + - id: type + type: string + stability: stable + brief: "Append type: samples or metadata." + requirement_level: required + examples: ["samples", "metadata"] + + - id: metric.cortex.distributor.ingester_append_failures + type: metric + metric_name: cortex_distributor_ingester_append_failures_total + stability: stable + brief: "The total number of failed batch appends sent to ingesters." + instrument: counter + unit: "{append}" + attributes: + - id: ingester + type: string + stability: stable + brief: "Ingester instance identifier." + requirement_level: required + examples: ["ingester-0"] + - id: type + type: string + stability: stable + brief: "Append type." + requirement_level: required + examples: ["samples", "metadata"] + - id: status + type: string + stability: stable + brief: "Failure HTTP status class." + requirement_level: required + examples: ["4xx", "5xx"] + + - id: metric.cortex.distributor.ingester_queries + type: metric + metric_name: cortex_distributor_ingester_queries_total + stability: stable + brief: "The total number of queries sent to ingesters." + instrument: counter + unit: "{query}" + attributes: + - id: ingester + type: string + stability: stable + brief: "Ingester instance identifier." + requirement_level: required + examples: ["ingester-0"] + + - id: metric.cortex.distributor.ingester_query_failures + type: metric + metric_name: cortex_distributor_ingester_query_failures_total + stability: stable + brief: "The total number of failed queries sent to ingesters." + instrument: counter + unit: "{query}" + attributes: + - id: ingester + type: string + stability: stable + brief: "Ingester instance identifier." + requirement_level: required + examples: ["ingester-0"] + + - id: metric.cortex.distributor.ingester_partial_data_queries + type: metric + metric_name: cortex_distributor_ingester_partial_data_queries_total + stability: stable + brief: "The total number of queries sent to ingesters that may have returned partial data." + instrument: counter + unit: "{query}" + + # -- Gauges -- + + - id: metric.cortex.distributor.replication_factor + type: metric + metric_name: cortex_distributor_replication_factor + stability: stable + brief: "The configured replication factor." + instrument: gauge + unit: "{factor}" + + - id: metric.cortex.distributor.latest_seen_sample_timestamp_seconds + type: metric + metric_name: cortex_distributor_latest_seen_sample_timestamp_seconds + stability: stable + brief: "Unix timestamp of latest received sample per user." + instrument: gauge + unit: s + attributes: + - id: user + type: string + stability: stable + brief: "Tenant ID." + requirement_level: required + examples: ["tenant-1"] + + - id: metric.cortex.distributor.ingester_push_timeouts + type: metric + metric_name: cortex_distributor_ingester_push_timeouts_total + stability: stable + brief: "The total number of push requests to ingesters that were canceled due to timeout." + instrument: counter + unit: "{timeout}" + + # -- GaugeFunc metrics (registered with callback functions) -- + + - id: metric.cortex.distributor.inflight_push_requests + type: metric + metric_name: cortex_distributor_inflight_push_requests + stability: stable + brief: "Current number of inflight push requests in distributor." + instrument: gauge + unit: "{request}" + + - id: metric.cortex.distributor.inflight_client_requests + type: metric + metric_name: cortex_distributor_inflight_client_requests + stability: stable + brief: "Current number of inflight client requests in distributor." + instrument: gauge + unit: "{request}" + + - id: metric.cortex.distributor.ingestion_rate_samples_per_second + type: metric + metric_name: cortex_distributor_ingestion_rate_samples_per_second + stability: stable + brief: "Current ingestion rate in samples/sec that distributor is using to limit access." + instrument: gauge + unit: "{sample}/s" + + # -- Instance limits (registered with ConstLabels, kept hand-written) -- + + - id: metric.cortex.distributor.instance_limits + type: metric + metric_name: cortex_distributor_instance_limits + stability: stable + brief: "Instance limits used by this distributor." + instrument: gauge + unit: "{limit}" + attributes: + - id: limit + type: + members: + - id: max_inflight_push_requests + value: "max_inflight_push_requests" + stability: stable + - id: max_inflight_client_requests + value: "max_inflight_client_requests" + stability: stable + - id: max_ingestion_rate + value: "max_ingestion_rate" + stability: stable + stability: stable + brief: "Type of instance limit." + requirement_level: required diff --git a/telemetry/registry/distributor/spans.yaml b/telemetry/registry/distributor/spans.yaml new file mode 100644 index 00000000000..1349545b23b --- /dev/null +++ b/telemetry/registry/distributor/spans.yaml @@ -0,0 +1,153 @@ +groups: + - id: span.cortex.distributor.push + type: span + span_kind: server + stability: stable + brief: "Handles incoming remote write push requests." + attributes: + - id: user + type: string + stability: stable + brief: "Tenant ID." + requirement_level: recommended + examples: ["tenant-1"] + + - id: span.cortex.distributor.do_batch + type: span + span_kind: internal + stability: stable + brief: "Sends batched samples to ingesters via the ring." + attributes: [] + + - id: span.cortex.distributor.prepare_series_keys + type: span + span_kind: internal + stability: stable + brief: "Computes ingester ring tokens for series distribution." + attributes: [] + + - id: span.cortex.distributor.query_exemplars + type: span + span_kind: client + stability: stable + brief: "Queries exemplars from ingesters." + attributes: + - id: series + type: int + stability: stable + brief: "Number of series in the response." + requirement_level: recommended + examples: [42] + + - id: span.cortex.distributor.query_stream + type: span + span_kind: client + stability: stable + brief: "Queries time series from ingesters via streaming." + attributes: + - id: chunk_series + type: int + stability: stable + brief: "Number of chunk series in the response." + requirement_level: recommended + examples: [100] + + - id: span.cortex.distributor.merge_ingester_streams + type: span + span_kind: internal + stability: stable + brief: "Merges streaming query responses from multiple ingesters." + attributes: + - id: fetched_series + type: int + stability: stable + brief: "Number of series fetched." + requirement_level: recommended + examples: [100] + - id: fetched_chunks + type: int + stability: stable + brief: "Number of chunks fetched." + requirement_level: recommended + examples: [1000] + - id: fetched_data_bytes + type: int + stability: stable + brief: "Total response data bytes." + requirement_level: recommended + examples: [1048576] + - id: fetched_chunks_bytes + type: int + stability: stable + brief: "Chunk-specific data bytes." + requirement_level: recommended + examples: [524288] + + - id: span.cortex.distributor.label_values + type: span + span_kind: client + stability: stable + brief: "Queries label values from ingesters." + attributes: + - id: name + type: string + stability: stable + brief: "Label name being queried." + requirement_level: recommended + examples: ["__name__"] + - id: start + type: int + stability: stable + brief: "Query start time as Unix timestamp." + requirement_level: recommended + examples: [1700000000] + - id: end + type: int + stability: stable + brief: "Query end time as Unix timestamp." + requirement_level: recommended + examples: [1700003600] + - id: result_length + type: int + stability: stable + brief: "Number of values returned." + requirement_level: recommended + examples: [50] + + - id: span.cortex.distributor.label_names + type: span + span_kind: client + stability: stable + brief: "Queries label names from ingesters." + attributes: + - id: start + type: int + stability: stable + brief: "Query start time as Unix timestamp." + requirement_level: recommended + examples: [1700000000] + - id: end + type: int + stability: stable + brief: "Query end time as Unix timestamp." + requirement_level: recommended + examples: [1700003600] + - id: result_length + type: int + stability: stable + brief: "Number of names returned." + requirement_level: recommended + examples: [20] + + - id: span.cortex.distributor.response_merge + type: span + span_kind: internal + stability: stable + brief: "Merges query responses from multiple ingesters." + attributes: + - id: result_length + type: int + stability: stable + brief: "Number of items in the merged result." + requirement_level: recommended + examples: [50] diff --git a/telemetry/templates/registry/go/metrics.go.j2 b/telemetry/templates/registry/go/metrics.go.j2 new file mode 100644 index 00000000000..34b6fb50bd3 --- /dev/null +++ b/telemetry/templates/registry/go/metrics.go.j2 @@ -0,0 +1,152 @@ +// Code generated by OpenTelemetry Weaver. DO NOT EDIT. +// source: telemetry/registry/distributor/metrics.yaml + +package {{ params.pkg }} + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/weaveworks/common/instrument" +) + +// Metric name constants. +const ( +{%- for metric in ctx.metrics %} +{%- if metric.metric_name not in params.excluded %} + Metric{{ metric.metric_name | pascal_case }} = "{{ metric.metric_name }}" +{%- endif %} +{%- endfor %} +) + +// DistributorGaugeFuncs provides callback functions for GaugeFunc metrics. +type DistributorGaugeFuncs struct { +{%- for metric in ctx.metrics %} +{%- if metric.metric_name in params.gauge_funcs %} + // {{ metric.brief }} + {{ metric.metric_name | pascal_case }}Func func() float64 +{%- endif %} +{%- endfor %} +} + +{# Macro to render a label list like: []string{"user", "type"} #} +{%- macro label_list(attributes) -%} +[]string{{ "{" }} +{%- for attr in attributes -%} +"{{ attr.name }}"{% if not loop.last %}, {% endif %} +{%- endfor -%} +{{ "}" }} +{%- endmacro -%} + +{# Macro to render a bucket list like: []float64{5, 10, 15} #} +{%- macro bucket_list(buckets) -%} +[]float64{{ "{" }} +{%- for b in buckets -%} +{{ b }}{% if not loop.last %}, {% endif %} +{%- endfor -%} +{{ "}" }} +{%- endmacro -%} + +// registerDistributorMetrics creates and registers all Distributor-owned metrics, +// assigning them to the corresponding fields on the Distributor struct. +func registerDistributorMetrics(d *Distributor, reg prometheus.Registerer, gf DistributorGaugeFuncs) { +{%- for metric in ctx.metrics %} +{%- if metric.metric_name in params.excluded %} + // {{ metric.metric_name }} is excluded from code generation (uses ConstLabels). +{%- elif metric.metric_name in params.gauge_funcs %} + + // {{ metric.brief }} + promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ + Name: "{{ metric.metric_name }}", + Help: "{{ metric.brief }}", + }, gf.{{ metric.metric_name | pascal_case }}Func) +{%- elif metric.metric_name in params.field_map %} +{%- set field = params.field_map[metric.metric_name] %} +{%- set ns = params.namespace_split[metric.metric_name] | default(none) %} +{%- set buckets = params.histogram_buckets[metric.metric_name] | default(none) %} +{%- set is_wrapped = metric.metric_name in params.histogram_collector_wrap %} + + // {{ metric.brief }} +{%- if metric.instrument == "counter" %} +{%- if metric.attributes %} + d.{{ field }} = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ +{%- if ns %} + Namespace: "{{ ns.namespace }}", + Name: "{{ ns.name }}", +{%- else %} + Name: "{{ metric.metric_name }}", +{%- endif %} + Help: "{{ metric.brief }}", + }, {{ label_list(metric.attributes) }}) +{%- else %} + d.{{ field }} = promauto.With(reg).NewCounter(prometheus.CounterOpts{ +{%- if ns %} + Namespace: "{{ ns.namespace }}", + Name: "{{ ns.name }}", +{%- else %} + Name: "{{ metric.metric_name }}", +{%- endif %} + Help: "{{ metric.brief }}", + }) +{%- endif %} +{%- elif metric.instrument == "gauge" %} +{%- if metric.attributes %} + d.{{ field }} = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ +{%- if ns %} + Namespace: "{{ ns.namespace }}", + Name: "{{ ns.name }}", +{%- else %} + Name: "{{ metric.metric_name }}", +{%- endif %} + Help: "{{ metric.brief }}", + }, {{ label_list(metric.attributes) }}) +{%- else %} + d.{{ field }} = promauto.With(reg).NewGauge(prometheus.GaugeOpts{ +{%- if ns %} + Namespace: "{{ ns.namespace }}", + Name: "{{ ns.name }}", +{%- else %} + Name: "{{ metric.metric_name }}", +{%- endif %} + Help: "{{ metric.brief }}", + }) +{%- endif %} +{%- elif metric.instrument == "histogram" %} +{%- if metric.attributes %} +{%- if is_wrapped %} + d.{{ field }} = instrument.NewHistogramCollector(promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ +{%- else %} + d.{{ field }} = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ +{%- endif %} +{%- if ns %} + Namespace: "{{ ns.namespace }}", + Name: "{{ ns.name }}", +{%- else %} + Name: "{{ metric.metric_name }}", +{%- endif %} + Help: "{{ metric.brief }}", +{%- if buckets %} + Buckets: {{ bucket_list(buckets) }}, +{%- endif %} +{%- if is_wrapped %} + }, {{ label_list(metric.attributes) }})) +{%- else %} + }, {{ label_list(metric.attributes) }}) +{%- endif %} +{%- else %} + d.{{ field }} = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ +{%- if ns %} + Namespace: "{{ ns.namespace }}", + Name: "{{ ns.name }}", +{%- else %} + Name: "{{ metric.metric_name }}", +{%- endif %} + Help: "{{ metric.brief }}", +{%- if buckets %} + Buckets: {{ bucket_list(buckets) }}, +{%- endif %} + }) +{%- endif %} +{%- endif %} +{%- endif %} +{%- endfor %} +} diff --git a/telemetry/templates/registry/go/weaver.yaml b/telemetry/templates/registry/go/weaver.yaml new file mode 100644 index 00000000000..7beb6e0404d --- /dev/null +++ b/telemetry/templates/registry/go/weaver.yaml @@ -0,0 +1,65 @@ +comment_format: "//" + +params: + pkg: distributor + + field_map: + cortex_distributor_query_duration_seconds: queryDuration + cortex_distributor_received_samples_total: receivedSamples + cortex_distributor_received_samples_per_labelset_total: receivedSamplesPerLabelSet + cortex_distributor_received_exemplars_total: receivedExemplars + cortex_distributor_received_metadata_total: receivedMetadata + cortex_distributor_samples_in_total: incomingSamples + cortex_distributor_exemplars_in_total: incomingExemplars + cortex_distributor_metadata_in_total: incomingMetadata + cortex_distributor_non_ha_samples_received_total: nonHASamples + cortex_distributor_deduped_samples_total: dedupedSamples + cortex_labels_per_sample: labelsHistogram + cortex_distributor_ingester_appends_total: ingesterAppends + cortex_distributor_ingester_append_failures_total: ingesterAppendFailures + cortex_distributor_ingester_queries_total: ingesterQueries + cortex_distributor_ingester_query_failures_total: ingesterQueryFailures + cortex_distributor_ingester_partial_data_queries_total: ingesterPartialDataQueries + cortex_distributor_replication_factor: replicationFactor + cortex_distributor_latest_seen_sample_timestamp_seconds: latestSeenSampleTimestampPerUser + cortex_distributor_ingester_push_timeouts_total: distributorIngesterPushTimeout + + namespace_split: + cortex_distributor_query_duration_seconds: {namespace: cortex, name: distributor_query_duration_seconds} + cortex_distributor_received_samples_total: {namespace: cortex, name: distributor_received_samples_total} + cortex_distributor_received_samples_per_labelset_total: {namespace: cortex, name: distributor_received_samples_per_labelset_total} + cortex_distributor_received_exemplars_total: {namespace: cortex, name: distributor_received_exemplars_total} + cortex_distributor_received_metadata_total: {namespace: cortex, name: distributor_received_metadata_total} + cortex_distributor_samples_in_total: {namespace: cortex, name: distributor_samples_in_total} + cortex_distributor_exemplars_in_total: {namespace: cortex, name: distributor_exemplars_in_total} + cortex_distributor_metadata_in_total: {namespace: cortex, name: distributor_metadata_in_total} + cortex_distributor_non_ha_samples_received_total: {namespace: cortex, name: distributor_non_ha_samples_received_total} + cortex_distributor_deduped_samples_total: {namespace: cortex, name: distributor_deduped_samples_total} + cortex_labels_per_sample: {namespace: cortex, name: labels_per_sample} + cortex_distributor_ingester_appends_total: {namespace: cortex, name: distributor_ingester_appends_total} + cortex_distributor_ingester_append_failures_total: {namespace: cortex, name: distributor_ingester_append_failures_total} + cortex_distributor_ingester_queries_total: {namespace: cortex, name: distributor_ingester_queries_total} + cortex_distributor_ingester_query_failures_total: {namespace: cortex, name: distributor_ingester_query_failures_total} + cortex_distributor_ingester_partial_data_queries_total: {namespace: cortex, name: distributor_ingester_partial_data_queries_total} + cortex_distributor_replication_factor: {namespace: cortex, name: distributor_replication_factor} + + histogram_buckets: + cortex_labels_per_sample: [5, 10, 15, 20, 25] + cortex_distributor_query_duration_seconds: [.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 20, 30] + + gauge_funcs: + - cortex_distributor_inflight_push_requests + - cortex_distributor_inflight_client_requests + - cortex_distributor_ingestion_rate_samples_per_second + + excluded: + - cortex_distributor_instance_limits + + histogram_collector_wrap: + - cortex_distributor_query_duration_seconds + +templates: + - pattern: metrics.go.j2 + filter: semconv_grouped_metrics + application_mode: each + file_name: telemetry_gen.go diff --git a/telemetry/templates/registry/markdown/metrics.md.j2 b/telemetry/templates/registry/markdown/metrics.md.j2 new file mode 100644 index 00000000000..3dcc1d76b61 --- /dev/null +++ b/telemetry/templates/registry/markdown/metrics.md.j2 @@ -0,0 +1,15 @@ + + + +# Distributor Telemetry Reference + +This document is auto-generated from the telemetry schema defined in +`telemetry/registry/distributor/`. Do not edit manually. + +## Metrics + +| Name | Type | Unit | Description | Labels | +|------|------|------|-------------|--------| +{%- for metric in ctx.metrics %} +| `{{ metric.metric_name }}` | {{ metric.instrument }} | {{ metric.unit | default("-") }} | {{ metric.brief }} | {% if metric.attributes %}{% for attr in metric.attributes %}`{{ attr.name }}`{% if not loop.last %}, {% endif %}{% endfor %}{% else %}-{% endif %} | +{%- endfor %} diff --git a/telemetry/templates/registry/markdown/weaver.yaml b/telemetry/templates/registry/markdown/weaver.yaml new file mode 100644 index 00000000000..0c6eca9eb57 --- /dev/null +++ b/telemetry/templates/registry/markdown/weaver.yaml @@ -0,0 +1,5 @@ +templates: + - pattern: metrics.md.j2 + filter: semconv_grouped_metrics + application_mode: each + file_name: "{{ ctx.root_namespace }}_distributor.md" From ce58fba34fd30effc4b6fdcc72f76b4500495dc5 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Mon, 30 Mar 2026 18:39:13 -0700 Subject: [PATCH 2/9] Fix CI: correct Weaver download URL and Hugo markdown compatibility - Fix Dockerfile: use .tar.xz format, update to v0.22.1, handle missing arm64 Linux builds gracefully - Fix generated markdown: replace HTML comments with Hugo frontmatter to avoid Hugo build errors Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- build-image/Dockerfile | 16 +++++++++------- docs/telemetry/cortex_distributor.md | 5 +++-- .../templates/registry/markdown/metrics.md.j2 | 7 +++++-- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/build-image/Dockerfile b/build-image/Dockerfile index 39ba4d4d39d..b5a3548a86a 100644 --- a/build-image/Dockerfile +++ b/build-image/Dockerfile @@ -28,14 +28,16 @@ RUN go install github.com/client9/misspell/cmd/misspell@v0.3.4 &&\ rm -rf /go/pkg /go/src /root/.cache # Install OpenTelemetry Weaver for telemetry schema validation and code generation. -ENV WEAVER_VERSION=0.16.2 +# Note: Only available for x86_64 Linux. arm64 builds will skip Weaver installation. +ENV WEAVER_VERSION=0.22.1 RUN GOARCH=$(go env GOARCH) && \ - URL="https://github.com/open-telemetry/weaver/releases/download/v${WEAVER_VERSION}/weaver-x86_64-unknown-linux-gnu.tar.gz" && \ - if [ "$GOARCH" = "arm64" ]; then \ - URL="https://github.com/open-telemetry/weaver/releases/download/v${WEAVER_VERSION}/weaver-aarch64-unknown-linux-gnu.tar.gz"; \ - fi && \ - curl -fsSL "${URL}" | tar xz -C /usr/bin weaver && \ - chmod +x /usr/bin/weaver + if [ "$GOARCH" = "amd64" ]; then \ + URL="https://github.com/open-telemetry/weaver/releases/download/v${WEAVER_VERSION}/weaver-x86_64-unknown-linux-gnu.tar.xz" && \ + curl -fsSL "${URL}" | xz -d | tar x -C /usr/bin weaver && \ + chmod +x /usr/bin/weaver; \ + else \ + echo "Weaver not available for $GOARCH, skipping installation"; \ + fi COPY build.sh / ENV GOCACHE=/go/cache diff --git a/docs/telemetry/cortex_distributor.md b/docs/telemetry/cortex_distributor.md index 374b9cb5b73..749ccdddcc0 100644 --- a/docs/telemetry/cortex_distributor.md +++ b/docs/telemetry/cortex_distributor.md @@ -1,5 +1,6 @@ - - +--- +title: "Distributor Telemetry Reference" +--- # Distributor Telemetry Reference diff --git a/telemetry/templates/registry/markdown/metrics.md.j2 b/telemetry/templates/registry/markdown/metrics.md.j2 index 3dcc1d76b61..d83c6b772b8 100644 --- a/telemetry/templates/registry/markdown/metrics.md.j2 +++ b/telemetry/templates/registry/markdown/metrics.md.j2 @@ -1,5 +1,8 @@ - - +{#- Code generated by OpenTelemetry Weaver. DO NOT EDIT. -#} +{#- source: telemetry/registry/distributor/ -#} +--- +title: "Distributor Telemetry Reference" +--- # Distributor Telemetry Reference From 34ce0a0ee3d16b1d07bd5728b7d5bc283018815b Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Mon, 30 Mar 2026 18:43:14 -0700 Subject: [PATCH 3/9] Fix CI: install xz-utils for Weaver tar.xz extraction Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- build-image/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-image/Dockerfile b/build-image/Dockerfile index b5a3548a86a..e970b33ccf0 100644 --- a/build-image/Dockerfile +++ b/build-image/Dockerfile @@ -1,7 +1,7 @@ FROM golang:1.25.8-trixie ARG goproxyValue ENV GOPROXY=${goproxyValue} -RUN apt-get update && apt-get install -y curl file gettext jq unzip protobuf-compiler libprotobuf-dev && \ +RUN apt-get update && apt-get install -y curl file gettext jq unzip xz-utils protobuf-compiler libprotobuf-dev && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* ENV SHFMT_VERSION=3.2.4 From 06691d0efc29af228574ebae95b1ed29ec16597c Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Mon, 30 Mar 2026 18:47:26 -0700 Subject: [PATCH 4/9] Fix CI: strip directory prefix when extracting Weaver binary The tar archive contains weaver-x86_64-unknown-linux-gnu/weaver, not a top-level weaver binary. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- build-image/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-image/Dockerfile b/build-image/Dockerfile index e970b33ccf0..2b20fa0312b 100644 --- a/build-image/Dockerfile +++ b/build-image/Dockerfile @@ -33,7 +33,7 @@ ENV WEAVER_VERSION=0.22.1 RUN GOARCH=$(go env GOARCH) && \ if [ "$GOARCH" = "amd64" ]; then \ URL="https://github.com/open-telemetry/weaver/releases/download/v${WEAVER_VERSION}/weaver-x86_64-unknown-linux-gnu.tar.xz" && \ - curl -fsSL "${URL}" | xz -d | tar x -C /usr/bin weaver && \ + curl -fsSL "${URL}" | xz -d | tar x --strip-components=1 -C /usr/bin weaver-x86_64-unknown-linux-gnu/weaver && \ chmod +x /usr/bin/weaver; \ else \ echo "Weaver not available for $GOARCH, skipping installation"; \ From e4177d8f43559a826910f531bb39aa0f4cb7a168 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Mon, 30 Mar 2026 18:59:11 -0700 Subject: [PATCH 5/9] Fix CI: auto-install Weaver if not present in build image The current CI build image doesn't include Weaver yet. Add an install-weaver target that downloads it on-demand so telemetry checks work before the build image is updated. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- Makefile | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 5df68c476fd..15c97380b9f 100644 --- a/Makefile +++ b/Makefile @@ -239,10 +239,24 @@ mod-check: @git diff --exit-code -- go.sum go.mod vendor/ # Telemetry schema validation and code generation (requires weaver CLI). -telemetry-check: +WEAVER_VERSION ?= 0.22.1 + +# Install weaver if not already available. +.PHONY: install-weaver +install-weaver: + @if ! command -v weaver >/dev/null 2>&1; then \ + echo "Installing weaver v$(WEAVER_VERSION)..."; \ + GOARCH=$$(go env GOARCH) && \ + if [ "$$GOARCH" = "amd64" ]; then ARCH=x86_64; else echo "Weaver not available for $$GOARCH"; exit 1; fi && \ + URL="https://github.com/open-telemetry/weaver/releases/download/v$(WEAVER_VERSION)/weaver-$${ARCH}-unknown-linux-gnu.tar.xz" && \ + curl -fsSL "$$URL" | xz -d | tar x --strip-components=1 -C /usr/bin weaver-$${ARCH}-unknown-linux-gnu/weaver && \ + chmod +x /usr/bin/weaver; \ + fi + +telemetry-check: install-weaver weaver registry check -r telemetry/registry -telemetry-generate: +telemetry-generate: install-weaver weaver registry generate -r telemetry/registry -t telemetry/templates go pkg/distributor/ weaver registry generate -r telemetry/registry -t telemetry/templates markdown docs/telemetry/ From ad87a6c0c3188a0e8dd4612df43540e6a68f39d3 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Mon, 30 Mar 2026 19:16:22 -0700 Subject: [PATCH 6/9] Fix CI: use Weaver installer script instead of manual tar extraction The CI build image lacks xz-utils. Use the official installer script which handles its own decompression dependencies. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- Makefile | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 15c97380b9f..05822b17707 100644 --- a/Makefile +++ b/Makefile @@ -246,11 +246,7 @@ WEAVER_VERSION ?= 0.22.1 install-weaver: @if ! command -v weaver >/dev/null 2>&1; then \ echo "Installing weaver v$(WEAVER_VERSION)..."; \ - GOARCH=$$(go env GOARCH) && \ - if [ "$$GOARCH" = "amd64" ]; then ARCH=x86_64; else echo "Weaver not available for $$GOARCH"; exit 1; fi && \ - URL="https://github.com/open-telemetry/weaver/releases/download/v$(WEAVER_VERSION)/weaver-$${ARCH}-unknown-linux-gnu.tar.xz" && \ - curl -fsSL "$$URL" | xz -d | tar x --strip-components=1 -C /usr/bin weaver-$${ARCH}-unknown-linux-gnu/weaver && \ - chmod +x /usr/bin/weaver; \ + curl -fsSL "https://github.com/open-telemetry/weaver/releases/download/v$(WEAVER_VERSION)/weaver-installer.sh" | sh -s -- --yes --install-dir /usr/bin; \ fi telemetry-check: install-weaver From bc33558fe8aa333125e9027a753c8a6f4b9b4ea7 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Tue, 31 Mar 2026 09:38:55 -0700 Subject: [PATCH 7/9] Fix CI: use correct Weaver installer flags and PATH handling The installer script doesn't support --yes or --install-dir. Use --no-modify-path --quiet flags and configure PATH to find the installed binary in ~/.cargo/bin. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- Makefile | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 05822b17707..7467d7622e1 100644 --- a/Makefile +++ b/Makefile @@ -240,21 +240,23 @@ mod-check: # Telemetry schema validation and code generation (requires weaver CLI). WEAVER_VERSION ?= 0.22.1 +WEAVER_BIN_DIR ?= $(HOME)/.cargo/bin +WEAVER := PATH="$(WEAVER_BIN_DIR):$(PATH)" weaver # Install weaver if not already available. .PHONY: install-weaver install-weaver: - @if ! command -v weaver >/dev/null 2>&1; then \ + @if ! PATH="$(WEAVER_BIN_DIR):$(PATH)" command -v weaver >/dev/null 2>&1; then \ echo "Installing weaver v$(WEAVER_VERSION)..."; \ - curl -fsSL "https://github.com/open-telemetry/weaver/releases/download/v$(WEAVER_VERSION)/weaver-installer.sh" | sh -s -- --yes --install-dir /usr/bin; \ + curl -fsSL "https://github.com/open-telemetry/weaver/releases/download/v$(WEAVER_VERSION)/weaver-installer.sh" | sh -s -- --no-modify-path --quiet; \ fi telemetry-check: install-weaver - weaver registry check -r telemetry/registry + $(WEAVER) registry check -r telemetry/registry telemetry-generate: install-weaver - weaver registry generate -r telemetry/registry -t telemetry/templates go pkg/distributor/ - weaver registry generate -r telemetry/registry -t telemetry/templates markdown docs/telemetry/ + $(WEAVER) registry generate -r telemetry/registry -t telemetry/templates go pkg/distributor/ + $(WEAVER) registry generate -r telemetry/registry -t telemetry/templates markdown docs/telemetry/ check-telemetry: telemetry-generate @git diff --exit-code -- pkg/distributor/telemetry_gen.go docs/telemetry/ || \ From d9735c4cf6b20e9f976697fc228dfa1f75433d1b Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Tue, 31 Mar 2026 10:32:59 -0700 Subject: [PATCH 8/9] Fix CI: use python3 lzma to extract Weaver binary The CI build image has python3 but not xz-utils. Use python3's built-in lzma module to decompress the .tar.xz archive. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7467d7622e1..f7e722dafbd 100644 --- a/Makefile +++ b/Makefile @@ -248,7 +248,11 @@ WEAVER := PATH="$(WEAVER_BIN_DIR):$(PATH)" weaver install-weaver: @if ! PATH="$(WEAVER_BIN_DIR):$(PATH)" command -v weaver >/dev/null 2>&1; then \ echo "Installing weaver v$(WEAVER_VERSION)..."; \ - curl -fsSL "https://github.com/open-telemetry/weaver/releases/download/v$(WEAVER_VERSION)/weaver-installer.sh" | sh -s -- --no-modify-path --quiet; \ + mkdir -p $(WEAVER_BIN_DIR) && \ + curl -fsSL "https://github.com/open-telemetry/weaver/releases/download/v$(WEAVER_VERSION)/weaver-x86_64-unknown-linux-gnu.tar.xz" -o /tmp/weaver.tar.xz && \ + python3 -c "import lzma,tarfile,sys; t=tarfile.open(fileobj=lzma.open('/tmp/weaver.tar.xz')); m=next(x for x in t if x.name.endswith('/weaver')); m.name='weaver'; t.extract(m,'$(WEAVER_BIN_DIR)',filter='data' if sys.version_info>=(3,12) else None); t.close()" && \ + chmod +x $(WEAVER_BIN_DIR)/weaver && \ + rm -f /tmp/weaver.tar.xz; \ fi telemetry-check: install-weaver From aec2e0f8fcec6736c3fb4d219b91c405f7945f12 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Tue, 31 Mar 2026 10:55:31 -0700 Subject: [PATCH 9/9] Fix CI: pin label ordering and upgrade to Weaver v0.22.1 Weaver v0.22.1 sorts attributes alphabetically, which would change the label order in generated []string{} slices and break existing WithLabelValues() call sites. Add explicit label ordering in params.labels to preserve the original Go label order regardless of Weaver version. Regenerate with v0.22.1. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- docs/telemetry/cortex_distributor.md | 10 ++++---- telemetry/templates/registry/go/metrics.go.j2 | 23 ++++++++++--------- telemetry/templates/registry/go/weaver.yaml | 19 +++++++++++++++ 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/docs/telemetry/cortex_distributor.md b/docs/telemetry/cortex_distributor.md index 749ccdddcc0..1ec85696d37 100644 --- a/docs/telemetry/cortex_distributor.md +++ b/docs/telemetry/cortex_distributor.md @@ -11,11 +11,11 @@ This document is auto-generated from the telemetry schema defined in | Name | Type | Unit | Description | Labels | |------|------|------|-------------|--------| -| `cortex_distributor_deduped_samples_total` | counter | {sample} | The total number of deduplicated samples. | `user`, `cluster` | +| `cortex_distributor_deduped_samples_total` | counter | {sample} | The total number of deduplicated samples. | `cluster`, `user` | | `cortex_distributor_exemplars_in_total` | counter | {exemplar} | The total number of exemplars that have come in to the distributor, including rejected or deduped exemplars. | `user` | | `cortex_distributor_inflight_client_requests` | gauge | {request} | Current number of inflight client requests in distributor. | - | | `cortex_distributor_inflight_push_requests` | gauge | {request} | Current number of inflight push requests in distributor. | - | -| `cortex_distributor_ingester_append_failures_total` | counter | {append} | The total number of failed batch appends sent to ingesters. | `ingester`, `type`, `status` | +| `cortex_distributor_ingester_append_failures_total` | counter | {append} | The total number of failed batch appends sent to ingesters. | `ingester`, `status`, `type` | | `cortex_distributor_ingester_appends_total` | counter | {append} | The total number of batch appends sent to ingesters. | `ingester`, `type` | | `cortex_distributor_ingester_partial_data_queries_total` | counter | {query} | The total number of queries sent to ingesters that may have returned partial data. | - | | `cortex_distributor_ingester_push_timeouts_total` | counter | {timeout} | The total number of push requests to ingesters that were canceled due to timeout. | - | @@ -29,8 +29,8 @@ This document is auto-generated from the telemetry schema defined in | `cortex_distributor_query_duration_seconds` | histogram | s | Time spent executing expression and exemplar queries. | `method`, `status_code` | | `cortex_distributor_received_exemplars_total` | counter | {exemplar} | The total number of received exemplars, excluding rejected and deduped exemplars. | `user` | | `cortex_distributor_received_metadata_total` | counter | {metadata} | The total number of received metadata, excluding rejected. | `user` | -| `cortex_distributor_received_samples_per_labelset_total` | counter | {sample} | The total number of received samples per label set, excluding rejected and deduped samples. | `user`, `type`, `labelset` | -| `cortex_distributor_received_samples_total` | counter | {sample} | The total number of received samples, excluding rejected and deduped samples. | `user`, `type` | +| `cortex_distributor_received_samples_per_labelset_total` | counter | {sample} | The total number of received samples per label set, excluding rejected and deduped samples. | `labelset`, `type`, `user` | +| `cortex_distributor_received_samples_total` | counter | {sample} | The total number of received samples, excluding rejected and deduped samples. | `type`, `user` | | `cortex_distributor_replication_factor` | gauge | {factor} | The configured replication factor. | - | -| `cortex_distributor_samples_in_total` | counter | {sample} | The total number of samples that have come in to the distributor, including rejected or deduped samples. | `user`, `type` | +| `cortex_distributor_samples_in_total` | counter | {sample} | The total number of samples that have come in to the distributor, including rejected or deduped samples. | `type`, `user` | | `cortex_labels_per_sample` | histogram | {label} | Number of labels per sample. | - | \ No newline at end of file diff --git a/telemetry/templates/registry/go/metrics.go.j2 b/telemetry/templates/registry/go/metrics.go.j2 index 34b6fb50bd3..1515c294c43 100644 --- a/telemetry/templates/registry/go/metrics.go.j2 +++ b/telemetry/templates/registry/go/metrics.go.j2 @@ -28,11 +28,12 @@ type DistributorGaugeFuncs struct { {%- endfor %} } -{# Macro to render a label list like: []string{"user", "type"} #} -{%- macro label_list(attributes) -%} +{# Macro to render a label list like: []string{"user", "type"} using params.labels for ordering #} +{%- macro label_list(metric_name) -%} +{%- set lbls = params.labels[metric_name] -%} []string{{ "{" }} -{%- for attr in attributes -%} -"{{ attr.name }}"{% if not loop.last %}, {% endif %} +{%- for l in lbls -%} +"{{ l }}"{% if not loop.last %}, {% endif %} {%- endfor -%} {{ "}" }} {%- endmacro -%} @@ -67,7 +68,7 @@ func registerDistributorMetrics(d *Distributor, reg prometheus.Registerer, gf Di // {{ metric.brief }} {%- if metric.instrument == "counter" %} -{%- if metric.attributes %} +{%- if metric.metric_name in params.labels %} d.{{ field }} = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ {%- if ns %} Namespace: "{{ ns.namespace }}", @@ -76,7 +77,7 @@ func registerDistributorMetrics(d *Distributor, reg prometheus.Registerer, gf Di Name: "{{ metric.metric_name }}", {%- endif %} Help: "{{ metric.brief }}", - }, {{ label_list(metric.attributes) }}) + }, {{ label_list(metric.metric_name) }}) {%- else %} d.{{ field }} = promauto.With(reg).NewCounter(prometheus.CounterOpts{ {%- if ns %} @@ -89,7 +90,7 @@ func registerDistributorMetrics(d *Distributor, reg prometheus.Registerer, gf Di }) {%- endif %} {%- elif metric.instrument == "gauge" %} -{%- if metric.attributes %} +{%- if metric.metric_name in params.labels %} d.{{ field }} = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ {%- if ns %} Namespace: "{{ ns.namespace }}", @@ -98,7 +99,7 @@ func registerDistributorMetrics(d *Distributor, reg prometheus.Registerer, gf Di Name: "{{ metric.metric_name }}", {%- endif %} Help: "{{ metric.brief }}", - }, {{ label_list(metric.attributes) }}) + }, {{ label_list(metric.metric_name) }}) {%- else %} d.{{ field }} = promauto.With(reg).NewGauge(prometheus.GaugeOpts{ {%- if ns %} @@ -111,7 +112,7 @@ func registerDistributorMetrics(d *Distributor, reg prometheus.Registerer, gf Di }) {%- endif %} {%- elif metric.instrument == "histogram" %} -{%- if metric.attributes %} +{%- if metric.metric_name in params.labels %} {%- if is_wrapped %} d.{{ field }} = instrument.NewHistogramCollector(promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ {%- else %} @@ -128,9 +129,9 @@ func registerDistributorMetrics(d *Distributor, reg prometheus.Registerer, gf Di Buckets: {{ bucket_list(buckets) }}, {%- endif %} {%- if is_wrapped %} - }, {{ label_list(metric.attributes) }})) + }, {{ label_list(metric.metric_name) }})) {%- else %} - }, {{ label_list(metric.attributes) }}) + }, {{ label_list(metric.metric_name) }}) {%- endif %} {%- else %} d.{{ field }} = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ diff --git a/telemetry/templates/registry/go/weaver.yaml b/telemetry/templates/registry/go/weaver.yaml index 7beb6e0404d..d2c8ad4c264 100644 --- a/telemetry/templates/registry/go/weaver.yaml +++ b/telemetry/templates/registry/go/weaver.yaml @@ -58,6 +58,25 @@ params: histogram_collector_wrap: - cortex_distributor_query_duration_seconds + # Explicit label ordering for Go code generation (must match WithLabelValues call sites). + # Weaver may sort attributes alphabetically; this preserves the original Go label order. + labels: + cortex_distributor_received_samples_total: ["user", "type"] + cortex_distributor_received_samples_per_labelset_total: ["user", "type", "labelset"] + cortex_distributor_received_exemplars_total: ["user"] + cortex_distributor_received_metadata_total: ["user"] + cortex_distributor_samples_in_total: ["user", "type"] + cortex_distributor_exemplars_in_total: ["user"] + cortex_distributor_metadata_in_total: ["user"] + cortex_distributor_non_ha_samples_received_total: ["user"] + cortex_distributor_deduped_samples_total: ["user", "cluster"] + cortex_distributor_query_duration_seconds: ["method", "status_code"] + cortex_distributor_ingester_appends_total: ["ingester", "type"] + cortex_distributor_ingester_append_failures_total: ["ingester", "type", "status"] + cortex_distributor_ingester_queries_total: ["ingester"] + cortex_distributor_ingester_query_failures_total: ["ingester"] + cortex_distributor_latest_seen_sample_timestamp_seconds: ["user"] + templates: - pattern: metrics.go.j2 filter: semconv_grouped_metrics