From 92cc594be116420dcf6696f6d58665dbe4ff6708 Mon Sep 17 00:00:00 2001 From: Nikos Angelopoulos Date: Mon, 18 Nov 2024 10:06:37 +0100 Subject: [PATCH] change: replace cortex_discarded_samples_total label to sample-timestamp-too-old (#9885) * fix: change cortex_discarded_samples_total label to sample-timestamp-too-old This change was made in order to match err-mimir-sample-timestamp-too-old event logs * tests: update label value Signed-off-by: Nikos Angelopoulos * add CHANGELOG entry --------- Signed-off-by: Nikos Angelopoulos --- CHANGELOG.md | 1 + docs/proposals/reduce-multitenancy-cost.md | 2 +- pkg/ingester/ingester.go | 14 +++++++------- pkg/ingester/ingester_test.go | 22 +++++++++++----------- pkg/ingester/metrics.go | 8 ++++---- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4adf5db7f2..19f81b5f691 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ * [CHANGE] Ingester: Change `-initial-delay` for circuit breakers to begin when the first request is received, rather than at breaker activation. #9842 * [CHANGE] Query-frontend: apply query pruning before query sharding instead of after. #9913 * [CHANGE] Ingester: remove experimental flags `-ingest-storage.kafka.ongoing-records-per-fetch` and `-ingest-storage.kafka.startup-records-per-fetch`. They are removed in favour of `-ingest-storage.kafka.max-buffered-bytes`. #9906 +* [CHANGE] Ingester: Replace `cortex_discarded_samples_total` label from `sample-out-of-bounds` to `sample-timestamp-too-old`. #9885 * [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.query-engine=mimir`. #9367 #9368 #9398 #9399 #9403 #9417 #9418 #9419 #9420 #9482 #9504 #9505 #9507 #9518 #9531 #9532 #9533 #9553 #9558 #9588 #9589 #9639 #9641 #9642 #9651 #9664 #9681 #9717 #9719 #9724 #9874 * [FEATURE] Distributor: Add support for `lz4` OTLP compression. #9763 * [FEATURE] Query-frontend: added experimental configuration options `query-frontend.cache-errors` and `query-frontend.results-cache-ttl-for-errors` to allow non-transient responses to be cached. When set to `true` error responses from hitting limits or bad data are cached for a short TTL. #9028 diff --git a/docs/proposals/reduce-multitenancy-cost.md b/docs/proposals/reduce-multitenancy-cost.md index f7f826dd1d2..afeb21f5299 100644 --- a/docs/proposals/reduce-multitenancy-cost.md +++ b/docs/proposals/reduce-multitenancy-cost.md @@ -92,7 +92,7 @@ This is not tenant-related, it could be forwarded from the backend. This is not tenant-related, it could be forwarded from the backend. -#### sample-out-of-bounds +#### sample-timestamp-too-old This is not tenant-related, it could be forwarded from the backend. diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index 4daf0751ae8..46a79abf4b4 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -106,7 +106,7 @@ const ( reasonSampleTooOld = "sample-too-old" reasonSampleTooFarInFuture = "sample-too-far-in-future" reasonNewValueForTimestamp = "new-value-for-timestamp" - reasonSampleOutOfBounds = "sample-out-of-bounds" + reasonSampleTimestampTooOld = "sample-timestamp-too-old" reasonPerUserSeriesLimit = "per_user_series_limit" reasonPerMetricSeriesLimit = "per_metric_series_limit" reasonInvalidNativeHistogram = "invalid-native-histogram" @@ -959,7 +959,7 @@ type pushStats struct { failedSamplesCount int succeededExemplarsCount int failedExemplarsCount int - sampleOutOfBoundsCount int + sampleTimestampTooOldCount int sampleOutOfOrderCount int sampleTooOldCount int sampleTooFarInFutureCount int @@ -1189,7 +1189,7 @@ func (i *Ingester) PushWithCleanup(ctx context.Context, req *mimirpb.WriteReques stats.failedSamplesCount++ }, func(timestamp int64, labels []mimirpb.LabelAdapter) { - stats.sampleOutOfBoundsCount++ + stats.sampleTimestampTooOldCount++ updateFirstPartial(i.errorSamplers.sampleTimestampTooOld, func() softError { return newSampleTimestampTooOldError(model.Time(timestamp), labels) }) @@ -1336,8 +1336,8 @@ func (i *Ingester) PushWithCleanup(ctx context.Context, req *mimirpb.WriteReques } func (i *Ingester) updateMetricsFromPushStats(userID string, group string, stats *pushStats, samplesSource mimirpb.WriteRequest_SourceEnum, db *userTSDB, discarded *discardedMetrics) { - if stats.sampleOutOfBoundsCount > 0 { - discarded.sampleOutOfBounds.WithLabelValues(userID, group).Add(float64(stats.sampleOutOfBoundsCount)) + if stats.sampleTimestampTooOldCount > 0 { + discarded.sampleTimestampTooOld.WithLabelValues(userID, group).Add(float64(stats.sampleTimestampTooOldCount)) } if stats.sampleOutOfOrderCount > 0 { discarded.sampleOutOfOrder.WithLabelValues(userID, group).Add(float64(stats.sampleOutOfOrderCount)) @@ -1405,7 +1405,7 @@ func (i *Ingester) pushSamplesToAppender(userID string, timeseries []mimirpb.Pre allOutOfBoundsHistograms(ts.Histograms, minAppendTime) { stats.failedSamplesCount += len(ts.Samples) + len(ts.Histograms) - stats.sampleOutOfBoundsCount += len(ts.Samples) + len(ts.Histograms) + stats.sampleTimestampTooOldCount += len(ts.Samples) + len(ts.Histograms) var firstTimestamp int64 if len(ts.Samples) > 0 { @@ -1426,7 +1426,7 @@ func (i *Ingester) pushSamplesToAppender(userID string, timeseries []mimirpb.Pre len(ts.Samples) > 0 && allOutOfBoundsFloats(ts.Samples, minAppendTime) { stats.failedSamplesCount += len(ts.Samples) - stats.sampleOutOfBoundsCount += len(ts.Samples) + stats.sampleTimestampTooOldCount += len(ts.Samples) firstTimestamp := ts.Samples[0].TimestampMs diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index bdf76abbb4b..fea6b13f764 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -2465,7 +2465,7 @@ func TestIngester_Push(t *testing.T) { cortex_ingester_memory_series_removed_total{user="test"} 0 # HELP cortex_discarded_samples_total The total number of samples that were discarded. # TYPE cortex_discarded_samples_total counter - cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="test"} 2 + cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="test"} 2 # HELP cortex_ingester_active_series Number of currently active series per user. # TYPE cortex_ingester_active_series gauge cortex_ingester_active_series{user="test"} 1 @@ -2524,7 +2524,7 @@ func TestIngester_Push(t *testing.T) { cortex_ingester_memory_series_removed_total{user="test"} 0 # HELP cortex_discarded_samples_total The total number of samples that were discarded. # TYPE cortex_discarded_samples_total counter - cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="test"} 3 + cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="test"} 3 # HELP cortex_ingester_active_series Number of currently active series per user. # TYPE cortex_ingester_active_series gauge cortex_ingester_active_series{user="test"} 1 @@ -2643,7 +2643,7 @@ func TestIngester_Push(t *testing.T) { cortex_ingester_memory_series_removed_total{user="test"} 0 # HELP cortex_discarded_samples_total The total number of samples that were discarded. # TYPE cortex_discarded_samples_total counter - cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="test"} 2 + cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="test"} 2 # HELP cortex_ingester_active_series Number of currently active series per user. # TYPE cortex_ingester_active_series gauge cortex_ingester_active_series{user="test"} 1 @@ -10611,8 +10611,8 @@ func TestIngester_PushWithSampledErrors(t *testing.T) { expectedMetrics: ` # HELP cortex_discarded_samples_total The total number of samples that were discarded. # TYPE cortex_discarded_samples_total counter - cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-1"} 8 - cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-2"} 2 + cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-1"} 8 + cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-2"} 2 `, }, "should soft fail on all histograms out of bound in a write request": { @@ -10644,8 +10644,8 @@ func TestIngester_PushWithSampledErrors(t *testing.T) { expectedMetrics: ` # HELP cortex_discarded_samples_total The total number of samples that were discarded. # TYPE cortex_discarded_samples_total counter - cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-1"} 4 - cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-2"} 1 + cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-1"} 4 + cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-2"} 1 `, nativeHistograms: true, }, @@ -10679,8 +10679,8 @@ func TestIngester_PushWithSampledErrors(t *testing.T) { expectedMetrics: ` # HELP cortex_discarded_samples_total The total number of samples that were discarded. # TYPE cortex_discarded_samples_total counter - cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-1"} 12 - cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-2"} 3 + cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-1"} 12 + cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-2"} 3 `, nativeHistograms: true, }, @@ -10716,8 +10716,8 @@ func TestIngester_PushWithSampledErrors(t *testing.T) { expectedMetrics: ` # HELP cortex_discarded_samples_total The total number of samples that were discarded. # TYPE cortex_discarded_samples_total counter - cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-1"} 8 - cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-2"} 2 + cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-1"} 8 + cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-2"} 2 `, }, "should soft fail on some samples with timestamp too far in future in a write request": { diff --git a/pkg/ingester/metrics.go b/pkg/ingester/metrics.go index 833a802fb04..38e53d3c090 100644 --- a/pkg/ingester/metrics.go +++ b/pkg/ingester/metrics.go @@ -420,7 +420,7 @@ func (m *ingesterMetrics) deletePerUserCustomTrackerMetrics(userID string, custo } type discardedMetrics struct { - sampleOutOfBounds *prometheus.CounterVec + sampleTimestampTooOld *prometheus.CounterVec sampleOutOfOrder *prometheus.CounterVec sampleTooOld *prometheus.CounterVec sampleTooFarInFuture *prometheus.CounterVec @@ -432,7 +432,7 @@ type discardedMetrics struct { func newDiscardedMetrics(r prometheus.Registerer) *discardedMetrics { return &discardedMetrics{ - sampleOutOfBounds: validation.DiscardedSamplesCounter(r, reasonSampleOutOfBounds), + sampleTimestampTooOld: validation.DiscardedSamplesCounter(r, reasonSampleTimestampTooOld), sampleOutOfOrder: validation.DiscardedSamplesCounter(r, reasonSampleOutOfOrder), sampleTooOld: validation.DiscardedSamplesCounter(r, reasonSampleTooOld), sampleTooFarInFuture: validation.DiscardedSamplesCounter(r, reasonSampleTooFarInFuture), @@ -444,7 +444,7 @@ func newDiscardedMetrics(r prometheus.Registerer) *discardedMetrics { } func (m *discardedMetrics) DeletePartialMatch(filter prometheus.Labels) { - m.sampleOutOfBounds.DeletePartialMatch(filter) + m.sampleTimestampTooOld.DeletePartialMatch(filter) m.sampleOutOfOrder.DeletePartialMatch(filter) m.sampleTooOld.DeletePartialMatch(filter) m.sampleTooFarInFuture.DeletePartialMatch(filter) @@ -455,7 +455,7 @@ func (m *discardedMetrics) DeletePartialMatch(filter prometheus.Labels) { } func (m *discardedMetrics) DeleteLabelValues(userID string, group string) { - m.sampleOutOfBounds.DeleteLabelValues(userID, group) + m.sampleTimestampTooOld.DeleteLabelValues(userID, group) m.sampleOutOfOrder.DeleteLabelValues(userID, group) m.sampleTooOld.DeleteLabelValues(userID, group) m.sampleTooFarInFuture.DeleteLabelValues(userID, group)