Skip to content

Commit

Permalink
change: replace cortex_discarded_samples_total label to sample-timestamp-too-old (#9885)
Browse files Browse the repository at this point in the history

* fix: change cortex_discarded_samples_total label to sample-timestamp-too-old

This change was made in order to match err-mimir-sample-timestamp-too-old event logs

* tests: update label value

Signed-off-by: Nikos Angelopoulos <[email protected]>

* add CHANGELOG entry

---------

Signed-off-by: Nikos Angelopoulos <[email protected]>
  • Loading branch information
NickAnge authored Nov 18, 2024
1 parent 6f668ee commit 92cc594
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 23 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* [CHANGE] Ingester: Change `-initial-delay` for circuit breakers to begin when the first request is received, rather than at breaker activation. #9842
* [CHANGE] Query-frontend: apply query pruning before query sharding instead of after. #9913
* [CHANGE] Ingester: remove experimental flags `-ingest-storage.kafka.ongoing-records-per-fetch` and `-ingest-storage.kafka.startup-records-per-fetch`. They are removed in favour of `-ingest-storage.kafka.max-buffered-bytes`. #9906
* [CHANGE] Ingester: Change the `cortex_discarded_samples_total` reason label value from `sample-out-of-bounds` to `sample-timestamp-too-old`. #9885
* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.query-engine=mimir`. #9367 #9368 #9398 #9399 #9403 #9417 #9418 #9419 #9420 #9482 #9504 #9505 #9507 #9518 #9531 #9532 #9533 #9553 #9558 #9588 #9589 #9639 #9641 #9642 #9651 #9664 #9681 #9717 #9719 #9724 #9874
* [FEATURE] Distributor: Add support for `lz4` OTLP compression. #9763
* [FEATURE] Query-frontend: added experimental configuration options `query-frontend.cache-errors` and `query-frontend.results-cache-ttl-for-errors` to allow non-transient responses to be cached. When set to `true` error responses from hitting limits or bad data are cached for a short TTL. #9028
Expand Down
2 changes: 1 addition & 1 deletion docs/proposals/reduce-multitenancy-cost.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ This is not tenant-related, it could be forwarded from the backend.

This is not tenant-related, it could be forwarded from the backend.

#### sample-out-of-bounds
#### sample-timestamp-too-old

This is not tenant-related, it could be forwarded from the backend.

Expand Down
14 changes: 7 additions & 7 deletions pkg/ingester/ingester.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ const (
reasonSampleTooOld = "sample-too-old"
reasonSampleTooFarInFuture = "sample-too-far-in-future"
reasonNewValueForTimestamp = "new-value-for-timestamp"
reasonSampleOutOfBounds = "sample-out-of-bounds"
reasonSampleTimestampTooOld = "sample-timestamp-too-old"
reasonPerUserSeriesLimit = "per_user_series_limit"
reasonPerMetricSeriesLimit = "per_metric_series_limit"
reasonInvalidNativeHistogram = "invalid-native-histogram"
Expand Down Expand Up @@ -959,7 +959,7 @@ type pushStats struct {
failedSamplesCount int
succeededExemplarsCount int
failedExemplarsCount int
sampleOutOfBoundsCount int
sampleTimestampTooOldCount int
sampleOutOfOrderCount int
sampleTooOldCount int
sampleTooFarInFutureCount int
Expand Down Expand Up @@ -1189,7 +1189,7 @@ func (i *Ingester) PushWithCleanup(ctx context.Context, req *mimirpb.WriteReques
stats.failedSamplesCount++
},
func(timestamp int64, labels []mimirpb.LabelAdapter) {
stats.sampleOutOfBoundsCount++
stats.sampleTimestampTooOldCount++
updateFirstPartial(i.errorSamplers.sampleTimestampTooOld, func() softError {
return newSampleTimestampTooOldError(model.Time(timestamp), labels)
})
Expand Down Expand Up @@ -1336,8 +1336,8 @@ func (i *Ingester) PushWithCleanup(ctx context.Context, req *mimirpb.WriteReques
}

func (i *Ingester) updateMetricsFromPushStats(userID string, group string, stats *pushStats, samplesSource mimirpb.WriteRequest_SourceEnum, db *userTSDB, discarded *discardedMetrics) {
if stats.sampleOutOfBoundsCount > 0 {
discarded.sampleOutOfBounds.WithLabelValues(userID, group).Add(float64(stats.sampleOutOfBoundsCount))
if stats.sampleTimestampTooOldCount > 0 {
discarded.sampleTimestampTooOld.WithLabelValues(userID, group).Add(float64(stats.sampleTimestampTooOldCount))
}
if stats.sampleOutOfOrderCount > 0 {
discarded.sampleOutOfOrder.WithLabelValues(userID, group).Add(float64(stats.sampleOutOfOrderCount))
Expand Down Expand Up @@ -1405,7 +1405,7 @@ func (i *Ingester) pushSamplesToAppender(userID string, timeseries []mimirpb.Pre
allOutOfBoundsHistograms(ts.Histograms, minAppendTime) {

stats.failedSamplesCount += len(ts.Samples) + len(ts.Histograms)
stats.sampleOutOfBoundsCount += len(ts.Samples) + len(ts.Histograms)
stats.sampleTimestampTooOldCount += len(ts.Samples) + len(ts.Histograms)

var firstTimestamp int64
if len(ts.Samples) > 0 {
Expand All @@ -1426,7 +1426,7 @@ func (i *Ingester) pushSamplesToAppender(userID string, timeseries []mimirpb.Pre
len(ts.Samples) > 0 && allOutOfBoundsFloats(ts.Samples, minAppendTime) {

stats.failedSamplesCount += len(ts.Samples)
stats.sampleOutOfBoundsCount += len(ts.Samples)
stats.sampleTimestampTooOldCount += len(ts.Samples)

firstTimestamp := ts.Samples[0].TimestampMs

Expand Down
22 changes: 11 additions & 11 deletions pkg/ingester/ingester_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2465,7 +2465,7 @@ func TestIngester_Push(t *testing.T) {
cortex_ingester_memory_series_removed_total{user="test"} 0
# HELP cortex_discarded_samples_total The total number of samples that were discarded.
# TYPE cortex_discarded_samples_total counter
cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="test"} 2
cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="test"} 2
# HELP cortex_ingester_active_series Number of currently active series per user.
# TYPE cortex_ingester_active_series gauge
cortex_ingester_active_series{user="test"} 1
Expand Down Expand Up @@ -2524,7 +2524,7 @@ func TestIngester_Push(t *testing.T) {
cortex_ingester_memory_series_removed_total{user="test"} 0
# HELP cortex_discarded_samples_total The total number of samples that were discarded.
# TYPE cortex_discarded_samples_total counter
cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="test"} 3
cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="test"} 3
# HELP cortex_ingester_active_series Number of currently active series per user.
# TYPE cortex_ingester_active_series gauge
cortex_ingester_active_series{user="test"} 1
Expand Down Expand Up @@ -2643,7 +2643,7 @@ func TestIngester_Push(t *testing.T) {
cortex_ingester_memory_series_removed_total{user="test"} 0
# HELP cortex_discarded_samples_total The total number of samples that were discarded.
# TYPE cortex_discarded_samples_total counter
cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="test"} 2
cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="test"} 2
# HELP cortex_ingester_active_series Number of currently active series per user.
# TYPE cortex_ingester_active_series gauge
cortex_ingester_active_series{user="test"} 1
Expand Down Expand Up @@ -10611,8 +10611,8 @@ func TestIngester_PushWithSampledErrors(t *testing.T) {
expectedMetrics: `
# HELP cortex_discarded_samples_total The total number of samples that were discarded.
# TYPE cortex_discarded_samples_total counter
cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-1"} 8
cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-2"} 2
cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-1"} 8
cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-2"} 2
`,
},
"should soft fail on all histograms out of bound in a write request": {
Expand Down Expand Up @@ -10644,8 +10644,8 @@ func TestIngester_PushWithSampledErrors(t *testing.T) {
expectedMetrics: `
# HELP cortex_discarded_samples_total The total number of samples that were discarded.
# TYPE cortex_discarded_samples_total counter
cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-1"} 4
cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-2"} 1
cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-1"} 4
cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-2"} 1
`,
nativeHistograms: true,
},
Expand Down Expand Up @@ -10679,8 +10679,8 @@ func TestIngester_PushWithSampledErrors(t *testing.T) {
expectedMetrics: `
# HELP cortex_discarded_samples_total The total number of samples that were discarded.
# TYPE cortex_discarded_samples_total counter
cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-1"} 12
cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-2"} 3
cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-1"} 12
cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-2"} 3
`,
nativeHistograms: true,
},
Expand Down Expand Up @@ -10716,8 +10716,8 @@ func TestIngester_PushWithSampledErrors(t *testing.T) {
expectedMetrics: `
# HELP cortex_discarded_samples_total The total number of samples that were discarded.
# TYPE cortex_discarded_samples_total counter
cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-1"} 8
cortex_discarded_samples_total{group="",reason="sample-out-of-bounds",user="user-2"} 2
cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-1"} 8
cortex_discarded_samples_total{group="",reason="sample-timestamp-too-old",user="user-2"} 2
`,
},
"should soft fail on some samples with timestamp too far in future in a write request": {
Expand Down
8 changes: 4 additions & 4 deletions pkg/ingester/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ func (m *ingesterMetrics) deletePerUserCustomTrackerMetrics(userID string, custo
}

type discardedMetrics struct {
sampleOutOfBounds *prometheus.CounterVec
sampleTimestampTooOld *prometheus.CounterVec
sampleOutOfOrder *prometheus.CounterVec
sampleTooOld *prometheus.CounterVec
sampleTooFarInFuture *prometheus.CounterVec
Expand All @@ -432,7 +432,7 @@ type discardedMetrics struct {

func newDiscardedMetrics(r prometheus.Registerer) *discardedMetrics {
return &discardedMetrics{
sampleOutOfBounds: validation.DiscardedSamplesCounter(r, reasonSampleOutOfBounds),
sampleTimestampTooOld: validation.DiscardedSamplesCounter(r, reasonSampleTimestampTooOld),
sampleOutOfOrder: validation.DiscardedSamplesCounter(r, reasonSampleOutOfOrder),
sampleTooOld: validation.DiscardedSamplesCounter(r, reasonSampleTooOld),
sampleTooFarInFuture: validation.DiscardedSamplesCounter(r, reasonSampleTooFarInFuture),
Expand All @@ -444,7 +444,7 @@ func newDiscardedMetrics(r prometheus.Registerer) *discardedMetrics {
}

func (m *discardedMetrics) DeletePartialMatch(filter prometheus.Labels) {
m.sampleOutOfBounds.DeletePartialMatch(filter)
m.sampleTimestampTooOld.DeletePartialMatch(filter)
m.sampleOutOfOrder.DeletePartialMatch(filter)
m.sampleTooOld.DeletePartialMatch(filter)
m.sampleTooFarInFuture.DeletePartialMatch(filter)
Expand All @@ -455,7 +455,7 @@ func (m *discardedMetrics) DeletePartialMatch(filter prometheus.Labels) {
}

func (m *discardedMetrics) DeleteLabelValues(userID string, group string) {
m.sampleOutOfBounds.DeleteLabelValues(userID, group)
m.sampleTimestampTooOld.DeleteLabelValues(userID, group)
m.sampleOutOfOrder.DeleteLabelValues(userID, group)
m.sampleTooOld.DeleteLabelValues(userID, group)
m.sampleTooFarInFuture.DeleteLabelValues(userID, group)
Expand Down

0 comments on commit 92cc594

Please sign in to comment.