From de5cb003a82da618eceefd337186f8c0dd1376bf Mon Sep 17 00:00:00 2001 From: Javier Molina Reyes Date: Fri, 25 Oct 2024 21:48:55 +0200 Subject: [PATCH] fix: skip exemplars for instant queries (#4204) Currently, we are computing the exemplars for query_range and instant queries. Since exemplars are not supported for instant queries (same as Prometheus) we can save that computing time. It also fixes a bug where the exemplars query param was not being honored Co-authored-by: Martin Disibio --- CHANGELOG.md | 2 +- docs/sources/tempo/api_docs/_index.md | 4 +- docs/sources/tempo/configuration/_index.md | 4 ++ docs/sources/tempo/traceql/metrics-queries.md | 4 +- integration/e2e/config-query-range.yaml | 2 - modules/frontend/config.go | 1 - modules/frontend/frontend.go | 15 +++++- .../frontend/metrics_query_range_sharder.go | 52 ++++++++++++------- 8 files changed, 56 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e58871a3e44..e355f171fc9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,7 +46,7 @@ * [BUGFIX] Correctly handle 400 Bad Request and 404 Not Found in gRPC streaming [#4144](https://github.com/grafana/tempo/pull/4144) (@mapno) * [BUGFIX] Pushes a 0 to classic histogram's counter when the series is new to allow Prometheus to start from a non-null value. [#4140](https://github.com/grafana/tempo/pull/4140) (@mapno) * [BUGFIX] Fix counter samples being downsampled by backdate to the previous minute the initial sample when the series is new [#44236](https://github.com/grafana/tempo/pull/4236) (@javiermolinar) - +* [BUGFIX] Skip computing exemplars for instant queries. [#4204](https://github.com/grafana/tempo/pull/4204) (@javiermolinar) # v2.6.1 * [CHANGE] **BREAKING CHANGE** tempo-query is no longer a Jaeger instance with grpcPlugin. It's now a standalone server. Serving a gRPC API for Jaeger on `0.0.0.0:7777` by default. 
[#3840](https://github.com/grafana/tempo/issues/3840) (@frzifus) diff --git a/docs/sources/tempo/api_docs/_index.md b/docs/sources/tempo/api_docs/_index.md index 4d85b0503ae..f601400aae6 100644 --- a/docs/sources/tempo/api_docs/_index.md +++ b/docs/sources/tempo/api_docs/_index.md @@ -598,6 +598,8 @@ Parameters: Optional. Can be used instead of `start` and `end` to define the time range in relative values. For example `since=15m` will query the last 15 minutes. Default is last 1 hour. - `step = (duration string)` Optional. Defines the granularity of the returned time-series. For example `step=15s` will return a data point every 15s within the time range. If not specified then the default behavior will choose a dynamic step based on the time range. +- `exemplars = (integer)` + Optional. Defines the maximum number of exemplars for the query. It will be trimmed to max_exemplars if it exceeds it. The API is available in the query frontend service in a microservices deployment, or the Tempo endpoint in a monolithic mode deployment. @@ -609,7 +611,7 @@ Actual API parameters must be url-encoded. This example is left unencoded for re {{% /admonition %}} ``` -GET /api/metrics/query_range?q={resource.service.name="myservice"}|rate()&since=3h&step=1m +GET /api/metrics/query_range?q={resource.service.name="myservice"} | min_over_time() with(exemplars=true) &since=3h&step=1m&exemplars=100 ``` #### Instant diff --git a/docs/sources/tempo/configuration/_index.md b/docs/sources/tempo/configuration/_index.md index e8141fd68d8..3e48575f5e6 100644 --- a/docs/sources/tempo/configuration/_index.md +++ b/docs/sources/tempo/configuration/_index.md @@ -678,6 +678,9 @@ query_frontend: # 0 disables this limit. [max_duration: | default = 3h ] + # Maximum number of exemplars per range query. Limited to 100. + [max_exemplars: | default = 100 ] + # query_backend_after controls where the query-frontend searches for traces. 
# Time ranges older than query_backend_after will be searched in the backend/object storage only. # Time ranges between query_backend_after and now will be queried from the metrics-generators. @@ -694,6 +697,7 @@ query_frontend: # If set to a non-zero value, it's value will be used to decide if query is within SLO or not. # Query is within SLO if it returned 200 within duration_slo seconds OR processed throughput_slo bytes/s data. [throughput_bytes_slo: | default = 0 ] + ``` ## Querier diff --git a/docs/sources/tempo/traceql/metrics-queries.md b/docs/sources/tempo/traceql/metrics-queries.md index a0ed0bad17e..b3fac1e0642 100644 --- a/docs/sources/tempo/traceql/metrics-queries.md +++ b/docs/sources/tempo/traceql/metrics-queries.md @@ -37,8 +37,8 @@ Exemplars are a powerful feature of TraceQL metrics. They allow you to see an exact trace that contributed to a given metric value. This is particularly useful when you want to understand why a given metric is high or low. -Exemplars are available in TraceQL metrics for all functions. -To get exemplars, you need to configure it in the query-frontend with the parameter `query_frontend.metrics.exemplars`, +Exemplars are available in TraceQL metrics for all range queries. +To get exemplars, you need to configure it in the query-frontend with the parameter `query_frontend.metrics.max_exemplars`, or pass a query hint in your query. 
``` diff --git a/integration/e2e/config-query-range.yaml b/integration/e2e/config-query-range.yaml index d17f80a77cf..faa5e5be6e0 100644 --- a/integration/e2e/config-query-range.yaml +++ b/integration/e2e/config-query-range.yaml @@ -9,8 +9,6 @@ query_frontend: search: query_backend_after: 0 # setting these both to 0 will force all range searches to hit the backend query_ingesters_until: 0 - metrics: - exemplars: true distributor: receivers: diff --git a/modules/frontend/config.go b/modules/frontend/config.go index 655d876e856..0ba194df4b2 100644 --- a/modules/frontend/config.go +++ b/modules/frontend/config.go @@ -93,7 +93,6 @@ func (cfg *Config) RegisterFlagsAndApplyDefaults(string, *flag.FlagSet) { ConcurrentRequests: defaultConcurrentRequests, TargetBytesPerRequest: defaultTargetBytesPerRequest, Interval: 5 * time.Minute, - Exemplars: false, // TODO: Remove? MaxExemplars: 100, }, SLO: slo, diff --git a/modules/frontend/frontend.go b/modules/frontend/frontend.go index e4459edbdaf..d4f73276f38 100644 --- a/modules/frontend/frontend.go +++ b/modules/frontend/frontend.go @@ -156,7 +156,18 @@ func New(cfg Config, next pipeline.RoundTripper, o overrides.Interface, reader t queryValidatorWare, pipeline.NewWeightRequestWare(pipeline.TraceQLMetrics, cfg.Weights), multiTenantMiddleware(cfg, logger), - newAsyncQueryRangeSharder(reader, o, cfg.Metrics.Sharder, logger), + newAsyncQueryRangeSharder(reader, o, cfg.Metrics.Sharder, false, logger), + }, + []pipeline.Middleware{cacheWare, statusCodeWare, retryWare}, + next) + + queryInstantPipeline := pipeline.Build( + []pipeline.AsyncMiddleware[combiner.PipelineResponse]{ + urlDenyListWare, + queryValidatorWare, + pipeline.NewWeightRequestWare(pipeline.TraceQLMetrics, cfg.Weights), + multiTenantMiddleware(cfg, logger), + newAsyncQueryRangeSharder(reader, o, cfg.Metrics.Sharder, true, logger), }, []pipeline.Middleware{cacheWare, statusCodeWare, retryWare}, next) @@ -169,7 +180,7 @@ func New(cfg Config, next pipeline.RoundTripper, 
o overrides.Interface, reader t searchTagValues := newTagValuesHTTPHandler(cfg, searchTagValuesPipeline, o, logger) searchTagValuesV2 := newTagValuesV2HTTPHandler(cfg, searchTagValuesPipeline, o, logger) metrics := newMetricsSummaryHandler(metricsPipeline, logger) - queryInstant := newMetricsQueryInstantHTTPHandler(cfg, queryRangePipeline, logger) // Reuses the same pipeline + queryInstant := newMetricsQueryInstantHTTPHandler(cfg, queryInstantPipeline, logger) // Reuses the same pipeline queryRange := newMetricsQueryRangeHTTPHandler(cfg, queryRangePipeline, logger) return &QueryFrontend{ diff --git a/modules/frontend/metrics_query_range_sharder.go b/modules/frontend/metrics_query_range_sharder.go index eb157c5356f..76c66500de4 100644 --- a/modules/frontend/metrics_query_range_sharder.go +++ b/modules/frontend/metrics_query_range_sharder.go @@ -26,11 +26,12 @@ import ( ) type queryRangeSharder struct { - next pipeline.AsyncRoundTripper[combiner.PipelineResponse] - reader tempodb.Reader - overrides overrides.Interface - cfg QueryRangeSharderConfig - logger log.Logger + next pipeline.AsyncRoundTripper[combiner.PipelineResponse] + reader tempodb.Reader + overrides overrides.Interface + cfg QueryRangeSharderConfig + logger log.Logger + instantMode bool } type QueryRangeSharderConfig struct { @@ -39,28 +40,32 @@ type QueryRangeSharderConfig struct { MaxDuration time.Duration `yaml:"max_duration"` QueryBackendAfter time.Duration `yaml:"query_backend_after,omitempty"` Interval time.Duration `yaml:"interval,omitempty"` - Exemplars bool `yaml:"exemplars,omitempty"` MaxExemplars int `yaml:"max_exemplars,omitempty"` } // newAsyncQueryRangeSharder creates a sharding middleware for search -func newAsyncQueryRangeSharder(reader tempodb.Reader, o overrides.Interface, cfg QueryRangeSharderConfig, logger log.Logger) pipeline.AsyncMiddleware[combiner.PipelineResponse] { +func newAsyncQueryRangeSharder(reader tempodb.Reader, o overrides.Interface, cfg QueryRangeSharderConfig, 
instantMode bool, logger log.Logger) pipeline.AsyncMiddleware[combiner.PipelineResponse] { return pipeline.AsyncMiddlewareFunc[combiner.PipelineResponse](func(next pipeline.AsyncRoundTripper[combiner.PipelineResponse]) pipeline.AsyncRoundTripper[combiner.PipelineResponse] { return queryRangeSharder{ - next: next, - reader: reader, - overrides: o, - - cfg: cfg, - logger: logger, + next: next, + reader: reader, + overrides: o, + instantMode: instantMode, + cfg: cfg, + logger: logger, } }) } func (s queryRangeSharder) RoundTrip(pipelineRequest pipeline.Request) (pipeline.Responses[combiner.PipelineResponse], error) { r := pipelineRequest.HTTPRequest() + spanName := "frontend.QueryRangeSharder.range" + + if s.instantMode { + spanName = "frontend.QueryRangeSharder.instant" + } - ctx, span := tracer.Start(r.Context(), "frontend.QueryRangeSharder") + ctx, span := tracer.Start(r.Context(), spanName) defer span.End() req, err := api.ParseQueryRangeRequest(r) @@ -92,6 +97,16 @@ func (s queryRangeSharder) RoundTrip(pipelineRequest pipeline.Request) (pipeline return pipeline.NewBadRequest(err), nil } + var maxExemplars uint32 + // Instant queries must not compute exemplars + if !s.instantMode && s.cfg.MaxExemplars > 0 { + maxExemplars = req.Exemplars + if maxExemplars == 0 || maxExemplars > uint32(s.cfg.MaxExemplars) { + maxExemplars = uint32(s.cfg.MaxExemplars) // Enforce configuration + } + } + req.Exemplars = maxExemplars + var ( allowUnsafe = s.overrides.UnsafeQueryHints(tenantID) targetBytesPerRequest = s.jobSize(expr, allowUnsafe) @@ -150,11 +165,11 @@ func (s *queryRangeSharder) blockMetas(start, end int64, tenantID string) []*bac return metas } -func (s *queryRangeSharder) exemplarsPerShard(total uint32) uint32 { - if !s.cfg.Exemplars { +func (s *queryRangeSharder) exemplarsPerShard(total uint32, exemplars uint32) uint32 { + if exemplars == 0 { return 0 } - return uint32(math.Ceil(float64(s.cfg.MaxExemplars)*1.2)) / total + return 
uint32(math.Ceil(float64(exemplars)*1.2)) / total } func (s *queryRangeSharder) backendRequests(ctx context.Context, tenantID string, parent pipeline.Request, searchReq tempopb.QueryRangeRequest, cutoff time.Time, targetBytesPerRequest int, reqCh chan pipeline.Request) (totalJobs, totalBlocks uint32, totalBlockBytes uint64) { @@ -209,7 +224,7 @@ func (s *queryRangeSharder) buildBackendRequests(ctx context.Context, tenantID s queryHash := hashForQueryRangeRequest(&searchReq) colsToJSON := api.NewDedicatedColumnsToJSON() - exemplarsPerBlock := s.exemplarsPerShard(uint32(len(metas))) + exemplarsPerBlock := s.exemplarsPerShard(uint32(len(metas)), searchReq.Exemplars) for _, m := range metas { if m.EndTime.Before(m.StartTime) { // Ignore blocks with bad timings from debugging @@ -299,7 +314,6 @@ func (s *queryRangeSharder) generatorRequest(ctx context.Context, tenantID strin } searchReq.QueryMode = querier.QueryModeRecent - searchReq.Exemplars = uint32(s.cfg.MaxExemplars) // TODO: Review this subR := parent.HTTPRequest().Clone(ctx) subR = api.BuildQueryRangeRequest(subR, &searchReq, "") // dedicated cols are never passed to the generators