Skip to content

Commit

Permalink
Fixed mismatched schema regarding fixedVectorLen (#1866)
Browse files Browse the repository at this point in the history
* fix(query) Fixed mismatched schema regarding fixedVectorLen. (#1855)

* Fixed mismatched schema regarding fixedVectorLen.
* Do not compare against colIds on schema match.
---------

Co-authored-by: Yu Zhang <[email protected]>
(cherry picked from commit 7169e1c)

* bump up version for filodb bug fix.

---------

Co-authored-by: Yu Zhang <[email protected]>
  • Loading branch information
yu-shipit and Yu Zhang authored Oct 7, 2024
1 parent daa0136 commit 8eaa7af
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ final case class LocalPartitionReduceAggregateExec(queryContext: QueryContext,
override def reduceSchemas(r1: ResultSchema, r2: ResultSchema): ResultSchema = {
if (r1.isEmpty) r2
else if (r2.isEmpty) r1
else if (r1 != r2) {
else if (r1 == r2 && (!r1.hasSameColumnsAs(r2)) || r1.fixedVectorLen != r2.fixedVectorLen) {
throw SchemaMismatch(r1.toString, r2.toString, getClass.getSimpleName)
} else r1
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ abstract class CountRowAggregator extends RowAggregator {
}
def present(aggRangeVector: RangeVector, limit: Int,
rangeParams: RangeParams, queryStats: QueryStats): Seq[RangeVector] = Seq(aggRangeVector)
def reductionSchema(source: ResultSchema): ResultSchema = schema
def presentationSchema(reductionSchema: ResultSchema): ResultSchema = reductionSchema
def reductionSchema(source: ResultSchema): ResultSchema = schema.copy(fixedVectorLen = source.fixedVectorLen)
def presentationSchema(reductionSchema: ResultSchema): ResultSchema = reductionSchema.copy(
fixedVectorLen = reductionSchema.fixedVectorLen)
}

object CountRowAggregator {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,12 @@ class CountValuesRowAggregator(label: String, limit: Int = 1000) extends RowAggr
val cols = new Array[ColumnInfo](2)
cols(0) = source.columns(0)
cols(1) = ColumnInfo("map", ColumnType.StringColumn)
ResultSchema(cols, 1)
ResultSchema(cols, 1, fixedVectorLen = source.fixedVectorLen)
}

def presentationSchema(reductionSchema: ResultSchema): ResultSchema = {
ResultSchema(Array(reductionSchema.columns.head, ColumnInfo("value", ColumnType.DoubleColumn)), 1)
ResultSchema(Array(reductionSchema.columns.head, ColumnInfo("value", ColumnType.DoubleColumn)), 1,
fixedVectorLen = reductionSchema.fixedVectorLen)
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,11 @@ class QuantileRowAggregator(q: Double) extends RowAggregator {
cols(0) = source.columns(0)
// TODO need a first class blob column
cols(1) = ColumnInfo("tdig", ColumnType.StringColumn)
ResultSchema(cols, 1)
ResultSchema(cols, 1, fixedVectorLen = source.fixedVectorLen)
}

def presentationSchema(reductionSchema: ResultSchema): ResultSchema = {
ResultSchema(Array(reductionSchema.columns(0), ColumnInfo("value", ColumnType.DoubleColumn)), 1)
ResultSchema(Array(reductionSchema.columns(0), ColumnInfo("value", ColumnType.DoubleColumn)), 1,
fixedVectorLen = reductionSchema.fixedVectorLen)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,11 @@ class TopBottomKRowAggregator(k: Int, bottomK: Boolean) extends RowAggregator wi
cols(i + 1) = ColumnInfo(s"top${(i + 1)/2}-Val", ColumnType.DoubleColumn)
i += 2
}
ResultSchema(cols, 1)
ResultSchema(cols, 1, fixedVectorLen = source.fixedVectorLen)
}

def presentationSchema(reductionSchema: ResultSchema): ResultSchema = {
ResultSchema(Array(reductionSchema.columns(0), ColumnInfo("value", ColumnType.DoubleColumn)), 1)
ResultSchema(Array(reductionSchema.columns(0), ColumnInfo("value", ColumnType.DoubleColumn)), 1,
fixedVectorLen = reductionSchema.fixedVectorLen)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
package filodb.query.exec

import com.typesafe.config.ConfigFactory
import filodb.core.binaryrecord2.RecordBuilder
import filodb.core.memstore.{FixedMaxPartitionsEvictionPolicy, SomeData, TimeSeriesMemStore}
import filodb.core.metadata.Column.ColumnType.{DoubleColumn, TimestampColumn}
import filodb.core.metadata.Schemas
import filodb.core.query._
import filodb.core.store.{AllChunkScan, ChunkSource, InMemoryMetaStore, NullColumnStore, TimeRangeChunkScan}
import filodb.core.{DatasetRef, GlobalConfig, TestData}
import filodb.memory.MemFactory
import filodb.memory.format.{SeqRowReader, ZeroCopyUTF8String}
import filodb.query._
import monix.eval.Task
import monix.execution.Scheduler
import monix.execution.Scheduler.Implicits.global
import monix.reactive.Observable
import org.scalatest.BeforeAndAfterAll
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.funspec.AnyFunSpec
import org.scalatest.matchers.should.Matchers
import org.scalatest.time.{Millis, Seconds, Span}

import scala.concurrent.duration._

object LocalPartitionDistConcatExecSpec {
val config = ConfigFactory.load("application_test.conf").getConfig("filodb")
val queryConfig = QueryConfig(config.getConfig("query"))
val querySession = QuerySession(QueryContext(), queryConfig)
val dummyDispatcher = new PlanDispatcher {
override def dispatch(plan: ExecPlanWithClientParams, source: ChunkSource)
(implicit sched: Scheduler): Task[QueryResponse] = {
plan.execPlan.execute(source, querySession)(global)
}

override def clusterName: String = ???

override def isLocalCall: Boolean = ???

override def dispatchStreaming(plan: ExecPlanWithClientParams,
source: ChunkSource)(implicit sched: Scheduler): Observable[StreamQueryResponse] = ???
}

val dsRef = DatasetRef("raw-metrics")
val dummyPlan = MultiSchemaPartitionsExec(QueryContext(), dummyDispatcher, dsRef, 0, Nil, AllChunkScan, "_metric_")

val builder = new RecordBuilder(MemFactory.onHeapFactory)
}


class LocalPartitionDistConcatExecSpec extends AnyFunSpec with Matchers with ScalaFutures with BeforeAndAfterAll {

import LocalPartitionDistConcatExecSpec._
import Schemas.promCounter
import ZeroCopyUTF8String._
import filodb.core.{MachineMetricsData => MMD}

implicit val defaultPatience = PatienceConfig(timeout = Span(30, Seconds), interval = Span(250, Millis))

val policy = new FixedMaxPartitionsEvictionPolicy(20)
val memStore = new TimeSeriesMemStore(config, new NullColumnStore, new InMemoryMetaStore(), Some(policy))

val metric = "http_req_total"
val partKeyLabelValues = Map("job" -> "myCoolService", "instance" -> "someHost:8787", "host" -> "host-1")
val partKeyKVWithMetric = partKeyLabelValues ++ Map("_metric_" -> metric)
val partTagsUTF8 = partKeyLabelValues.map { case (k, v) => (k.utf8, v.utf8) }
val now = System.currentTimeMillis()
val numRawSamples = 1000
val reportingInterval = 10000
val tuples = (numRawSamples until 0).by(-1).map { n =>
(now - n * reportingInterval, n.toDouble)
}
val schemas = Schemas(promCounter.partition,
Map(promCounter.name -> promCounter,
"histogram" -> MMD.histDataset.schema,
Schemas.dsGauge.name -> Schemas.dsGauge))

// NOTE: due to max-chunk-size in storeConf = 100, this will make (numRawSamples / 100) chunks
// Be sure to reset the builder; it is in an Object so static and shared amongst tests
builder.reset()
tuples.map { t => SeqRowReader(Seq(t._1, t._2, metric.utf8, partTagsUTF8)) }
.foreach(builder.addFromReader(_, promCounter))
val container = builder.allContainers.head

val mmdBuilder = new RecordBuilder(MemFactory.onHeapFactory)
val mmdTuples = MMD.linearMultiSeries().take(100)
val mmdSomeData = MMD.records(MMD.dataset1, mmdTuples)
val histData = MMD.linearHistSeries().take(100)
val histMaxMinData = MMD.histMaxMin(histData)

val histDataDisabledWS = MMD.linearHistSeries(ws = GlobalConfig.workspacesDisabledForMaxMin.get.head).take(100)
val histMaxMinDataDisabledWS = MMD.histMaxMin(histDataDisabledWS)

implicit val execTimeout = 5.seconds

override def beforeAll(): Unit = {
memStore.setup(dsRef, schemas, 0, TestData.storeConf, 2)
memStore.ingest(dsRef, 0, SomeData(container, 0))
memStore.ingest(dsRef, 0, MMD.records(MMD.histDataset, histData))

// set up shard, but do not ingest data to simulate an empty shard
memStore.setup(dsRef, schemas, 1, TestData.storeConf, 2)

memStore.setup(MMD.dataset1.ref, Schemas(MMD.schema1), 0, TestData.storeConf, 1)
memStore.ingest(MMD.dataset1.ref, 0, mmdSomeData)
memStore.setup(MMD.histMaxMinDS.ref, Schemas(MMD.histMaxMinDS.schema), 0, TestData.storeConf, 1)
memStore.ingest(MMD.histMaxMinDS.ref, 0, MMD.records(MMD.histMaxMinDS, histMaxMinData))
memStore.ingest(MMD.histMaxMinDS.ref, 0, MMD.records(MMD.histMaxMinDS, histMaxMinDataDisabledWS))

memStore.refreshIndexForTesting(dsRef)
memStore.refreshIndexForTesting(MMD.dataset1.ref)
memStore.refreshIndexForTesting(MMD.histMaxMinDS.ref)
}

override def afterAll(): Unit = {
memStore.shutdown()
}

it("should be able to hande complex query like sum(topk + sum)") {
val startTime = now - numRawSamples * reportingInterval
val endTime = now - (numRawSamples - 10) * reportingInterval
val filters = Seq(ColumnFilter("_metric_", Filter.Equals("http_req_total".utf8)))
val leafExecPlan = MultiSchemaPartitionsExec(QueryContext(), dummyDispatcher,
dsRef, 0, filters, TimeRangeChunkScan(startTime, endTime), "_metric_")
val transformer = PeriodicSamplesMapper(startTime, reportingInterval, endTime, None, None, QueryContext())
leafExecPlan.addRangeVectorTransformer(transformer)

val topK = LocalPartitionReduceAggregateExec(QueryContext(), dummyDispatcher,
Array[ExecPlan](leafExecPlan), AggregationOperator.TopK, Seq(1.0))
val sum = LocalPartitionReduceAggregateExec(QueryContext(), dummyDispatcher,
Array[ExecPlan](leafExecPlan), AggregationOperator.Sum, Seq(0))
val binaryjoin1 = BinaryJoinExec(QueryContext(), dummyDispatcher,
Seq(topK), Seq(sum), BinaryOperator.ADD, Cardinality.OneToOne,
None, Nil, Nil, "__name__", Some(RvRange(startMs = 0, endMs = 19, stepMs = 1)))
val binaryjoin2 = BinaryJoinExec(QueryContext(), dummyDispatcher,
Seq(sum), Seq(topK), BinaryOperator.ADD, Cardinality.OneToOne,
None, Nil, Nil, "__name__", Some(RvRange(startMs = 0, endMs = 19, stepMs = 1)))
val execPlan = LocalPartitionReduceAggregateExec(QueryContext(), dummyDispatcher,
Seq(binaryjoin1, binaryjoin2), AggregationOperator.Sum, Seq(0))


val resp = execPlan.execute(memStore, querySession).runToFuture.futureValue
val result = resp.asInstanceOf[QueryResult]
result.resultSchema.columns.map(_.colType) shouldEqual Seq(TimestampColumn, DoubleColumn)
result.resultSchema.fixedVectorLen.nonEmpty shouldEqual true
result.resultSchema.fixedVectorLen.get shouldEqual 11
}


it("should be able to hande complex query like sum(count + sum)") {
val startTime = now - numRawSamples * reportingInterval
val endTime = now - (numRawSamples - 10) * reportingInterval
val filters = Seq(ColumnFilter("_metric_", Filter.Equals("http_req_total".utf8)))
val leafExecPlan = MultiSchemaPartitionsExec(QueryContext(), dummyDispatcher,
dsRef, 0, filters, TimeRangeChunkScan(startTime, endTime), "_metric_")
val transformer = PeriodicSamplesMapper(startTime, reportingInterval, endTime, None, None, QueryContext())
leafExecPlan.addRangeVectorTransformer(transformer)

val count = LocalPartitionReduceAggregateExec(QueryContext(), dummyDispatcher,
Array[ExecPlan](leafExecPlan), AggregationOperator.Count, Seq(1.0))
val sum = LocalPartitionReduceAggregateExec(QueryContext(), dummyDispatcher,
Array[ExecPlan](leafExecPlan), AggregationOperator.Sum, Seq(0))
val binaryjoin1 = BinaryJoinExec(QueryContext(), dummyDispatcher,
Seq(count), Seq(sum), BinaryOperator.ADD, Cardinality.OneToOne,
None, Nil, Nil, "__name__", Some(RvRange(startMs = 0, endMs = 19, stepMs = 1)))
val binaryjoin2 = BinaryJoinExec(QueryContext(), dummyDispatcher,
Seq(sum), Seq(count), BinaryOperator.ADD, Cardinality.OneToOne,
None, Nil, Nil, "__name__", Some(RvRange(startMs = 0, endMs = 19, stepMs = 1)))
val execPlan = LocalPartitionReduceAggregateExec(QueryContext(), dummyDispatcher,
Seq(binaryjoin1, binaryjoin2), AggregationOperator.Sum, Seq(0))


val resp = execPlan.execute(memStore, querySession).runToFuture.futureValue
val result = resp.asInstanceOf[QueryResult]
result.resultSchema.columns.map(_.colType) shouldEqual Seq(TimestampColumn, DoubleColumn)
result.resultSchema.fixedVectorLen.nonEmpty shouldEqual true
result.resultSchema.fixedVectorLen.get shouldEqual 11
}
}
2 changes: 1 addition & 1 deletion version.sbt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version in ThisBuild := "0.9.27.3"
version in ThisBuild := "0.9.27.4"

0 comments on commit 8eaa7af

Please sign in to comment.