Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(query) Regex equals .* must ignore the label and match series even without the label #1639

Merged
merged 5 commits into from
Jul 28, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala
Original file line number Diff line number Diff line change
Expand Up @@ -817,12 +817,20 @@ class PartKeyLuceneIndex(ref: DatasetRef,
logger.info(s"Refreshed index searchers to make reads consistent for dataset=$ref shard=$shardNum")
}

//scalastyle:off method.length
private def leafFilter(column: String, filter: Filter): Query = {
filter match {
case EqualsRegex(value) =>
val regex = removeRegexAnchors(value.toString)
if (regex.nonEmpty) new RegexpQuery(new Term(column, regex), RegExp.NONE)
else leafFilter(column, NotEqualsRegex(".+")) // value="" means the label is absent or has an empty value.
if(regex.r.unapplySeq("").isDefined) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: does matches work here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's exactly what I was looking for, but its available in Scala 2.13

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On second thought, I can do this regex.r.pattern.matcher("").matches() since regex does not provide the matches method

// Check if the given regex matches the empty string, if yes, then do not consider this label
val booleanQuery = new BooleanQuery.Builder
val allDocs = new MatchAllDocsQuery
booleanQuery.add(allDocs, Occur.FILTER).build()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can just return a MatchAllDocsQuery directly (it implements Query, at least.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, done

} else {
if (regex.nonEmpty) new RegexpQuery(new Term(column, regex), RegExp.NONE)
else leafFilter(column, NotEqualsRegex(".+")) // value="" means the label is absent or has an empty value.
}
case NotEqualsRegex(value) =>
val term = new Term(column, removeRegexAnchors(value.toString))
val allDocs = new MatchAllDocsQuery
Expand Down Expand Up @@ -864,7 +872,7 @@ class PartKeyLuceneIndex(ref: DatasetRef,
case _ => throw new UnsupportedOperationException
}
}

//scalastyle:on method.length
def partIdsFromFilters(columnFilters: Seq[ColumnFilter],
startTime: Long,
endTime: Long): debox.Buffer[Int] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1008,4 +1008,39 @@ class PartKeyLuceneIndexSpec extends AnyFunSpec with Matchers with BeforeAndAfte
// close CardinalityTracker to avoid leaking of resources
cardTracker.close()
}

it("should match records without label when .* is provided on a non existent label") {

val pkrs = partKeyFromRecords(dataset6, records(dataset6, readers.take(10)), Some(partBuilder))
.zipWithIndex.map { case (addr, i) =>
val pk = partKeyOnHeap(dataset6.schema.partKeySchema, ZeroPointer, addr)
keyIndex.addPartKey(pk, i, i, i + 10)()
PartKeyLuceneIndexRecord(pk, i, i + 10)
}
keyIndex.refreshReadersBlocking()


// Query with just the existing Label name
val filter1 = ColumnFilter("Actor2Code", Equals("GOV".utf8))
val result1 = keyIndex.partKeyRecordsFromFilters(Seq(filter1), 0, Long.MaxValue)
val expected1 = Seq(pkrs(7), pkrs(8), pkrs(9))

result1.map(_.partKey.toSeq) shouldEqual expected1.map(_.partKey.toSeq)
result1.map(p => (p.startTime, p.endTime)) shouldEqual expected1.map(p => (p.startTime, p.endTime))

// Query with non existent label name with an empty regex
val filter2 = ColumnFilter("dummy", EqualsRegex(".*".utf8))
val filter3 = ColumnFilter("Actor2Code", Equals("GOV".utf8))
val result2 = keyIndex.partKeyRecordsFromFilters(Seq(filter2, filter3), 0, Long.MaxValue)
val expected2 = Seq(pkrs(7), pkrs(8), pkrs(9))

result2.map(_.partKey.toSeq) shouldEqual expected2.map(_.partKey.toSeq)
result2.map(p => (p.startTime, p.endTime)) shouldEqual expected2.map(p => (p.startTime, p.endTime))

// Query with non existent label name with an regex matching at least 1 character
val filter4 = ColumnFilter("dummy", EqualsRegex(".+".utf8))
val filter5 = ColumnFilter("Actor2Code", Equals("GOV".utf8))
val result3 = keyIndex.partKeyRecordsFromFilters(Seq(filter4, filter5), 0, Long.MaxValue)
result3 shouldEqual Seq()
}
}