Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(query) Regex equals .* must ignore the label and match series even without the label #1639

Merged
merged 5 commits into from
Jul 28, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -817,12 +817,18 @@ class PartKeyLuceneIndex(ref: DatasetRef,
logger.info(s"Refreshed index searchers to make reads consistent for dataset=$ref shard=$shardNum")
}

//scalastyle:off method.length
private def leafFilter(column: String, filter: Filter): Query = {
filter match {
case EqualsRegex(value) =>
val regex = removeRegexAnchors(value.toString)
if (regex.nonEmpty) new RegexpQuery(new Term(column, regex), RegExp.NONE)
else leafFilter(column, NotEqualsRegex(".+")) // value="" means the label is absent or has an empty value.
if(regex.r.pattern.matcher("").matches()) {
// If the regex also matches the empty string (e.g. ".*"), series that lack this label must match too,
// so the label is not constrained at all and every document is accepted.
new MatchAllDocsQuery
} else {
if (regex.nonEmpty) new RegexpQuery(new Term(column, regex), RegExp.NONE)
else leafFilter(column, NotEqualsRegex(".+")) // value="" means the label is absent or has an empty value.
}
case NotEqualsRegex(value) =>
val term = new Term(column, removeRegexAnchors(value.toString))
val allDocs = new MatchAllDocsQuery
Expand Down Expand Up @@ -864,7 +870,7 @@ class PartKeyLuceneIndex(ref: DatasetRef,
case _ => throw new UnsupportedOperationException
}
}

//scalastyle:on method.length
def partIdsFromFilters(columnFilters: Seq[ColumnFilter],
startTime: Long,
endTime: Long): debox.Buffer[Int] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1008,4 +1008,55 @@ class PartKeyLuceneIndexSpec extends AnyFunSpec with Matchers with BeforeAndAfte
// close CardinalityTracker to avoid leaking of resources
cardTracker.close()
}

it("should match records without label when .* is provided on a non existent label") {

  // Index the first ten dataset6 records, keeping an in-memory copy of what was indexed so the
  // query results can be compared against it.
  val sampleRecords = records(dataset6, readers.take(10))
  val addresses = partKeyFromRecords(dataset6, sampleRecords, Some(partBuilder))
  val indexed = addresses.zipWithIndex.map { case (addr, ordinal) =>
    val onHeapKey = partKeyOnHeap(dataset6.schema.partKeySchema, ZeroPointer, addr)
    keyIndex.addPartKey(onHeapKey, ordinal, ordinal, ordinal + 10)()
    PartKeyLuceneIndexRecord(onHeapKey, ordinal, ordinal + 10)
  }
  keyIndex.refreshReadersBlocking()

  // Filter on a label that does exist; every query below includes it.
  val govOnly = ColumnFilter("Actor2Code", Equals("GOV".utf8))

  // The records expected whenever the "dummy" filter places no real restriction on the result.
  val govRecords = Seq(indexed(7), indexed(8), indexed(9))
  val govKeys = govRecords.map(_.partKey.toSeq)
  val govTimeRanges = govRecords.map(r => (r.startTime, r.endTime))

  // Runs the filters over the widest time range used by this test.
  def lookup(filters: Seq[ColumnFilter]) =
    keyIndex.partKeyRecordsFromFilters(filters, 0, Long.MaxValue)

  // Asserts that the filters return exactly the GOV records, comparing keys and time ranges.
  def expectGovRecords(filters: Seq[ColumnFilter]) = {
    val found = lookup(filters)
    found.map(_.partKey.toSeq) shouldEqual govKeys
    found.map(r => (r.startTime, r.endTime)) shouldEqual govTimeRanges
  }

  // Baseline: query with just the existing label name.
  expectGovRecords(Seq(govOnly))

  // Non-existent label with ".*": the regex can match the empty string, so records that lack
  // the label are still returned.
  expectGovRecords(Seq(ColumnFilter("dummy", EqualsRegex(".*".utf8)), govOnly))

  // Non-existent label with ".+": the regex needs at least one character, so nothing matches.
  lookup(Seq(ColumnFilter("dummy", EqualsRegex(".+".utf8)), govOnly)) shouldEqual Seq()

  // Non-existent label with an empty regex.
  expectGovRecords(Seq(ColumnFilter("dummy", EqualsRegex("".utf8)), govOnly))

  // Non-existent label with an empty equals.
  expectGovRecords(Seq(ColumnFilter("dummy", Equals("".utf8)), govOnly))
}
}
Loading