From edb85c130d4356c29ab09be8a453a063008c6eab Mon Sep 17 00:00:00 2001 From: Amol Nayak Date: Tue, 25 Jul 2023 11:12:15 -0700 Subject: [PATCH 1/5] fix(query) Regex equals .* must ignore the label and match series even without the label --- .../memstore/PartKeyLuceneIndex.scala | 14 ++++++-- .../memstore/PartKeyLuceneIndexSpec.scala | 35 +++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala b/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala index 89b5210ed..cb0642f77 100644 --- a/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala +++ b/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala @@ -817,12 +817,20 @@ class PartKeyLuceneIndex(ref: DatasetRef, logger.info(s"Refreshed index searchers to make reads consistent for dataset=$ref shard=$shardNum") } + //scalastyle:off method.length private def leafFilter(column: String, filter: Filter): Query = { filter match { case EqualsRegex(value) => val regex = removeRegexAnchors(value.toString) - if (regex.nonEmpty) new RegexpQuery(new Term(column, regex), RegExp.NONE) - else leafFilter(column, NotEqualsRegex(".+")) // value="" means the label is absent or has an empty value. + if(regex.r.unapplySeq("").isDefined) { + // Check if the given regex matches the empty string, if yes, then do not consider this label + val booleanQuery = new BooleanQuery.Builder + val allDocs = new MatchAllDocsQuery + booleanQuery.add(allDocs, Occur.FILTER).build() + } else { + if (regex.nonEmpty) new RegexpQuery(new Term(column, regex), RegExp.NONE) + else leafFilter(column, NotEqualsRegex(".+")) // value="" means the label is absent or has an empty value. 
+ } case NotEqualsRegex(value) => val term = new Term(column, removeRegexAnchors(value.toString)) val allDocs = new MatchAllDocsQuery @@ -864,7 +872,7 @@ class PartKeyLuceneIndex(ref: DatasetRef, case _ => throw new UnsupportedOperationException } } - + //scalastyle:on method.length def partIdsFromFilters(columnFilters: Seq[ColumnFilter], startTime: Long, endTime: Long): debox.Buffer[Int] = { diff --git a/core/src/test/scala/filodb.core/memstore/PartKeyLuceneIndexSpec.scala b/core/src/test/scala/filodb.core/memstore/PartKeyLuceneIndexSpec.scala index 6eb71613b..c1665b394 100644 --- a/core/src/test/scala/filodb.core/memstore/PartKeyLuceneIndexSpec.scala +++ b/core/src/test/scala/filodb.core/memstore/PartKeyLuceneIndexSpec.scala @@ -1008,4 +1008,39 @@ class PartKeyLuceneIndexSpec extends AnyFunSpec with Matchers with BeforeAndAfte // close CardinalityTracker to avoid leaking of resources cardTracker.close() } + + it("should match records without label when .* is provided on a non existent label") { + + val pkrs = partKeyFromRecords(dataset6, records(dataset6, readers.take(10)), Some(partBuilder)) + .zipWithIndex.map { case (addr, i) => + val pk = partKeyOnHeap(dataset6.schema.partKeySchema, ZeroPointer, addr) + keyIndex.addPartKey(pk, i, i, i + 10)() + PartKeyLuceneIndexRecord(pk, i, i + 10) + } + keyIndex.refreshReadersBlocking() + + + // Query with just the existing Label name + val filter1 = ColumnFilter("Actor2Code", Equals("GOV".utf8)) + val result1 = keyIndex.partKeyRecordsFromFilters(Seq(filter1), 0, Long.MaxValue) + val expected1 = Seq(pkrs(7), pkrs(8), pkrs(9)) + + result1.map(_.partKey.toSeq) shouldEqual expected1.map(_.partKey.toSeq) + result1.map(p => (p.startTime, p.endTime)) shouldEqual expected1.map(p => (p.startTime, p.endTime)) + + // Query with non existent label name with an empty regex + val filter2 = ColumnFilter("dummy", EqualsRegex(".*".utf8)) + val filter3 = ColumnFilter("Actor2Code", Equals("GOV".utf8)) + val result2 = 
keyIndex.partKeyRecordsFromFilters(Seq(filter2, filter3), 0, Long.MaxValue) + val expected2 = Seq(pkrs(7), pkrs(8), pkrs(9)) + + result2.map(_.partKey.toSeq) shouldEqual expected2.map(_.partKey.toSeq) + result2.map(p => (p.startTime, p.endTime)) shouldEqual expected2.map(p => (p.startTime, p.endTime)) + + // Query with non existent label name with a regex matching at least 1 character + val filter4 = ColumnFilter("dummy", EqualsRegex(".+".utf8)) + val filter5 = ColumnFilter("Actor2Code", Equals("GOV".utf8)) + val result3 = keyIndex.partKeyRecordsFromFilters(Seq(filter4, filter5), 0, Long.MaxValue) + result3 shouldEqual Seq() + } } \ No newline at end of file From 0ad906a0a410fda1cba72821fc565dcd16593d11 Mon Sep 17 00:00:00 2001 From: Amol Nayak Date: Tue, 25 Jul 2023 15:37:08 -0700 Subject: [PATCH 2/5] fix(query) Regex equals .* must ignore the label and match series even without the label --- .../memstore/PartKeyLuceneIndexSpec.scala | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/core/src/test/scala/filodb.core/memstore/PartKeyLuceneIndexSpec.scala b/core/src/test/scala/filodb.core/memstore/PartKeyLuceneIndexSpec.scala index c1665b394..db5575341 100644 --- a/core/src/test/scala/filodb.core/memstore/PartKeyLuceneIndexSpec.scala +++ b/core/src/test/scala/filodb.core/memstore/PartKeyLuceneIndexSpec.scala @@ -1042,5 +1042,21 @@ class PartKeyLuceneIndexSpec extends AnyFunSpec with Matchers with BeforeAndAfte val filter5 = ColumnFilter("Actor2Code", Equals("GOV".utf8)) val result3 = keyIndex.partKeyRecordsFromFilters(Seq(filter4, filter5), 0, Long.MaxValue) result3 shouldEqual Seq() + + // Query with non existent label name with an empty regex + val filter6 = ColumnFilter("dummy", EqualsRegex("".utf8)) + val filter7 = ColumnFilter("Actor2Code", Equals("GOV".utf8)) + val result4 = keyIndex.partKeyRecordsFromFilters(Seq(filter6, filter7), 0, Long.MaxValue) + val expected4 = Seq(pkrs(7), pkrs(8), pkrs(9)) + result4.map(_.partKey.toSeq) shouldEqual 
expected4.map(_.partKey.toSeq) + result4.map(p => (p.startTime, p.endTime)) shouldEqual expected4.map(p => (p.startTime, p.endTime)) + + // Query with non existent label name with an empty equals + val filter8 = ColumnFilter("dummy", Equals("".utf8)) + val filter9 = ColumnFilter("Actor2Code", Equals("GOV".utf8)) + val result5 = keyIndex.partKeyRecordsFromFilters(Seq(filter8, filter9), 0, Long.MaxValue) + val expected5 = Seq(pkrs(7), pkrs(8), pkrs(9)) + result5.map(_.partKey.toSeq) shouldEqual expected5.map(_.partKey.toSeq) + result5.map(p => (p.startTime, p.endTime)) shouldEqual expected5.map(p => (p.startTime, p.endTime)) } } \ No newline at end of file From 39e4f9b352fe8df53eebb8fe2810e93812f00bc0 Mon Sep 17 00:00:00 2001 From: Amol Nayak Date: Thu, 27 Jul 2023 09:19:07 -0700 Subject: [PATCH 3/5] fix(query) Regex equals .* must ignore the label and match series even without the label --- .../scala/filodb.core/memstore/PartKeyLuceneIndex.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala b/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala index cb0642f77..63bbb9306 100644 --- a/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala +++ b/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala @@ -822,11 +822,9 @@ class PartKeyLuceneIndex(ref: DatasetRef, filter match { case EqualsRegex(value) => val regex = removeRegexAnchors(value.toString) - if(regex.r.unapplySeq("").isDefined) { + if(regex.r.pattern.matcher("").matches()) { // Check if the given regex matches the empty string, if yes, then do not consider this label - val booleanQuery = new BooleanQuery.Builder - val allDocs = new MatchAllDocsQuery - booleanQuery.add(allDocs, Occur.FILTER).build() + new MatchAllDocsQuery } else { if (regex.nonEmpty) new RegexpQuery(new Term(column, regex), RegExp.NONE) else leafFilter(column, NotEqualsRegex(".+")) // value="" means the label is 
absent or has an empty value. From ba58ebdc5bd967ee552219ca8d1f827110b09ea2 Mon Sep 17 00:00:00 2001 From: Amol Nayak Date: Thu, 27 Jul 2023 09:43:57 -0700 Subject: [PATCH 4/5] fix(query) Regex equals .* must ignore the label and match series even without the label --- .../main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala b/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala index 63bbb9306..7c67346d1 100644 --- a/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala +++ b/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala @@ -822,7 +822,7 @@ class PartKeyLuceneIndex(ref: DatasetRef, filter match { case EqualsRegex(value) => val regex = removeRegexAnchors(value.toString) - if(regex.r.pattern.matcher("").matches()) { + if(regex.replaceAll("\\.\\*", "") == "") { // Check if the given regex matches the empty string, if yes, then do not consider this label new MatchAllDocsQuery } else { From 9011ff4d725f050f1384f3ba6ca17702e93c2304 Mon Sep 17 00:00:00 2001 From: Amol Nayak Date: Fri, 28 Jul 2023 09:10:32 -0700 Subject: [PATCH 5/5] fix(query) Regex equals .* must ignore the label and match series even without the label --- .../filodb.core/memstore/PartKeyLuceneIndex.scala | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala b/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala index 7c67346d1..5980821f9 100644 --- a/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala +++ b/core/src/main/scala/filodb.core/memstore/PartKeyLuceneIndex.scala @@ -822,13 +822,9 @@ class PartKeyLuceneIndex(ref: DatasetRef, filter match { case EqualsRegex(value) => val regex = removeRegexAnchors(value.toString) - if(regex.replaceAll("\\.\\*", "") == "") { - // Check if the given regex matches the empty 
string, if yes, then do not consider this label - new MatchAllDocsQuery - } else { - if (regex.nonEmpty) new RegexpQuery(new Term(column, regex), RegExp.NONE) - else leafFilter(column, NotEqualsRegex(".+")) // value="" means the label is absent or has an empty value. - } + if (regex.replaceAll("\\.\\*", "").nonEmpty) new RegexpQuery(new Term(column, regex), RegExp.NONE) + else leafFilter(column, NotEqualsRegex(".+")) // value="" means the label is absent or has an empty value. + case NotEqualsRegex(value) => val term = new Term(column, removeRegexAnchors(value.toString)) val allDocs = new MatchAllDocsQuery