From 14f164fb7c5c831613e8c5cebca8afcd2846d728 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Tue, 22 Oct 2024 10:49:20 +0200 Subject: [PATCH 1/3] recalculate cardinalities --- src/hnsw/hnsw_optimize_join.cpp | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/hnsw/hnsw_optimize_join.cpp b/src/hnsw/hnsw_optimize_join.cpp index 8a6e283..2441565 100644 --- a/src/hnsw/hnsw_optimize_join.cpp +++ b/src/hnsw/hnsw_optimize_join.cpp @@ -185,6 +185,7 @@ class LogicalHNSWIndexJoin final : public LogicalExtensionOperator { vector GetLeftBindings(); vector GetRightBindings(); unique_ptr CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) override; + idx_t EstimateCardinality(ClientContext &context) override; public: idx_t table_index; @@ -275,7 +276,8 @@ vector LogicalHNSWIndexJoin::GetColumnBindings() { unique_ptr LogicalHNSWIndexJoin::CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) { - auto result = make_uniq(types, 0, table, hnsw_index, limit); + + auto result = make_uniq(types, estimated_cardinality, table, hnsw_index, limit); result->limit = limit; result->inner_column_ids = inner_column_ids; result->inner_projection_ids = inner_projection_ids; @@ -288,6 +290,19 @@ unique_ptr LogicalHNSWIndexJoin::CreatePlan(ClientContext &con return std::move(result); } +idx_t LogicalHNSWIndexJoin::EstimateCardinality(ClientContext &context) { + // The cardinality of the HNSW index join is the cardinality of the outer table + if (has_estimated_cardinality) { + return estimated_cardinality; + } + + const auto child_cardinality = children[0]->EstimateCardinality(context); + estimated_cardinality = child_cardinality * limit; + has_estimated_cardinality = true; + + return estimated_cardinality; +} + //------------------------------------------------------------------------------ // Optimizer //------------------------------------------------------------------------------ @@ -306,6 +321,14 @@ HNSWIndexJoinOptimizer::HNSWIndexJoinOptimizer() { optimize_function = Optimize; } +class CardinalityResetter final : public LogicalOperatorVisitor { +public: + void VisitOperator(LogicalOperator &op) override { + op.has_estimated_cardinality = false; + VisitOperatorChildren(op); + } +}; + bool HNSWIndexJoinOptimizer::TryOptimize(Binder &binder, ClientContext &context, unique_ptr &root, unique_ptr &plan) { @@ -639,6 +662,10 @@ bool HNSWIndexJoinOptimizer::TryOptimize(Binder &binder, ClientContext &context, // Swap the plan plan = std::move(new_projection); + CardinalityResetter resetter; + resetter.VisitOperator(*root); + root->EstimateCardinality(context); + return true; } From 07d134844c67854d183d41882a86f77482ef2859 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Tue, 22 Oct 2024 12:11:31 +0200 Subject: [PATCH 2/3] copy names --- src/hnsw/hnsw_optimize_join.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/hnsw/hnsw_optimize_join.cpp b/src/hnsw/hnsw_optimize_join.cpp index 2441565..5c3f246 100644 --- a/src/hnsw/hnsw_optimize_join.cpp +++ b/src/hnsw/hnsw_optimize_join.cpp @@ -45,6 +45,7 @@ class PhysicalHNSWIndexJoin final : public PhysicalOperator { unique_ptr GetOperatorState(ExecutionContext &context) const override; OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk, GlobalOperatorState &gstate, OperatorState &state) const override; + InsertionOrderPreservingMap ParamsToString() const override; public: DuckTableEntry &table; @@ -167,6 +168,17 @@ OperatorResultType PhysicalHNSWIndexJoin::Execute(ExecutionContext &context, Dat return OperatorResultType::HAVE_MORE_OUTPUT; } +InsertionOrderPreservingMap PhysicalHNSWIndexJoin::ParamsToString() const { + InsertionOrderPreservingMap result; + auto table_name = table.name; + auto index_name = hnsw_index.name; + result.insert("table", table_name); + result.insert("index", index_name); + result.insert("limit", to_string(limit)); + SetEstimatedCardinality(result, estimated_cardinality); + return result; +} + //------------------------------------------------------------------------------ // Logical Operator //------------------------------------------------------------------------------ @@ -658,14 +670,11 @@ bool HNSWIndexJoinOptimizer::TryOptimize(Binder &binder, ClientContext &context, // Add the new projection on top of the join new_projection->children.emplace_back(std::move(index_join)); + new_projection->EstimateCardinality(context); // Swap the plan plan = std::move(new_projection); - CardinalityResetter resetter; - resetter.VisitOperator(*root); - root->EstimateCardinality(context); - return true; } From 57617f31166e1388c1e1fd466cd09bbed6b2dce3 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Tue, 22 Oct 2024 12:40:58 +0200 Subject: [PATCH 3/3] ok actually compute cardinalities bottom up --- src/hnsw/hnsw_optimize_join.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/hnsw/hnsw_optimize_join.cpp b/src/hnsw/hnsw_optimize_join.cpp index 5c3f246..fb79fdf 100644 --- a/src/hnsw/hnsw_optimize_join.cpp +++ b/src/hnsw/hnsw_optimize_join.cpp @@ -335,9 +335,15 @@ HNSWIndexJoinOptimizer::HNSWIndexJoinOptimizer() { class CardinalityResetter final : public LogicalOperatorVisitor { public: + ClientContext &context; + + explicit CardinalityResetter(ClientContext &context_p) : context(context_p) { + } + void VisitOperator(LogicalOperator &op) override { op.has_estimated_cardinality = false; VisitOperatorChildren(op); + op.EstimateCardinality(context); } }; @@ -675,6 +681,9 @@ bool HNSWIndexJoinOptimizer::TryOptimize(Binder &binder, ClientContext &context, // Swap the plan plan = std::move(new_projection); + CardinalityResetter cardinality_resetter(context); + cardinality_resetter.VisitOperator(*root); + return true; }