Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cherry pick Rows out in EXPLAIN ANALYZE #670

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions src/backend/cdb/cdbvars.c
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ int gp_hashagg_groups_per_bucket = 5;
int gp_motion_slice_noop = 0;

/* Cloudberry Database Experimental Feature GUCs */
bool gp_enable_explain_rows_out = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to pass a parameter to the EXPLAIN command than to use a GUC to control whether "rows out" is printed.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The rows out feature was created for auto_explain. If we add "rows out" as a parameter of EXPLAIN, then we'll need to change the auto_explain source, and auto_explain won't be compatible with vanilla.

Copy link
Contributor

@fanfuxiaoran fanfuxiaoran Oct 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

 auto_explain won't be compatible with vanilla

Hmm... I cannot understand. Could you give more details? Is auto_explain also included in vanilla?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

auto_explain is included in vanilla https://github.com/greenplum-db/gpdb-archive/tree/main/contrib/auto_explain

Also GUC gp_enable_explain_rows_out is made by analogy with GUC gp_enable_explain_allstat

bool gp_enable_explain_allstat = false;
bool gp_enable_motion_deadlock_sanity = false; /* planning time sanity
* check */
Expand Down
42 changes: 41 additions & 1 deletion src/backend/commands/explain_gp.c
Original file line number Diff line number Diff line change
Expand Up @@ -941,7 +941,7 @@ cdbexplain_collectStatsFromNode(PlanState *planstate, CdbExplain_SendStatCtx *ct
*/
typedef struct CdbExplain_DepStatAcc
{
/* vmax, vsum, vcnt, segmax */
/* vmax, vmin, vsum, vcnt, segmax, segmin */
CdbExplain_Agg agg;
/* max's received StatHdr */
CdbExplain_StatHdr *rshmax;
Expand Down Expand Up @@ -1716,6 +1716,46 @@ cdbexplain_showExecStats(struct PlanState *planstate, ExplainState *es)
}
pfree(extraData.data);

/*
* Print "Rows out"
*/

if (gp_enable_explain_rows_out && es->analyze && ns->ninst > 0) {
double ntuples_max = ns->ntuples.vmax;
int ntuples_imax = ns->ntuples.imax;
double ntuples_min = ns->ntuples.vmin;
int ntuples_imin = ns->ntuples.imin;
double ntuples_avg = cdbexplain_agg_avg(&ns->ntuples);
int ntuples_cnt = ns->ntuples.vcnt;

if (es->format == EXPLAIN_FORMAT_TEXT)
{
/*
 * Emit a single summary line: average rows per worker and the worker
 * count, then the max and min row counts with the segment id of each.
 */
appendStringInfoSpaces(es->str, es->indent * 2);
appendStringInfoString(es->str, "Rows out: ");

appendStringInfo(es->str,
"%.2f rows avg x %d workers, %.0f rows max (seg%d), %.0f rows min (seg%d).\n",
ntuples_avg,
ntuples_cnt,
ntuples_max,
ntuples_imax,
ntuples_min,
ntuples_imin);
}
else {
ExplainPropertyInteger("Workers", NULL, ntuples_cnt, es);
ExplainPropertyFloat("Average Rows", NULL, ntuples_avg, 1, es);
ExplainPropertyFloat("Max Rows", NULL, ntuples_max, 0, es);
ExplainPropertyInteger("Max Rows Segment", NULL, ntuples_imax, es);
ExplainPropertyFloat("Min Rows", NULL, ntuples_min, 0, es);
ExplainPropertyInteger("Min Rows Segment", NULL, ntuples_imin, es);
}
}

/*
fanfuxiaoran marked this conversation as resolved.
Show resolved Hide resolved
* Dump stats for all workers.
*/
Expand Down
11 changes: 11 additions & 0 deletions src/backend/utils/misc/guc_gp.c
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,17 @@ struct config_bool ConfigureNamesBool_gp[] =
NULL, NULL, NULL
},

{
{"gp_enable_explain_rows_out", PGC_USERSET, CLIENT_CONN_OTHER,
gettext_noop("Print avg, min and max rows out and which segments reach them in EXPLAIN ANALYZE."),
NULL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE
},
&gp_enable_explain_rows_out,
false,
NULL, NULL, NULL
},

{
{"gp_enable_explain_allstat", PGC_USERSET, CLIENT_CONN_OTHER,
gettext_noop("Experimental feature: dump stats for all segments in EXPLAIN ANALYZE."),
Expand Down
11 changes: 11 additions & 0 deletions src/include/cdb/cdbexplain.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,22 @@ struct CdbExplain_ShowStatCtx; /* private, in "cdb/cdbexplain.c" */
/*
 * Aggregate of one statistic over a set of observations: keeps the extreme
 * values, the sum, the number of values counted, and the ids of the first
 * observations that reached the maximum and the minimum.
 */
typedef struct
{
double vmax; /* maximum value of statistic */
double vmin; /* minimum value of statistic */
double vsum; /* sum of values */
int vcnt; /* count of values > 0 */
int imax; /* id of 1st observation having maximum value */
int imin; /* id of 1st observation having minimum value */
} CdbExplain_Agg;

/*
 * cdbexplain_agg_init0
 *		Reset an aggregate so that every field (max, min, sum, count and
 *		both observation ids) is zero.
 */
static inline void
cdbexplain_agg_init0(CdbExplain_Agg *agg)
{
	/* floating-point accumulators */
	agg->vsum = agg->vmin = agg->vmax = 0;

	/* value counter and the ids of the max/min observations */
	agg->imin = agg->imax = agg->vcnt = 0;
}

static inline bool
Expand All @@ -48,6 +52,13 @@ cdbexplain_agg_upd(CdbExplain_Agg *agg, double v, int id)
agg->vsum += v;
agg->vcnt++;

if (v < agg->vmin ||
agg->vcnt == 1)
{
agg->vmin = v;
agg->imin = id;
}

if (v > agg->vmax ||
agg->vcnt == 1)
{
Expand Down
6 changes: 6 additions & 0 deletions src/include/cdb/cdbvars.h
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,12 @@ extern bool gp_enable_agg_pushdown;
*/
extern bool gp_enable_preunique;

/* May Cloudberry print, for each plan node during EXPLAIN ANALYZE, the
 * average, minimum and maximum rows out, together with the segments that
 * produced the minimum and the maximum?
 */
fanfuxiaoran marked this conversation as resolved.
Show resolved Hide resolved
extern bool gp_enable_explain_rows_out;

/* May Cloudberry dump statistics for all segments as a huge ugly string
* during EXPLAIN ANALYZE?
*
Expand Down
1 change: 1 addition & 0 deletions src/include/utils/unsync_guc_name.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@
"gp_enable_agg_pushdown",
"gp_enable_ao_indexscan",
"gp_enable_direct_dispatch",
"gp_enable_explain_rows_out",
"gp_enable_explain_allstat",
"gp_enable_fast_sri",
"gp_enable_global_deadlock_detector",
Expand Down
16 changes: 16 additions & 0 deletions src/test/regress/expected/gp_explain.out
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,22 @@ explain analyze SELECT * FROM explaintest;
(8 rows)

set gp_enable_explain_allstat=DEFAULT;
-- Test explain rows out.
set gp_enable_explain_rows_out=on;
explain analyze SELECT * FROM explaintest;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..3.10 rows=10 width=4) (actual time=0.266..0.384 rows=5 loops=2)
-> Seq Scan on explaintest (cost=0.00..3.10 rows=4 width=4) (actual time=0.011..0.013 rows=2 loops=2)
Rows out: 3.33 rows avg x 3 workers, 5 rows max (seg1), 2 rows min (seg2).
(slice0) Executor memory: 322K bytes.
(slice1) Executor memory: 50K bytes avg x 3 workers, 50K bytes max (seg0).
Memory used: 128000kB
Optimizer: Postgres query optimizer
Total runtime: 2.600 ms
(8 rows)

set gp_enable_explain_rows_out=DEFAULT;
--
-- Test GPDB-specific EXPLAIN (SLICETABLE) option.
--
Expand Down
16 changes: 16 additions & 0 deletions src/test/regress/expected/gp_explain_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,22 @@ explain analyze SELECT * FROM explaintest;
(8 rows)

set gp_enable_explain_allstat=DEFAULT;
-- Test explain rows out.
set gp_enable_explain_rows_out=on;
explain analyze SELECT * FROM explaintest;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=10 width=4) (actual time=0.298..0.302 rows=5 loops=2)
-> Seq Scan on explaintest (cost=0.00..431.00 rows=4 width=4) (actual time=0.013..0.015 rows=2 loops=2)
Rows out: 3.33 rows avg x 3 workers, 5 rows max (seg1), 2 rows min (seg2).
(slice0) Executor memory: 290K bytes.
(slice1) Executor memory: 50K bytes avg x 3 workers, 50K bytes max (seg0).
Memory used: 128000kB
Optimizer: Pivotal Optimizer (GPORCA) version 3.2.0
Total runtime: 1.577 ms
(8 rows)

set gp_enable_explain_rows_out=DEFAULT;
--
-- Test GPDB-specific EXPLAIN (SLICETABLE) option.
--
Expand Down
5 changes: 5 additions & 0 deletions src/test/regress/sql/gp_explain.sql
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,11 @@ set gp_enable_explain_allstat=on;
explain analyze SELECT * FROM explaintest;
set gp_enable_explain_allstat=DEFAULT;

-- Test explain rows out.
set gp_enable_explain_rows_out=on;
explain analyze SELECT * FROM explaintest;
set gp_enable_explain_rows_out=DEFAULT;


--
-- Test GPDB-specific EXPLAIN (SLICETABLE) option.
Expand Down
Loading