Skip to content

Commit 0772b52

Browse files
yugo-nCommitfest Bot
authored andcommitted
Allow collecting statistics on virtual generated columns
During ANALYZE, generation expressions are expanded, and statistics are computed using compute_expr_stats(). To support this, both compute_expr_stats() and AnlExprData are now exported from extended_stats.c. To enable the optimizer to make use of these statistics, a new field named virtual_gencols is added to RelOptInfo. This field holds the expressions of virtual generated columns in the table. In examine_variable(), if an expression in a WHERE clause matches the generation expression of a virtual generated column, the statistics collected for that column are used for that expression.
1 parent 7c2061b commit 0772b52

File tree

9 files changed

+167
-28
lines changed

9 files changed

+167
-28
lines changed

doc/src/sgml/ref/alter_table.sgml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,6 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
210210
When this form is used, the column's statistics are removed,
211211
so running <link linkend="sql-analyze"><command>ANALYZE</command></link>
212212
on the table afterwards is recommended.
213-
For a virtual generated column, <command>ANALYZE</command>
214-
is not necessary because such columns never have statistics.
215213
</para>
216214
</listitem>
217215
</varlistentry>
@@ -275,12 +273,9 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
275273
</para>
276274

277275
<para>
278-
When this form is used on a stored generated column, its statistics
279-
are removed, so running
280-
<link linkend="sql-analyze"><command>ANALYZE</command></link>
276+
When this form is used, the column's statistics are removed,
277+
so running <link linkend="sql-analyze"><command>ANALYZE</command></link>
281278
on the table afterwards is recommended.
282-
For a virtual generated column, <command>ANALYZE</command>
283-
is not necessary because such columns never have statistics.
284279
</para>
285280
</listitem>
286281
</varlistentry>

src/backend/commands/analyze.c

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "parser/parse_oper.h"
4141
#include "parser/parse_relation.h"
4242
#include "pgstat.h"
43+
#include "rewrite/rewriteHandler.h"
4344
#include "statistics/extended_stats_internal.h"
4445
#include "statistics/statistics.h"
4546
#include "storage/bufmgr.h"
@@ -558,13 +559,28 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
558559
{
559560
VacAttrStats *stats = vacattrstats[i];
560561
AttributeOpts *aopt;
562+
Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, stats->tupattnum - 1);
561563

562-
stats->rows = rows;
563-
stats->tupDesc = onerel->rd_att;
564-
stats->compute_stats(stats,
565-
std_fetch_func,
566-
numrows,
567-
totalrows);
564+
/*
565+
* For a virtual generated column, compute statistics for the expression value.
566+
*/
567+
if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
568+
{
569+
AnlExprData *exprdata = (AnlExprData *) palloc0(sizeof(AnlExprData));
570+
571+
exprdata->expr = build_generation_expression(onerel, stats->tupattnum);
572+
exprdata->vacattrstat = stats;
573+
compute_expr_stats(onerel, exprdata, 1, rows, numrows);
574+
}
575+
else
576+
{
577+
stats->rows = rows;
578+
stats->tupDesc = onerel->rd_att;
579+
stats->compute_stats(stats,
580+
std_fetch_func,
581+
numrows,
582+
totalrows);
583+
}
568584

569585
/*
570586
* If the appropriate flavor of the n_distinct option is
@@ -1049,10 +1065,6 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
10491065
if (attr->attisdropped)
10501066
return NULL;
10511067

1052-
/* Don't analyze virtual generated columns */
1053-
if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
1054-
return NULL;
1055-
10561068
/*
10571069
* Get attstattarget value. Set to -1 if null. (Analyze functions expect
10581070
* -1 to mean use default_statistics_target; see for example

src/backend/optimizer/util/plancat.c

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ static List *get_relation_constraints(PlannerInfo *root,
7878
bool include_partition);
7979
static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
8080
Relation heapRelation);
81+
static List *get_relation_virtual_gencols(PlannerInfo *root, RelOptInfo *rel,
82+
Relation relation);
8183
static List *get_relation_statistics(PlannerInfo *root, RelOptInfo *rel,
8284
Relation relation);
8385
static void set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
@@ -510,6 +512,9 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
510512

511513
rel->indexlist = indexinfos;
512514

515+
/* Make list of virtual generated columns */
516+
rel->virtual_gencols = get_relation_virtual_gencols(root, rel, relation);
517+
513518
rel->statlist = get_relation_statistics(root, rel, relation);
514519

515520
/* Grab foreign-table info using the relcache, while we have it */
@@ -1606,6 +1611,62 @@ get_relation_constraints(PlannerInfo *root,
16061611
return result;
16071612
}
16081613

1614+
/*
1615+
* get_relation_virtual_gencols
1616+
* Retrieve virtual generated columns defined on the table.
1617+
*
1618+
* Returns a List (possibly empty) of VirtualGeneratedColumnInfo objects
1619+
* containing the generation expressions. Each one has been processed by
1620+
* eval_const_expressions(), and its Vars are changed to have the varno
1621+
* indicated by rel->relid. This allows the expressions to be easily
1622+
* compared to expressions taken from WHERE.
1623+
*/
1624+
static List *get_relation_virtual_gencols(PlannerInfo *root, RelOptInfo *rel,
1625+
Relation relation)
1626+
{
1627+
TupleDesc tupdesc = RelationGetDescr(relation);
1628+
Index varno = rel->relid;
1629+
List *virtual_gencols = NIL;
1630+
1631+
if (tupdesc->constr && tupdesc->constr->has_generated_virtual)
1632+
{
1633+
for (int i = 0; i < tupdesc->natts; i++)
1634+
{
1635+
Form_pg_attribute attr = TupleDescAttr(tupdesc, i);
1636+
1637+
if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
1638+
{
1639+
VirtualGeneratedColumnInfo *info;
1640+
1641+
info = makeNode(VirtualGeneratedColumnInfo);
1642+
info->attno = attr->attnum;
1643+
info->expr = build_generation_expression(relation, attr->attnum);
1644+
1645+
/*
1646+
* Run the expressions through eval_const_expressions. This is
1647+
* not just an optimization, but is necessary, because the
1648+
* planner will be comparing them to similarly-processed qual
1649+
* clauses, and may fail to detect valid matches without this.
1650+
* We must not use canonicalize_qual, however, since these
1651+
* aren't qual expressions.
1652+
*/
1653+
info->expr = eval_const_expressions(NULL, info->expr);
1654+
1655+
/* May as well fix opfuncids too */
1656+
fix_opfuncids(info->expr);
1657+
1658+
/* Fix Vars to have the desired varno */
1659+
if (varno != 1)
1660+
ChangeVarNodes((Node *) info->expr, 1, varno, 0);
1661+
1662+
virtual_gencols = lappend(virtual_gencols, info);
1663+
}
1664+
}
1665+
}
1666+
1667+
return virtual_gencols;
1668+
}
1669+
16091670
/*
16101671
* Try loading data for the statistics object.
16111672
*

src/backend/statistics/extended_stats.c

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,6 @@ static void statext_store(Oid statOid, bool inh,
8282
static int statext_compute_stattarget(int stattarget,
8383
int nattrs, VacAttrStats **stats);
8484

85-
/* Information needed to analyze a single simple expression. */
86-
typedef struct AnlExprData
87-
{
88-
Node *expr; /* expression to analyze */
89-
VacAttrStats *vacattrstat; /* statistics attrs to analyze */
90-
} AnlExprData;
91-
92-
static void compute_expr_stats(Relation onerel, AnlExprData *exprdata,
93-
int nexprs, HeapTuple *rows, int numrows);
9485
static Datum serialize_expr_stats(AnlExprData *exprdata, int nexprs);
9586
static Datum expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
9687
static AnlExprData *build_expr_data(List *exprs, int stattarget);
@@ -2084,7 +2075,7 @@ examine_opclause_args(List *args, Node **exprp, Const **cstp,
20842075
/*
20852076
* Compute statistics about expressions of a relation.
20862077
*/
2087-
static void
2078+
void
20882079
compute_expr_stats(Relation onerel, AnlExprData *exprdata, int nexprs,
20892080
HeapTuple *rows, int numrows)
20902081
{

src/backend/utils/adt/selfuncs.c

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5731,6 +5731,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
57315731
*/
57325732
ListCell *ilist;
57335733
ListCell *slist;
5734+
ListCell *vlist;
57345735

57355736
/*
57365737
* The nullingrels bits within the expression could prevent us from
@@ -5850,6 +5851,46 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
58505851
break;
58515852
}
58525853

5854+
/*
5855+
* Search virtual generated columns for one with a matching expression
5856+
* and use the statistics collected for it, if any.
5857+
*/
5858+
foreach(vlist, onerel->virtual_gencols)
5859+
{
5860+
VirtualGeneratedColumnInfo *info = (VirtualGeneratedColumnInfo *) lfirst(vlist);
5861+
Node *expr = info->expr;
5862+
5863+
/*
5864+
* Stop once we've found statistics for the expression (either
5865+
* for a virtual generated column or an index in the preceding
5866+
* loop).
5867+
*/
5868+
if (vardata->statsTuple)
5869+
break;
5870+
5871+
/* strip RelabelType before comparing it */
5872+
if (expr && IsA(expr, RelabelType))
5873+
expr = (Node *) ((RelabelType *) expr)->arg;
5874+
5875+
if (equal(node, expr))
5876+
{
5877+
Var *var = makeVar(onerel->relid,
5878+
info->attno,
5879+
vardata->atttype,
5880+
vardata->atttypmod,
5881+
exprCollation(node),
5882+
0);
5883+
/*
5884+
* There cannot be a unique constraint on a virtual generated column.
5885+
* Fields other than the stats tuple must already be set.
5886+
*/
5887+
vardata->isunique = false;
5888+
5889+
/* Try to locate some stats */
5890+
examine_simple_variable(root, var, vardata);
5891+
}
5892+
}
5893+
58535894
/*
58545895
* Search extended statistics for one with a matching expression.
58555896
* There might be multiple ones, so just grab the first one. In the
@@ -5865,7 +5906,8 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
58655906

58665907
/*
58675908
* Stop once we've found statistics for the expression (either
5868-
* from extended stats, or for an index in the preceding loop).
5909+
* from extended stats, or for an index or a virtual generated
5910+
* column in the preceding loop).
58695911
*/
58705912
if (vardata->statsTuple)
58715913
break;

src/include/nodes/pathnodes.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -995,6 +995,8 @@ typedef struct RelOptInfo
995995
List *indexlist;
996996
/* list of StatisticExtInfo */
997997
List *statlist;
998+
/* list of VirtualGeneratedColumnInfo */
999+
List *virtual_gencols;
9981000
/* size estimates derived from pg_class */
9991001
BlockNumber pages;
10001002
Cardinality tuples;
@@ -1434,6 +1436,23 @@ typedef struct StatisticExtInfo
14341436
List *exprs;
14351437
} StatisticExtInfo;
14361438

1439+
/*
1440+
* VirtualGeneratedColumnInfo
1441+
* Information about virtual generated columns for planning/optimization
1442+
*/
1443+
typedef struct VirtualGeneratedColumnInfo
1444+
{
1445+
pg_node_attr(no_copy_equal, no_read, no_query_jumble)
1446+
1447+
NodeTag type;
1448+
1449+
/* attribute number of virtual generated column */
1450+
AttrNumber attno;
1451+
1452+
/* generation expression */
1453+
Node *expr;
1454+
} VirtualGeneratedColumnInfo;
1455+
14371456
/*
14381457
* JoinDomains
14391458
*

src/include/statistics/extended_stats_internal.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ typedef struct StatsBuildData
6868
bool **nulls;
6969
} StatsBuildData;
7070

71+
/* Information needed to analyze a single simple expression. */
72+
typedef struct AnlExprData
73+
{
74+
Node *expr; /* expression to analyze */
75+
VacAttrStats *vacattrstat; /* statistics attrs to analyze */
76+
} AnlExprData;
7177

7278
extern MVNDistinct *statext_ndistinct_build(double totalrows, StatsBuildData *data);
7379
extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
@@ -127,4 +133,7 @@ extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root,
127133
Selectivity *overlap_basesel,
128134
Selectivity *totalsel);
129135

136+
extern void
137+
compute_expr_stats(Relation onerel, AnlExprData *exprdata, int nexprs,
138+
HeapTuple *rows, int numrows);
130139
#endif /* EXTENDED_STATS_INTERNAL_H */

src/test/regress/expected/generated_virtual.out

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1514,6 +1514,13 @@ create table gtest32 (
15141514
);
15151515
insert into gtest32 values (1), (2);
15161516
analyze gtest32;
1517+
-- Ensure that statistics on virtual generated column are available
1518+
select count(*) from pg_stats where tablename = 'gtest32';
1519+
count
1520+
-------
1521+
5
1522+
(1 row)
1523+
15171524
-- Ensure that nullingrel bits are propagated into the generation expressions
15181525
explain (costs off)
15191526
select sum(t2.b) over (partition by t2.a),

src/test/regress/sql/generated_virtual.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,9 @@ create table gtest32 (
824824
insert into gtest32 values (1), (2);
825825
analyze gtest32;
826826

827+
-- Ensure that statistics on virtual generated column are available
828+
select count(*) from pg_stats where tablename = 'gtest32';
829+
827830
-- Ensure that nullingrel bits are propagated into the generation expressions
828831
explain (costs off)
829832
select sum(t2.b) over (partition by t2.a),

0 commit comments

Comments
 (0)