diff options
46 files changed, 11586 insertions, 0 deletions
diff --git a/contrib/Makefile b/contrib/Makefile index 2f0a88d3f7..dd04c20acd 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -34,6 +34,7 @@ SUBDIRS = \ pg_freespacemap \ pg_logicalinspect \ pg_overexplain \ + pg_plan_advice \ pg_prewarm \ pg_stat_statements \ pg_surgery \ diff --git a/contrib/meson.build b/contrib/meson.build index ed30ee7d63..cb718dbdac 100644 --- a/contrib/meson.build +++ b/contrib/meson.build @@ -48,6 +48,7 @@ subdir('pgcrypto') subdir('pg_freespacemap') subdir('pg_logicalinspect') subdir('pg_overexplain') +subdir('pg_plan_advice') subdir('pg_prewarm') subdir('pgrowlocks') subdir('pg_stat_statements') diff --git a/contrib/pg_plan_advice/.gitignore b/contrib/pg_plan_advice/.gitignore new file mode 100644 index 0000000000..19a1425301 --- /dev/null +++ b/contrib/pg_plan_advice/.gitignore @@ -0,0 +1,3 @@ +/pgpa_parser.h +/pgpa_parser.c +/pgpa_scanner.c diff --git a/contrib/pg_plan_advice/Makefile b/contrib/pg_plan_advice/Makefile new file mode 100644 index 0000000000..1d4c559aed --- /dev/null +++ b/contrib/pg_plan_advice/Makefile @@ -0,0 +1,50 @@ +# contrib/pg_plan_advice/Makefile + +MODULE_big = pg_plan_advice +OBJS = \ + $(WIN32RES) \ + pg_plan_advice.o \ + pgpa_ast.o \ + pgpa_collector.o \ + pgpa_identifier.o \ + pgpa_join.o \ + pgpa_output.o \ + pgpa_parser.o \ + pgpa_planner.o \ + pgpa_scan.o \ + pgpa_scanner.o \ + pgpa_trove.o \ + pgpa_walker.o + +EXTENSION = pg_plan_advice +DATA = pg_plan_advice--1.0.sql +PGFILEDESC = "pg_plan_advice - help the planner get the right plan" + +REGRESS = gather join_order join_strategy partitionwise scan +TAP_TESTS = 1 + +EXTRA_CLEAN = pgpa_parser.h pgpa_parser.c pgpa_scanner.c + +# required for 001_regress.pl +REGRESS_SHLIB=$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX) +export REGRESS_SHLIB + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pg_plan_advice +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +# See notes in src/backend/parser/Makefile about the following two rules +pgpa_parser.h: pgpa_parser.c + touch $@ + +pgpa_parser.c: BISONFLAGS += -d + +# Force these dependencies to be known even without dependency info built: +pgpa_parser.o pgpa_scanner.o: pgpa_parser.h diff --git a/contrib/pg_plan_advice/README b/contrib/pg_plan_advice/README new file mode 100644 index 0000000000..7cd3abddb9 --- /dev/null +++ b/contrib/pg_plan_advice/README @@ -0,0 +1,271 @@ +contrib/pg_plan_advice/README + +Plan Advice +=========== + +This module implements a mini-language for "plan advice" that allows for +control of certain key planner decisions. Goals include (1) enforcing plan +stability (my previous plan was good and I would like to keep getting a +similar one) and (2) allowing users to experiment with plans other than +the one preferred by the optimizer. Non-goals include (1) controlling +every possible planner decision and (2) forcing consideration of plans +that the optimizer rejects for reasons other than cost. (There is some +room for bikeshedding about what exactly this non-goal means: what if +we skip path generation entirely for a certain case on the theory that +we know it cannot win on cost? Does that count as a cost-based rejection +even though no cost was ever computed?) + +Generally, plan advice is a series of whitespace-separated advice items, +each of which applies an advice tag to a list of advice targets. For +example, "SEQ_SCAN(foo) HASH_JOIN(bar@ss)" contains two items of advice, +the first of which applies the SEQ_SCAN tag to "foo" and the second of +which applies the HASH_JOIN tag to "bar@ss". In this simple example, each +target identifies a single relation; see "Relation Identifiers", below. +Advice tags can also be applied to groups of relations; for example, +"HASH_JOIN(baz (bletch quux))" applies the HASH_JOIN tag to the single +relation identifier "baz" as well as to the 2-item list containing +"bletch" and "quux". + +Critically, this module knows both how to generate plan advice from an +already-existing plan, and also how to enforce it during future planning +cycles. Everything it does is intended to be "round-trip safe": if you +generate advice from a plan and then feed that back into a future planing +cycle, each piece of advice should be guaranteed to apply to the exactly the +same part of the query from which it was generated without ambiguity or +guesswork, and it should succesfully enforce the same planning decision that +led to it being generated in the first place. Note that there is no +intention that these guarantees hold in the presence of intervening DDL; +e.g. if you change the properties of a function so that a subquery is no +longer inlined, or if you drop an index named in the plan advice, the advice +isn't going to work any more. That's expected. + +This module aims to force the planner to follow any provided advice without +regard to whether it is appears to be good advice or bad advice. If the +user provides bad advice, whether derived from a previously-generated plan +or manually written, they may get a bad plan. We regard this as user error, +not a defect in this module. It seems likely that applying advice +judiciously and only when truly required to avoid problems will be a more +successful strategy than applying it with a broad brush, but users are free +to experiment with whatever strategies they think best. + +Relation Identifiers +==================== + +Uniquely identifying the part of a query to which a certain piece of +advice applies is harder than it sounds. Our basic approach is to use +relation aliases as a starting point, and then disambiguate. There are +three ways that same relation alias can occur multiple times: + +1. It can appear in more than one subquery. + +2. It can appear more than once in the same subquery, + e.g. (foo JOIN bar) x JOIN foo. + +3. The table can be partitioned. + +Any combination of these things can occur simultaneously. Therefore, our +general syntax for a relation identifier is: + +alias_name#occurrence_number/partition_schema.partition_name@plan_name + +All components except for the alias_name are optional and included only +when required. When a component is omitted, the associated punctuation +must also be omitted. Occurrence numbers are counted ignoring children of +partitioned tables. When the generated occurrence number is 1, we omit +the occurrence number. The partition schema and partition name are included +only for children of partitioned tables. In generated advice, the +partition_schema is always included whenever there is a partition_name, +but user-written advice may mention the name and omit the schema. The +plan_name is omitted for the top-level PlannerInfo. + +Scan Advice +=========== + +For many types of scan, no advice is generated or possible; for instance, +a subquery is always scanned using a subquery scan. While that scan may be +elided via setrefs processing, this doesn't change the fact that only one +basic approach exists. Hence, scan advice applies mostly to relations, which +can be scanned in multiple ways. + +We tend to think of a scan as targeting a single relation, and that's +normally the case, but it doesn't have to be. For instance, if a join is +proven empty, the whole thing may be replaced with a single Result node +which, in effect, is a degenerate scan of every relation in the collapsed +portion of the join tree. Similarly, it's possible to inject a custom scan +in such a way that it replaces an entire join. If we ever emit advice +for these cases, it would target sets of relation identifiers surrounded +by curly brances, e.g. SOME_SORT_OF_SCAN(foo (bar baz)) would mean that the +the given scan type would be used for foo as a single relation and also the +combination of bar and baz as a join product. We have no such cases at +present. + +For index and index-only scans, both the relation being scanned and the +index or indexes being used must be specified. For example, INDEX_SCAN(foo +foo_a_idx bar bar_b_idx) indicates that an index scan (not an index-only +scan) should be used on foo_a_idx when scanning foo, and that an index scan +should be used on bar_b_idx when scanning bar. + +Bitmap heap scans allow for a more complicated index specification. For +example, BITMAP_HEAP_SCAN(foo &&(foo_a_idx ||(foo_b_idx foo_c_idx))) says +that foo should be scanned using a BitmapHeapScan over a BitmapAnd between +foo_a_idx and the result of a BitmapOr between foo_b_idx and foo_c_idx. + +XXX: Currently, BITMAP_HEAP_SCAN does not enforce the index specification, +because the available hooks are insufficient to do so. It's possible that +this should be changed to exclude the index specification altogether and +simply insist that some sort of bitmap heap scan is used; alternatively, +we need better hooks. + +Join Order Advice +================= + +The JOIN_ORDER tag specifies the order in which several tables that are +part of the same join problem should be joined. Each subquery (except for +those that are inlined) is a separate join problem. Within a subquery, +partitionwise joins can create additional, separate join problems. Hence, +queries involving partitionwise joins may use JOIN_ORDER() many times. + +We take the canonical join structure to be an outer-deep tree, so +JOIN_ORDER(t1 t2 t3) says that t1 is the driving table and should be joined +first to t2 and then to t3. If the join problem involves additional tables, +they can be joined in any order after the join between t1, t2, and t3 has +been constructured. Generated join advice always mentions all tables +in the join problem, but manually written join advice need not do so. + +For trees which are not outer-deep, parentheses can be used. For example, +JOIN_ORDER(t1 (t2 t3)) says that the top-level join should have t1 on the +outer side and a join between t2 and t3 on the inner side. That join should +be constructed so that t2 is on the outer side and t3 is on the inner side. + +In some cases, it's not possible to fully specify the join order in this way. +For example, if t2 and t3 are being scanned by a single custom scan or foreign +scan, or if a partitionwise join is being performed between those tables, then +it's impossible to say that t2 is the outer table and t3 is the inner table, +or the other way around; it's just undefined. In such cases, we generate +join advice that uses curly braces, intending to indicate a lack of ordering: +JOIN_ORDER(t1 {t2 t3}) says that the uppermost join should have t1 on the outer +side and some kind of join between t2 and t3 on the inner side, but without +saying how that join must be performed or anything about which relation should +appear on which side of the join, or even whether this kind of join has sides. + +Join Strategy Advice +==================== + +Tags such as NESTED_LOOP_PLAIN specify the method that should be used to +perform a certain join. More specifically, NESTED_LOOP_PLAIN(x (y z)) says +that the plan should put the relation whose identifier is "x" on the inner +side of a plain nested loop (one without materialization or memoization) +and that it should also put a join between the relation whose identifier is +"y" and the relation whose identifier is "z" on the inner side of a nested +loop. Hence, for an N-table join problem, there will be N-1 pieces of join +strategy advice; no join strategy advice is required for the outermost +table in the join problem. + +Considering that we have both join order advice and join strategy advice, +it might seem natural to say that NESTED_LOOP_PLAIN(x) should be redefined +to mean that x should appear by itself on one side or the other of a nested +loop, rather than specifically on the inner side, but this definition appears +useless in practice. It gives the planner too much freedom to do things that +bear little resemblance to what the user probably had in mind. This makes +only a limited amount of practical difference in the case of a merge join or +unparameterized nested loop, but for a parameterized nested loop or a hash +join, the two sides are treated very differently and saying that a certain +relation should be involved in one of those operations without saying which +role it should take isn't saying much. + +This choice of definition implies that join strategy advice also imposes some +join order constraints. For example, given a join between foo and bar, +HASH_JOIN(bar) implies that foo is the driving table. Otherwise, it would +be impossible to put bar beneath the inner side of a Hash Join. + +Note that, given this definition, it's reasonable to consider deleting the +join order advice but applying the join strategy advice. For example, +consider a star schema with tables fact, dim1, dim2, dim3, dim4, and dim5. +The automatically generated advice might specify JOIN_ORDER(fact dim1 dim3 +dim4 dim2 dim5) HASH_JOIN(dim2 dim4) NESTED_LOOP_PLAIN(dim1 dim3 dim5). +Deleting the JOIN_ORDER advice allows the planner to reorder the joins +however it likes while still forcing the same choice of join method. This +seems potentially useful, and is one reason why a unified syntax that controls +both join order and join method in a single locution was not chosen. + +Advice Completeness +=================== + +An essential guiding principle is that no inference may made on the basis +of the absence of advice. The user is entitled to remove any portion of the +generated advice which they deem unsuitable or counterproductive and the +result should only be to increase the flexibility afforded to the planner. +This means that if advice can say that a certain optimization or technique +should be used, it should also be able to say that the optimization or +technique should not be used. We should never assume that the absence of an +instruction to do a certain thing means that it should not be done; all +instructions must be explicit. + +Semijoin Uniqueness +=================== + +Faced with a semijoin, the planner considers both a direct implementation +and a plan where the one side is made unique and then an inner join is +performed. We emit SEMIJOIN_UNIQUE() advice when this transformation occurs +and SEMIJOIN_NON_UNIQUE() advice when it doesn't. These items work like +join strategy advice: the inner side of the relevant join is named, and the +chosen join order must be compatible with the advice having some effect. + +XXX: Right semijoins haven't been properly thought through. The associated +code probably just doesn't work. + +XXX: Semijoin uniqueness advice has no automated tests and need substantially +more manual testing. + +Partitionwise +============= + +PARTITIONWISE() advise can be used to specify both those partitionwise joins +which should be performed and those which should not be performed; the idea +is that each argument to PARTITIONWISE specifies a set of relations that +should be scanned partitionwise after being joined to each other and nothing +else. Hence, for example, PARTITIONWISE((t1 t2) t3) specifies that the +query should contain a partitionwise join between t1 and t2 and that t3 +should not be part of any partitionwise join. If there are no other rels +in the query, specifying just PARTITIONWISE((t1 t2)) would have the same +effect, since there would be no other rels to which t3 could be joined in +a partitionwise fashion. + +Parallel Query (Gather, etc.) +============================= + +Each argument to GATHER() or GATHER_MERGE() is a single relation or an +exact set of relations on top of which a Gather or Gather Merge node, +respectively, should be placed. Each argument to NO_GATHER() is a single +relation that should not appear beneath any Gather or Gather Merge node; +that is, parallelism should not be used. + +Implicit Join Order Constraints +=============================== + +When JOIN_ORDER() advice is not provided for a particular join problem, +other pieces of advice may still incidentally constraint the join order. +For example, a user who specifies HASH_JOIN((foo bar)) is explicitly saying +that there should be a hash join with exactly foo and bar on the outer +side of it, but that also implies that foo and bar must be joined to +each other before either of them is joined to anything else. Otherwise, +the join the user is attempting to constraint won't actually occur in the +query, which ends up looking like the system has just decided to ignore +the advice altogether. + +Future Work +=========== + +We don't handle choice of aggregation: it would be nice to be able to force +sorted or grouped aggregation. I'm guessing this can be left to future work. + +More seriously, we don't know anything about eager aggregation, which could +have a large impact on the shape of the plan tree. XXX: This needs some study +to determine how large a problem it is, and might need to be fixed sooner +rather than later. + +We don't offer any control over estimates, only outcomes. It seems like a +good idea to incorporate that ability at some future point, as pg_hint_plan +does. However, since primary goal of the initial development work is to be +able to induce the planner to recreate a desired plan that worked well in +the past, this has not been included in the initial development effort. diff --git a/contrib/pg_plan_advice/expected/gather.out b/contrib/pg_plan_advice/expected/gather.out new file mode 100644 index 0000000000..bd4f4c24e2 --- /dev/null +++ b/contrib/pg_plan_advice/expected/gather.out @@ -0,0 +1,344 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 1; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +SET min_parallel_table_scan_size = 0; +SET debug_parallel_query = off; +CREATE TABLE gt_dim (id serial primary key, dim text) + WITH (autovacuum_enabled = false); +INSERT INTO gt_dim (dim) SELECT random()::text FROM generate_series(1,100) g; +VACUUM ANALYZE gt_dim; +CREATE TABLE gt_fact ( + id int not null, + dim_id integer not null references gt_dim (id) +) WITH (autovacuum_enabled = false); +INSERT INTO gt_fact + SELECT g, (g%3)+1 FROM generate_series(1,100000) g; +VACUUM ANALYZE gt_fact; +-- By default, we expect Gather Merge with a parallel hash join. +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------------- + Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: f.dim_id + -> Parallel Hash Join + Hash Cond: (f.dim_id = d.id) + -> Parallel Seq Scan on gt_fact f + -> Parallel Hash + -> Parallel Seq Scan on gt_dim d + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + GATHER_MERGE((f d)) +(14 rows) + +-- Force Gather or Gather Merge of both relations together. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------------- + Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: f.dim_id + -> Parallel Hash Join + Hash Cond: (f.dim_id = d.id) + -> Parallel Seq Scan on gt_fact f + -> Parallel Hash + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER_MERGE((f d)) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + GATHER_MERGE((f d)) +(16 rows) + +SET LOCAL pg_plan_advice.advice = 'gather((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------------- + Sort + Sort Key: f.dim_id + -> Gather + Workers Planned: 1 + -> Parallel Hash Join + Hash Cond: (f.dim_id = d.id) + -> Parallel Seq Scan on gt_fact f + -> Parallel Hash + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER((f d)) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + GATHER((f d)) +(16 rows) + +COMMIT; +-- Force a separate Gather or Gather Merge operation for each relation. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +-------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: f.dim_id + -> Parallel Seq Scan on gt_fact f + -> Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: d.id + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER_MERGE(f) /* matched */ + GATHER_MERGE(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + SEQ_SCAN(f d) + GATHER_MERGE(f d) +(20 rows) + +SET LOCAL pg_plan_advice.advice = 'gather(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +-------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Sort + Sort Key: f.dim_id + -> Gather + Workers Planned: 1 + -> Parallel Seq Scan on gt_fact f + -> Sort + Sort Key: d.id + -> Gather + Workers Planned: 1 + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER(f) /* matched */ + GATHER(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + SEQ_SCAN(f d) + GATHER(f d) +(20 rows) + +SET LOCAL pg_plan_advice.advice = 'gather((d d/d.d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +-------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: f.dim_id + -> Parallel Seq Scan on gt_fact f + -> Index Scan using gt_dim_pkey on gt_dim d + Supplied Plan Advice: + GATHER((d d/d.d)) /* partially matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + SEQ_SCAN(f) + INDEX_SCAN(d public.gt_dim_pkey) + GATHER_MERGE(f) + NO_GATHER(d) +(17 rows) + +COMMIT; +-- Force a Gather or Gather Merge on one relation but no parallelism on other. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge(f) no_gather(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +-------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: f.dim_id + -> Parallel Seq Scan on gt_fact f + -> Index Scan using gt_dim_pkey on gt_dim d + Supplied Plan Advice: + GATHER_MERGE(f) /* matched */ + NO_GATHER(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + SEQ_SCAN(f) + INDEX_SCAN(d public.gt_dim_pkey) + GATHER_MERGE(f) + NO_GATHER(d) +(18 rows) + +SET LOCAL pg_plan_advice.advice = 'gather_merge(d) no_gather(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Sort + Sort Key: f.dim_id + -> Seq Scan on gt_fact f + -> Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: d.id + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER_MERGE(d) /* matched */ + NO_GATHER(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + SEQ_SCAN(f d) + GATHER_MERGE(d) + NO_GATHER(f) +(19 rows) + +SET LOCAL pg_plan_advice.advice = 'gather(f) no_gather(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +-------------------------------------------------- + Merge Join + Merge Cond: (d.id = f.dim_id) + -> Index Scan using gt_dim_pkey on gt_dim d + -> Sort + Sort Key: f.dim_id + -> Gather + Workers Planned: 1 + -> Parallel Seq Scan on gt_fact f + Supplied Plan Advice: + GATHER(f) /* matched */ + NO_GATHER(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + MERGE_JOIN_PLAIN(f) + SEQ_SCAN(f) + INDEX_SCAN(d public.gt_dim_pkey) + GATHER(f) + NO_GATHER(d) +(18 rows) + +SET LOCAL pg_plan_advice.advice = 'gather(d) no_gather(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Sort + Sort Key: f.dim_id + -> Seq Scan on gt_fact f + -> Sort + Sort Key: d.id + -> Gather + Workers Planned: 1 + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER(d) /* matched */ + NO_GATHER(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + SEQ_SCAN(f d) + GATHER(d) + NO_GATHER(f) +(19 rows) + +COMMIT; +-- Force no Gather or Gather Merge use at all. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'no_gather(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------ + Merge Join + Merge Cond: (d.id = f.dim_id) + -> Index Scan using gt_dim_pkey on gt_dim d + -> Sort + Sort Key: f.dim_id + -> Seq Scan on gt_fact f + Supplied Plan Advice: + NO_GATHER(f) /* matched */ + NO_GATHER(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + MERGE_JOIN_PLAIN(f) + SEQ_SCAN(f) + INDEX_SCAN(d public.gt_dim_pkey) + NO_GATHER(f d) +(15 rows) + +COMMIT; +-- Can't force Gather Merge without the ORDER BY clause, but just Gather is OK. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id; + QUERY PLAN +------------------------------------------------- + Gather + Disabled: true + Workers Planned: 1 + -> Parallel Hash Join + Hash Cond: (f.dim_id = d.id) + -> Parallel Seq Scan on gt_fact f + -> Parallel Hash + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER_MERGE((f d)) /* matched, failed */ + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + GATHER((f d)) +(15 rows) + +SET LOCAL pg_plan_advice.advice = 'gather((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id; + QUERY PLAN +------------------------------------------------- + Gather + Workers Planned: 1 + -> Parallel Hash Join + Hash Cond: (f.dim_id = d.id) + -> Parallel Seq Scan on gt_fact f + -> Parallel Hash + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER((f d)) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + GATHER((f d)) +(14 rows) + +COMMIT; diff --git a/contrib/pg_plan_advice/expected/join_order.out b/contrib/pg_plan_advice/expected/join_order.out new file mode 100644 index 0000000000..e87652370c --- /dev/null +++ b/contrib/pg_plan_advice/expected/join_order.out @@ -0,0 +1,292 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +CREATE TABLE jo_dim1 (id integer primary key, dim1 text, val1 int) + WITH (autovacuum_enabled = false); +INSERT INTO jo_dim1 (id, dim1, val1) + SELECT g, 'some filler text ' || g, (g % 3) + 1 + FROM generate_series(1,100) g; +VACUUM ANALYZE jo_dim1; +CREATE TABLE jo_dim2 (id integer primary key, dim2 text, val2 int) + WITH (autovacuum_enabled = false); +INSERT INTO jo_dim2 (id, dim2, val2) + SELECT g, 'some filler text ' || g, (g % 7) + 1 + FROM generate_series(1,1000) g; +VACUUM ANALYZE jo_dim2; +CREATE TABLE jo_fact ( + id int primary key, + dim1_id integer not null references jo_dim1 (id), + dim2_id integer not null references jo_dim2 (id) +) WITH (autovacuum_enabled = false); +INSERT INTO jo_fact + SELECT g, (g%100)+1, (g%100)+1 FROM generate_series(1,100000) g; +VACUUM ANALYZE jo_fact; +-- We expect to join to d2 first and then d1, since the condition on d2 +-- is more selective. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------ + Hash Join + Hash Cond: (f.dim1_id = d1.id) + -> Hash Join + Hash Cond: (f.dim2_id = d2.id) + -> Seq Scan on jo_fact f + -> Hash + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + Generated Plan Advice: + JOIN_ORDER(f d2 d1) + HASH_JOIN(d2 d1) + SEQ_SCAN(f d2 d1) + NO_GATHER(f d1 d2) +(16 rows) + +-- Force a few different join orders. Some of these are very inefficient, +-- but the planner considers them all viable. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------ + Hash Join + Hash Cond: (f.dim2_id = d2.id) + -> Hash Join + Hash Cond: (f.dim1_id = d1.id) + -> Seq Scan on jo_fact f + -> Hash + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + -> Hash + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + Supplied Plan Advice: + JOIN_ORDER(f d1 d2) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d1 d2) + HASH_JOIN(d1 d2) + SEQ_SCAN(f d1 d2) + NO_GATHER(f d1 d2) +(18 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------ + Hash Join + Hash Cond: (f.dim1_id = d1.id) + -> Hash Join + Hash Cond: (f.dim2_id = d2.id) + -> Seq Scan on jo_fact f + -> Hash + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + Supplied Plan Advice: + JOIN_ORDER(f d2 d1) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d2 d1) + HASH_JOIN(d2 d1) + SEQ_SCAN(f d2 d1) + NO_GATHER(f d1 d2) +(18 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(d1 f d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +----------------------------------------- + Hash Join + Hash Cond: (f.dim2_id = d2.id) + -> Hash Join + Hash Cond: (d1.id = f.dim1_id) + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + -> Hash + -> Seq Scan on jo_fact f + -> Hash + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + Supplied Plan Advice: + JOIN_ORDER(d1 f d2) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d1 f d2) + HASH_JOIN(f d2) + SEQ_SCAN(d1 f d2) + NO_GATHER(f d1 d2) +(18 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(f (d1 d2))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------------------------ + Hash Join + Hash Cond: ((f.dim1_id = d1.id) AND (f.dim2_id = d2.id)) + -> Seq Scan on jo_fact f + -> Hash + -> Nested Loop + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + -> Materialize + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + Supplied Plan Advice: + JOIN_ORDER(f (d1 d2)) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f (d1 d2)) + NESTED_LOOP_MATERIALIZE(d2) + HASH_JOIN(d2) + SEQ_SCAN(f d1 d2) + NO_GATHER(f d1 d2) +(18 rows) + +COMMIT; +-- The unusual formulation of this query is intended to prevent the query +-- planner from reducing the FULL JOIN to some other join type, so that we +-- can test what happens with a join type that cannot be reordered. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; + QUERY PLAN +------------------------------------------------------------- + Nested Loop + Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL)) + -> Merge Full Join + Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0))) + -> Sort + Sort Key: ((d2.id + 0)) + -> Seq Scan on jo_dim2 d2 + -> Sort + Sort Key: ((f.dim2_id + 0)) + -> Seq Scan on jo_fact f + -> Materialize + -> Seq Scan on jo_dim1 d1 + Generated Plan Advice: + JOIN_ORDER(d2 f d1) + MERGE_JOIN_PLAIN(f) + NESTED_LOOP_MATERIALIZE(d1) + SEQ_SCAN(d2 f d1) + NO_GATHER(d1 f d2) +(18 rows) + +-- We should not be able to force the planner to join f to d1 first, because +-- that is not a valid join order, but we should be able to force the planner +-- to make either d2 or f the driving table. +BEGIN; +-- XXX: The advice feedback says 'partially matched' here which isn't exactly +-- wrong given the way that flag is handled in the code, but it's at the very +-- least confusing. Something should probably be improved here. +SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; + QUERY PLAN +------------------------------------------------------------- + Nested Loop + Disabled: true + Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL)) + -> Merge Full Join + Disabled: true + Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0))) + -> Sort + Sort Key: ((d2.id + 0)) + -> Seq Scan on jo_dim2 d2 + -> Sort + Sort Key: ((f.dim2_id + 0)) + -> Seq Scan on jo_fact f + -> Seq Scan on jo_dim1 d1 + Supplied Plan Advice: + JOIN_ORDER(f d1 d2) /* partially matched */ + Generated Plan Advice: + JOIN_ORDER(d2 f d1) + MERGE_JOIN_PLAIN(f) + NESTED_LOOP_PLAIN(d1) + SEQ_SCAN(d2 f d1) + NO_GATHER(d1 f d2) +(21 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; + QUERY PLAN +------------------------------------------------------------- + Nested Loop + Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL)) + -> Merge Full Join + Merge Cond: (((f.dim2_id + 0)) = ((d2.id + 0))) + -> Sort + Sort Key: ((f.dim2_id + 0)) + -> Seq Scan on jo_fact f + -> Sort + Sort Key: ((d2.id + 0)) + -> Seq Scan on jo_dim2 d2 + -> Materialize + -> Seq Scan on jo_dim1 d1 + Supplied Plan Advice: + JOIN_ORDER(f d2 d1) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d2 d1) + MERGE_JOIN_PLAIN(d2) + NESTED_LOOP_MATERIALIZE(d1) + SEQ_SCAN(f d2 d1) + NO_GATHER(d1 f d2) +(20 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(d2 f d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; + QUERY PLAN +------------------------------------------------------------- + Nested Loop + Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL)) + -> Merge Full Join + Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0))) + -> Sort + Sort Key: ((d2.id + 0)) + -> Seq Scan on jo_dim2 d2 + -> Sort + Sort Key: ((f.dim2_id + 0)) + -> Seq Scan on jo_fact f + -> Materialize + -> Seq Scan on jo_dim1 d1 + Supplied Plan Advice: + JOIN_ORDER(d2 f d1) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d2 f d1) + MERGE_JOIN_PLAIN(f) + NESTED_LOOP_MATERIALIZE(d1) + SEQ_SCAN(d2 f d1) + NO_GATHER(d1 f d2) +(20 rows) + +COMMIT; +-- XXX: add tests for join order prefix matching +-- XXX: join_order(justonerel) shouldn't report partially matched diff --git a/contrib/pg_plan_advice/expected/join_strategy.out b/contrib/pg_plan_advice/expected/join_strategy.out new file mode 100644 index 0000000000..71ee26a337 --- /dev/null +++ b/contrib/pg_plan_advice/expected/join_strategy.out @@ -0,0 +1,297 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +CREATE TABLE join_dim (id serial primary key, dim text) + WITH (autovacuum_enabled = false); +INSERT INTO join_dim (dim) SELECT random()::text FROM generate_series(1,100) g; +VACUUM ANALYZE join_dim; +CREATE TABLE join_fact ( + id int primary key, + dim_id integer not null references join_dim (id) +) WITH (autovacuum_enabled = false); +INSERT INTO join_fact + SELECT g, (g%3)+1 FROM generate_series(1,100000) g; +CREATE INDEX join_fact_dim_id ON join_fact (dim_id); +VACUUM ANALYZE join_fact; +-- We expect a hash join by default. +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +------------------------------------ + Hash Join + Hash Cond: (f.dim_id = d.id) + -> Seq Scan on join_fact f + -> Hash + -> Seq Scan on join_dim d + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + NO_GATHER(f d) +(10 rows) + +-- Try forcing each join method in turn with join_dim as the inner table. +-- All of these should work except for MERGE_JOIN_MATERIALIZE; that will +-- fail, because the planner knows that join_dim (id) is unique, and will +-- refuse to add mark/restore overhead. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +------------------------------------ + Hash Join + Hash Cond: (f.dim_id = d.id) + -> Seq Scan on join_fact f + -> Hash + -> Seq Scan on join_dim d + Supplied Plan Advice: + HASH_JOIN(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------------------- + Merge Join + Disabled: true + Merge Cond: (f.dim_id = d.id) + -> Index Scan using join_fact_dim_id on join_fact f + -> Index Scan using join_dim_pkey on join_dim d + Supplied Plan Advice: + MERGE_JOIN_MATERIALIZE(d) /* matched, failed */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + INDEX_SCAN(f public.join_fact_dim_id d public.join_dim_pkey) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Index Scan using join_fact_dim_id on join_fact f + -> Index Scan using join_dim_pkey on join_dim d + Supplied Plan Advice: + MERGE_JOIN_PLAIN(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + INDEX_SCAN(f public.join_fact_dim_id d public.join_dim_pkey) + NO_GATHER(f d) +(11 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +-------------------------------------------- + Nested Loop + Join Filter: (f.dim_id = d.id) + -> Seq Scan on join_fact f + -> Materialize + -> Seq Scan on join_dim d + Supplied Plan Advice: + NESTED_LOOP_MATERIALIZE(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + NESTED_LOOP_MATERIALIZE(d) + SEQ_SCAN(f d) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------------- + Nested Loop + -> Seq Scan on join_fact f + -> Memoize + Cache Key: f.dim_id + Cache Mode: logical + -> Index Scan using join_dim_pkey on join_dim d + Index Cond: (id = f.dim_id) + Supplied Plan Advice: + NESTED_LOOP_MEMOIZE(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + NESTED_LOOP_MEMOIZE(d) + SEQ_SCAN(f) + INDEX_SCAN(d public.join_dim_pkey) + NO_GATHER(f d) +(15 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------- + Nested Loop + -> Seq Scan on join_fact f + -> Index Scan using join_dim_pkey on join_dim d + Index Cond: (id = f.dim_id) + Supplied Plan Advice: + NESTED_LOOP_PLAIN(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + NESTED_LOOP_PLAIN(d) + SEQ_SCAN(f) + INDEX_SCAN(d public.join_dim_pkey) + NO_GATHER(f d) +(12 rows) + +COMMIT; +-- Now try forcing each join method in turn with join_fact as the inner +-- table. All of these should work. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +------------------------------------- + Hash Join + Hash Cond: (d.id = f.dim_id) + -> Seq Scan on join_dim d + -> Hash + -> Seq Scan on join_fact f + Supplied Plan Advice: + HASH_JOIN(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + HASH_JOIN(f) + SEQ_SCAN(d f) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------------------- + Merge Join + Merge Cond: (d.id = f.dim_id) + -> Index Scan using join_dim_pkey on join_dim d + -> Materialize + -> Index Scan using join_fact_dim_id on join_fact f + Supplied Plan Advice: + MERGE_JOIN_MATERIALIZE(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + MERGE_JOIN_MATERIALIZE(f) + INDEX_SCAN(d public.join_dim_pkey f public.join_fact_dim_id) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------------------- + Merge Join + Merge Cond: (d.id = f.dim_id) + -> Index Scan using join_dim_pkey on join_dim d + -> Index Scan using join_fact_dim_id on join_fact f + Supplied Plan Advice: + MERGE_JOIN_PLAIN(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + MERGE_JOIN_PLAIN(f) + INDEX_SCAN(d public.join_dim_pkey f public.join_fact_dim_id) + NO_GATHER(f d) +(11 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +-------------------------------------------- + Nested Loop + Join Filter: (f.dim_id = d.id) + -> Seq Scan on join_dim d + -> Materialize + -> Seq Scan on join_fact f + Supplied Plan Advice: + NESTED_LOOP_MATERIALIZE(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + NESTED_LOOP_MATERIALIZE(f) + SEQ_SCAN(d f) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +-------------------------------------------------------------- + Nested Loop + -> Seq Scan on join_dim d + -> Memoize + Cache Key: d.id + Cache Mode: logical + -> Index Scan using join_fact_dim_id on join_fact f + Index Cond: (dim_id = d.id) + Supplied Plan Advice: + NESTED_LOOP_MEMOIZE(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + NESTED_LOOP_MEMOIZE(f) + SEQ_SCAN(d) + INDEX_SCAN(f public.join_fact_dim_id) + NO_GATHER(f d) +(15 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +-------------------------------------------------------- + Nested Loop + -> Seq Scan on join_dim d + -> Index Scan using join_fact_dim_id on join_fact f + Index Cond: (dim_id = d.id) + Supplied Plan Advice: + NESTED_LOOP_PLAIN(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + NESTED_LOOP_PLAIN(f) + SEQ_SCAN(d) + INDEX_SCAN(f public.join_fact_dim_id) + NO_GATHER(f d) +(12 rows) + +COMMIT; +-- We can't force a foreign join between these tables, because they +-- aren't foreign tables. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'FOREIGN_JOIN((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------- + Nested Loop + Disabled: true + -> Seq Scan on join_fact f + -> Index Scan using join_dim_pkey on join_dim d + Index Cond: (id = f.dim_id) + Supplied Plan Advice: + FOREIGN_JOIN((f d)) /* matched, failed */ + Generated Plan Advice: + JOIN_ORDER(f d) + NESTED_LOOP_PLAIN(d) + SEQ_SCAN(f) + INDEX_SCAN(d public.join_dim_pkey) + NO_GATHER(f d) +(13 rows) + +COMMIT; diff --git a/contrib/pg_plan_advice/expected/local_collector.out b/contrib/pg_plan_advice/expected/local_collector.out new file mode 100644 index 0000000000..56f554bf23 --- /dev/null +++ b/contrib/pg_plan_advice/expected/local_collector.out @@ -0,0 +1,65 @@ +CREATE EXTENSION pg_plan_advice; +SET debug_parallel_query = off; +-- Try clearing advice before we've collected any. +SELECT pg_clear_collected_local_advice(); + pg_clear_collected_local_advice +--------------------------------- + +(1 row) + +-- Set a small advice collection limit so that we'll exceed it. +SET pg_plan_advice.local_collection_limit = 2; +-- Set up a dummy table. +CREATE TABLE dummy_table (a int primary key, b text) + WITH (autovacuum_enabled = false, parallel_workers = 0); +-- Test queries. +SELECT * FROM dummy_table a, dummy_table b; + a | b | a | b +---+---+---+--- +(0 rows) + +SELECT * FROM dummy_table; + a | b +---+--- +(0 rows) + +-- Should return the advice from the second test query. +SELECT advice FROM pg_get_collected_local_advice() ORDER BY id DESC LIMIT 1; + advice +------------------------ + SEQ_SCAN(dummy_table) + + NO_GATHER(dummy_table) +(1 row) + +-- Now try clearing advice again. +SELECT pg_clear_collected_local_advice(); + pg_clear_collected_local_advice +--------------------------------- + +(1 row) + +-- Raise the collection limit so that the collector uses multiple chunks. +SET pg_plan_advice.local_collection_limit = 2000; +-- Push a bunch of queries through the collector. +DO $$ +BEGIN + FOR x IN 1..2000 LOOP + EXECUTE 'SELECT * FROM dummy_table'; + END LOOP; +END +$$; +-- Check that the collector worked. +SELECT COUNT(*) FROM pg_get_collected_local_advice(); + count +------- + 2000 +(1 row) + +-- And clear one more time, to verify that this doesn't cause a problem +-- even with a larger number of entries. +SELECT pg_clear_collected_local_advice(); + pg_clear_collected_local_advice +--------------------------------- + +(1 row) + diff --git a/contrib/pg_plan_advice/expected/partitionwise.out b/contrib/pg_plan_advice/expected/partitionwise.out new file mode 100644 index 0000000000..df0f05531d --- /dev/null +++ b/contrib/pg_plan_advice/expected/partitionwise.out @@ -0,0 +1,243 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +SET enable_partitionwise_join = true; +CREATE TABLE pt1 (id integer primary key, dim1 text, val1 int) + PARTITION BY RANGE (id); +CREATE TABLE pt1a PARTITION OF pt1 FOR VALUES FROM (1) to (1001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt1b PARTITION OF pt1 FOR VALUES FROM (1001) to (2001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt1c PARTITION OF pt1 FOR VALUES FROM (2001) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO pt1 (id, dim1, val1) + SELECT g, 'some filler text ' || g, (g % 3) + 1 + FROM generate_series(1,3000) g; +VACUUM ANALYZE pt1; +CREATE TABLE pt2 (id integer primary key, dim2 text, val2 int) + PARTITION BY RANGE (id); +CREATE TABLE pt2a PARTITION OF pt2 FOR VALUES FROM (1) to (1001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt2b PARTITION OF pt2 FOR VALUES FROM (1001) to (2001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt2c PARTITION OF pt2 FOR VALUES FROM (2001) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO pt2 (id, dim2, val2) + SELECT g, 'some other text ' || g, (g % 5) + 1 + FROM generate_series(1,3000) g; +VACUUM ANALYZE pt2; +CREATE TABLE pt3 (id integer primary key, dim3 text, val3 int) + PARTITION BY RANGE (id); +CREATE TABLE pt3a PARTITION OF pt3 FOR VALUES FROM (1) to (1001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt3b PARTITION OF pt3 FOR VALUES FROM (1001) to (2001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt3c PARTITION OF pt3 FOR VALUES FROM (2001) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO pt3 (id, dim3, val3) + SELECT g, 'a third random text ' || g, (g % 7) + 1 + FROM generate_series(1,3000) g; +VACUUM ANALYZE pt3; +CREATE TABLE ptmismatch (id integer primary key, dimm text, valm int) + PARTITION BY RANGE (id); +CREATE TABLE ptmismatcha PARTITION OF ptmismatch + FOR VALUES FROM (1) to (1501) + WITH (autovacuum_enabled = false); +CREATE TABLE ptmismatchb PARTITION OF ptmismatch + FOR VALUES FROM (1501) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO ptmismatch (id, dimm, valm) + SELECT g, 'yet another text ' || g, (g % 2) + 1 + FROM generate_series(1,3000) g; +VACUUM ANALYZE ptmismatch; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_1.id = pt3_1.id) + -> Seq Scan on pt2a pt2_1 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3a pt3_1 + Filter: (val3 = 1) + -> Index Scan using pt1a_pkey on pt1a pt1_1 + Index Cond: (id = pt2_1.id) + Filter: (val1 = 1) + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_2.id = pt3_2.id) + -> Seq Scan on pt2b pt2_2 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3b pt3_2 + Filter: (val3 = 1) + -> Index Scan using pt1b_pkey on pt1b pt1_2 + Index Cond: (id = pt2_2.id) + Filter: (val1 = 1) + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_3.id = pt3_3.id) + -> Seq Scan on pt2c pt2_3 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3c pt3_3 + Filter: (val3 = 1) + -> Index Scan using pt1c_pkey on pt1c pt1_3 + Index Cond: (id = pt2_3.id) + Filter: (val1 = 1) + Generated Plan Advice: + JOIN_ORDER(pt2/public.pt2a pt3/public.pt3a pt1/public.pt1a) + JOIN_ORDER(pt2/public.pt2b pt3/public.pt3b pt1/public.pt1b) + JOIN_ORDER(pt2/public.pt2c pt3/public.pt3c pt1/public.pt1c) + NESTED_LOOP_PLAIN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c) + HASH_JOIN(pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c) + SEQ_SCAN(pt2/public.pt2a pt3/public.pt3a pt2/public.pt2b pt3/public.pt3b + pt2/public.pt2c pt3/public.pt3c) + INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey + pt1/public.pt1c public.pt1c_pkey) + PARTITIONWISE((pt1 pt2 pt3)) + NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a + pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c) +(47 rows) + +-- Suppress partitionwise join, or do it just partially. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE(pt1 pt2 pt3)'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; + QUERY PLAN +------------------------------------------------------------------------------------- + Hash Join + Hash Cond: (pt1.id = pt2.id) + -> Append + -> Seq Scan on pt1a pt1_1 + Filter: (val1 = 1) + -> Seq Scan on pt1b pt1_2 + Filter: (val1 = 1) + -> Seq Scan on pt1c pt1_3 + Filter: (val1 = 1) + -> Hash + -> Hash Join + Hash Cond: (pt2.id = pt3.id) + -> Append + -> Seq Scan on pt2a pt2_1 + Filter: (val2 = 1) + -> Seq Scan on pt2b pt2_2 + Filter: (val2 = 1) + -> Seq Scan on pt2c pt2_3 + Filter: (val2 = 1) + -> Hash + -> Append + -> Seq Scan on pt3a pt3_1 + Filter: (val3 = 1) + -> Seq Scan on pt3b pt3_2 + Filter: (val3 = 1) + -> Seq Scan on pt3c pt3_3 + Filter: (val3 = 1) + Supplied Plan Advice: + PARTITIONWISE(pt1) /* matched */ + PARTITIONWISE(pt2) /* matched */ + PARTITIONWISE(pt3) /* matched */ + Generated Plan Advice: + JOIN_ORDER(pt1 (pt2 pt3)) + HASH_JOIN(pt3 pt3) + SEQ_SCAN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a + pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b + pt3/public.pt3c) + PARTITIONWISE(pt1 pt2 pt3) + NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a + pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c) +(40 rows) + +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) pt3)'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; + QUERY PLAN +------------------------------------------------------------------------------------- + Hash Join + Hash Cond: (pt1.id = pt3.id) + -> Append + -> Hash Join + Hash Cond: (pt1_1.id = pt2_1.id) + -> Seq Scan on pt1a pt1_1 + Filter: (val1 = 1) + -> Hash + -> Seq Scan on pt2a pt2_1 + Filter: (val2 = 1) + -> Hash Join + Hash Cond: (pt1_2.id = pt2_2.id) + -> Seq Scan on pt1b pt1_2 + Filter: (val1 = 1) + -> Hash + -> Seq Scan on pt2b pt2_2 + Filter: (val2 = 1) + -> Hash Join + Hash Cond: (pt1_3.id = pt2_3.id) + -> Seq Scan on pt1c pt1_3 + Filter: (val1 = 1) + -> Hash + -> Seq Scan on pt2c pt2_3 + Filter: (val2 = 1) + -> Hash + -> Append + -> Seq Scan on pt3a pt3_1 + Filter: (val3 = 1) + -> Seq Scan on pt3b pt3_2 + Filter: (val3 = 1) + -> Seq Scan on pt3c pt3_3 + Filter: (val3 = 1) + Supplied Plan Advice: + PARTITIONWISE((pt1 pt2)) /* matched */ + PARTITIONWISE(pt3) /* matched */ + Generated Plan Advice: + JOIN_ORDER(pt1/public.pt1a pt2/public.pt2a) + JOIN_ORDER(pt1/public.pt1b pt2/public.pt2b) + JOIN_ORDER(pt1/public.pt1c pt2/public.pt2c) + JOIN_ORDER({pt1 pt2} pt3) + HASH_JOIN(pt2/public.pt2a pt2/public.pt2b pt2/public.pt2c pt3) + SEQ_SCAN(pt1/public.pt1a pt2/public.pt2a pt1/public.pt1b pt2/public.pt2b + pt1/public.pt1c pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b + pt3/public.pt3c) + PARTITIONWISE((pt1 pt2) pt3) + NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a + pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c) +(47 rows) + +COMMIT; +-- Can't force a partitionwise join with a mismatched table. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 ptmismatch))'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, ptmismatch WHERE pt1.id = ptmismatch.id; + QUERY PLAN +--------------------------------------------------------------------------- + Nested Loop + Disabled: true + -> Append + -> Seq Scan on pt1a pt1_1 + -> Seq Scan on pt1b pt1_2 + -> Seq Scan on pt1c pt1_3 + -> Append + -> Index Scan using ptmismatcha_pkey on ptmismatcha ptmismatch_1 + Index Cond: (id = pt1.id) + -> Index Scan using ptmismatchb_pkey on ptmismatchb ptmismatch_2 + Index Cond: (id = pt1.id) + Supplied Plan Advice: + PARTITIONWISE((pt1 ptmismatch)) /* matched, failed */ + Generated Plan Advice: + JOIN_ORDER(pt1 ptmismatch) + NESTED_LOOP_PLAIN(ptmismatch) + SEQ_SCAN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c) + INDEX_SCAN(ptmismatch/public.ptmismatcha public.ptmismatcha_pkey + ptmismatch/public.ptmismatchb public.ptmismatchb_pkey) + PARTITIONWISE(pt1 ptmismatch) + NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c + ptmismatch/public.ptmismatcha ptmismatch/public.ptmismatchb) +(22 rows) + +COMMIT; diff --git a/contrib/pg_plan_advice/expected/scan.out b/contrib/pg_plan_advice/expected/scan.out new file mode 100644 index 0000000000..a80de78d82 --- /dev/null +++ b/contrib/pg_plan_advice/expected/scan.out @@ -0,0 +1,757 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +SET seq_page_cost = 0.1; +SET random_page_cost = 0.1; +SET cpu_tuple_cost = 0; +SET cpu_index_tuple_cost = 0; +CREATE TABLE scan_table (a int primary key, b text) + WITH (autovacuum_enabled = false); +INSERT INTO scan_table + SELECT g, 'some text ' || g FROM generate_series(1, 100000) g; +CREATE INDEX scan_table_b ON scan_table USING brin (b); +VACUUM ANALYZE scan_table; +-- Sequential scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + QUERY PLAN +------------------------- + Seq Scan on scan_table + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(4 rows) + +-- Index scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(5 rows) + +-- Index-only scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------ + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(5 rows) + +-- Bitmap heap scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE b > 'some text 8'; + QUERY PLAN +---------------------------------------------------- + Bitmap Heap Scan on scan_table + Recheck Cond: (b > 'some text 8'::text) + -> Bitmap Index Scan on scan_table_b + Index Cond: (b > 'some text 8'::text) + Generated Plan Advice: + BITMAP_HEAP_SCAN(scan_table public.scan_table_b) + NO_GATHER(scan_table) +(7 rows) + +-- TID scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)'; + QUERY PLAN +----------------------------------- + Tid Scan on scan_table + TID Cond: (ctid = '(0,1)'::tid) + Generated Plan Advice: + TID_SCAN(scan_table) + NO_GATHER(scan_table) +(5 rows) + +-- TID range scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE ctid > '(1,1)' AND ctid < '(2,1)'; + QUERY PLAN +--------------------------------------------------------------- + Tid Range Scan on scan_table + TID Cond: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid)) + Generated Plan Advice: + TID_SCAN(scan_table) + NO_GATHER(scan_table) +(5 rows) + +-- Try forcing each of our test queries to use the scan type they +-- wanted to use anyway. This should succeed. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + QUERY PLAN +-------------------------------------- + Seq Scan on scan_table + Supplied Plan Advice: + SEQ_SCAN(scan_table) /* matched */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(6 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +-------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------------- + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_b)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE b > 'some text 8'; + QUERY PLAN +----------------------------------------------------------- + Bitmap Heap Scan on scan_table + Recheck Cond: (b > 'some text 8'::text) + -> Bitmap Index Scan on scan_table_b + Index Cond: (b > 'some text 8'::text) + Supplied Plan Advice: + BITMAP_HEAP_SCAN(scan_table scan_table_b) /* matched */ + Generated Plan Advice: + BITMAP_HEAP_SCAN(scan_table public.scan_table_b) + NO_GATHER(scan_table) +(9 rows) + +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)'; + QUERY PLAN +-------------------------------------- + Tid Scan on scan_table + TID Cond: (ctid = '(0,1)'::tid) + Supplied Plan Advice: + TID_SCAN(scan_table) /* matched */ + Generated Plan Advice: + TID_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE ctid > '(1,1)' AND ctid < '(2,1)'; + QUERY PLAN +--------------------------------------------------------------- + Tid Range Scan on scan_table + TID Cond: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid)) + Supplied Plan Advice: + TID_SCAN(scan_table) /* matched */ + Generated Plan Advice: + TID_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +COMMIT; +-- Try to force a full scan of the table to use some other scan type. All +-- of these will fail. An index scan or bitmap heap scan could potentially +-- generate the correct answer, but the planner does not even consider these +-- possibilities due to the lack of a WHERE clause. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + QUERY PLAN +---------------------------------------------------------------- + Seq Scan on scan_table + Disabled: true + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_pkey) /* matched, failed */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on scan_table + Disabled: true + Supplied Plan Advice: + INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + QUERY PLAN +---------------------------------------------------------------------- + Seq Scan on scan_table + Disabled: true + Supplied Plan Advice: + BITMAP_HEAP_SCAN(scan_table scan_table_pkey) /* matched, failed */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + QUERY PLAN +---------------------------------------------- + Seq Scan on scan_table + Disabled: true + Supplied Plan Advice: + TID_SCAN(scan_table) /* matched, failed */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +COMMIT; +-- Try again to force index use. This should now succeed for the INDEX_SCAN +-- and BITMAP_HEAP_SCAN, but the INDEX_ONLY_SCAN can't be forced because the +-- query fetches columns not included in the index. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0; + QUERY PLAN +-------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a > 0) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on scan_table + Disabled: true + Filter: (a > 0) + Supplied Plan Advice: + INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(8 rows) + +SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0; + QUERY PLAN +-------------------------------------------------------------- + Bitmap Heap Scan on scan_table + Recheck Cond: (a > 0) + -> Bitmap Index Scan on scan_table_pkey + Index Cond: (a > 0) + Supplied Plan Advice: + BITMAP_HEAP_SCAN(scan_table scan_table_pkey) /* matched */ + Generated Plan Advice: + BITMAP_HEAP_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(9 rows) + +COMMIT; +-- We can force a primary key lookup to use a sequential scan, but we +-- can't force it to use an index-only scan (due to the column list) +-- or a TID scan (due to the absence of a TID qual). +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +-------------------------------------- + Seq Scan on scan_table + Filter: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(scan_table) /* matched */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +--------------------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Disabled: true + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(8 rows) + +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Disabled: true + Index Cond: (a = 1) + Supplied Plan Advice: + TID_SCAN(scan_table) /* matched, failed */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(8 rows) + +COMMIT; +-- We can forcibly downgrade an index-only scan to an index scan, but we can't +-- force the use of an index that the planner thinks is inapplicable. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +-------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +--------------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------------- + Seq Scan on scan_table + Disabled: true + Filter: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_b) /* matched, failed */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(8 rows) + +COMMIT; +-- We can force the use of a sequential scan in place of a bitmap heap scan, +-- but a plain index scan on a BRIN index is not possible. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE b > 'some text 8'; + QUERY PLAN +-------------------------------------- + Seq Scan on scan_table + Filter: (b > 'some text 8'::text) + Supplied Plan Advice: + SEQ_SCAN(scan_table) /* matched */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------------- + Seq Scan on scan_table + Disabled: true + Filter: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_b) /* matched, failed */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(8 rows) + +COMMIT; +-- We can force the use of a sequential scan rather than a TID scan or +-- TID range scan. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)'; + QUERY PLAN +-------------------------------------- + Seq Scan on scan_table + Filter: (ctid = '(0,1)'::tid) + Supplied Plan Advice: + SEQ_SCAN(scan_table) /* matched */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE ctid > '(1,1)' AND ctid < '(2,1)'; + QUERY PLAN +------------------------------------------------------------- + Seq Scan on scan_table + Filter: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid)) + Supplied Plan Advice: + SEQ_SCAN(scan_table) /* matched */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +COMMIT; +-- Test more complex scenarios with index scans. +BEGIN; +-- Should still work if we mention the schema. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +--------------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +-- But not if we mention the wrong schema. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table cilbup.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +----------------------------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table cilbup.scan_table_pkey) /* matched, inapplicable */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +-- It's OK to repeat the same advice. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +-------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_pkey) /* matched */ + INDEX_SCAN(scan_table scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(8 rows) + +-- But it doesn't work if the index target is even notionally different. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table public.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +---------------------------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_pkey) /* matched, conflicting */ + INDEX_SCAN(scan_table public.scan_table_pkey) /* matched, conflicting */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(8 rows) + +COMMIT; +-- Test assorted incorrect advice. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(nothing)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------ + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(nothing) /* not matched */ + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(nothing whatsoever)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------ + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(nothing whatsoever) /* not matched */ + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table bogus)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +-------------------------------------------------------------------- + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table bogus) /* matched, inapplicable, failed */ + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(nothing whatsoever)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +--------------------------------------------------------- + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_ONLY_SCAN(nothing whatsoever) /* not matched */ + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table bogus)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +----------------------------------------------------------------- + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_ONLY_SCAN(scan_table bogus) /* matched, inapplicable */ + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +COMMIT; +-- Test our ability to refer to multiple instances of the same alias. +BEGIN; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; + QUERY PLAN +------------------------------------------------------------------- + Nested Loop Left Join + -> Nested Loop Left Join + -> Function Scan on generate_series g + -> Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = g.g) + -> Index Scan using scan_table_pkey on scan_table s_1 + Index Cond: (a = g.g) + Generated Plan Advice: + JOIN_ORDER(g s s#2) + NESTED_LOOP_PLAIN(s s#2) + INDEX_SCAN(s public.scan_table_pkey s#2 public.scan_table_pkey) + NO_GATHER(s s#2) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; + QUERY PLAN +---------------------------------------------------------- + Nested Loop Left Join + -> Hash Left Join + Hash Cond: (g.g = s.a) + -> Function Scan on generate_series g + -> Hash + -> Seq Scan on scan_table s + -> Index Scan using scan_table_pkey on scan_table s_1 + Index Cond: (a = g.g) + Supplied Plan Advice: + SEQ_SCAN(s) /* matched */ + Generated Plan Advice: + JOIN_ORDER(g s s#2) + NESTED_LOOP_PLAIN(s#2) + HASH_JOIN(s) + SEQ_SCAN(s) + INDEX_SCAN(s#2 public.scan_table_pkey) + NO_GATHER(s s#2) +(17 rows) + +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s#2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; + QUERY PLAN +-------------------------------------------------------------- + Hash Left Join + Hash Cond: (g.g = s_1.a) + -> Nested Loop Left Join + -> Function Scan on generate_series g + -> Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = g.g) + -> Hash + -> Seq Scan on scan_table s_1 + Supplied Plan Advice: + SEQ_SCAN(s#2) /* matched */ + Generated Plan Advice: + JOIN_ORDER(g s s#2) + NESTED_LOOP_PLAIN(s) + HASH_JOIN(s#2) + SEQ_SCAN(s#2) + INDEX_SCAN(s public.scan_table_pkey) + NO_GATHER(s s#2) +(17 rows) + +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s) SEQ_SCAN(s#2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; + QUERY PLAN +------------------------------------------------ + Hash Left Join + Hash Cond: (g.g = s_1.a) + -> Hash Left Join + Hash Cond: (g.g = s.a) + -> Function Scan on generate_series g + -> Hash + -> Seq Scan on scan_table s + -> Hash + -> Seq Scan on scan_table s_1 + Supplied Plan Advice: + SEQ_SCAN(s) /* matched */ + SEQ_SCAN(s#2) /* matched */ + Generated Plan Advice: + JOIN_ORDER(g s s#2) + HASH_JOIN(s s#2) + SEQ_SCAN(s s#2) + NO_GATHER(s s#2) +(17 rows) + +COMMIT; +-- Test our ability to refer to scans within a subquery. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; + QUERY PLAN +-------------------------------------------------- + Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = 1) + Generated Plan Advice: + INDEX_SCAN(s@x public.scan_table_pkey) + NO_GATHER(s@x) +(5 rows) + +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); + QUERY PLAN +--------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = 1) + Generated Plan Advice: + INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey) + NO_GATHER(s@unnamed_subquery) +(5 rows) + +BEGIN; +-- Should not match. +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; + QUERY PLAN +-------------------------------------------------- + Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(s) /* not matched */ + Generated Plan Advice: + INDEX_SCAN(s@x public.scan_table_pkey) + NO_GATHER(s@x) +(7 rows) + +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); + QUERY PLAN +--------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(s) /* not matched */ + Generated Plan Advice: + INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey) + NO_GATHER(s@unnamed_subquery) +(7 rows) + +-- Should match first query only. +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@x)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; + QUERY PLAN +------------------------------- + Seq Scan on scan_table s + Filter: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(s@x) /* matched */ + Generated Plan Advice: + SEQ_SCAN(s@x) + NO_GATHER(s@x) +(7 rows) + +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); + QUERY PLAN +--------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(s@x) /* not matched */ + Generated Plan Advice: + INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey) + NO_GATHER(s@unnamed_subquery) +(7 rows) + +-- Should match second query only. +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@unnamed_subquery)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; + QUERY PLAN +-------------------------------------------------- + Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(s@unnamed_subquery) /* not matched */ + Generated Plan Advice: + INDEX_SCAN(s@x public.scan_table_pkey) + NO_GATHER(s@x) +(7 rows) + +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); + QUERY PLAN +---------------------------------------------- + Seq Scan on scan_table s + Filter: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(s@unnamed_subquery) /* matched */ + Generated Plan Advice: + SEQ_SCAN(s@unnamed_subquery) + NO_GATHER(s@unnamed_subquery) +(7 rows) + +COMMIT; diff --git a/contrib/pg_plan_advice/expected/syntax.out b/contrib/pg_plan_advice/expected/syntax.out new file mode 100644 index 0000000000..eec0298089 --- /dev/null +++ b/contrib/pg_plan_advice/expected/syntax.out @@ -0,0 +1,162 @@ +LOAD 'pg_plan_advice'; +-- An empty string is allowed, and so is an empty target list. +SET pg_plan_advice.advice = ''; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) +(1 row) + +SET pg_plan_advice.advice = 'SEQ_SCAN()'; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) + Supplied Plan Advice: +(2 rows) + +-- Test assorted variations in capitalization, whitespace, and which parts of +-- the relation identifier are included. These should all work. +SET pg_plan_advice.advice = 'SEQ_SCAN(x)'; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) + Supplied Plan Advice: + SEQ_SCAN(x) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = 'seq_scan(x@y)'; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) + Supplied Plan Advice: + SEQ_SCAN(x@y) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = 'SEQ_scan(x#2)'; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) + Supplied Plan Advice: + SEQ_SCAN(x#2) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = 'SEQ_SCAN (x/y)'; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) + Supplied Plan Advice: + SEQ_SCAN(x/y) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = ' SEQ_SCAN ( x / y . z ) '; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) + Supplied Plan Advice: + SEQ_SCAN(x/y.z) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = 'SEQ_SCAN("x"#2/"y"."z"@"t")'; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) + Supplied Plan Advice: + SEQ_SCAN(x#2/y.z@t) /* not matched */ +(3 rows) + +-- Syntax errors. +SET pg_plan_advice.advice = 'SEQUENTIAL_SCAN(x)'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQUENTIAL_SCAN(x)" +DETAIL: Could not parse advice: syntax error at or near "SEQUENTIAL_SCAN" +SET pg_plan_advice.advice = 'SEQ_SCAN'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN" +DETAIL: Could not parse advice: syntax error at end of input +SET pg_plan_advice.advice = 'SEQ_SCAN('; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN(" +DETAIL: Could not parse advice: syntax error at end of input +SET pg_plan_advice.advice = 'SEQ_SCAN("'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN("" +DETAIL: Could not parse advice: unterminated quoted identifier at end of input +SET pg_plan_advice.advice = 'SEQ_SCAN("")'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN("")" +DETAIL: Could not parse advice: zero-length delimited identifier at or near """ +SET pg_plan_advice.advice = 'SEQ_SCAN("a"'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN("a"" +DETAIL: Could not parse advice: syntax error at end of input +SET pg_plan_advice.advice = 'SEQ_SCAN(#'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN(#" +DETAIL: Could not parse advice: syntax error at or near "#" +SET pg_plan_advice.advice = '()'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "()" +DETAIL: Could not parse advice: syntax error at or near "(" +SET pg_plan_advice.advice = '123'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "123" +DETAIL: Could not parse advice: syntax error at or near "123" +-- Legal comments. +SET pg_plan_advice.advice = '/**/'; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) +(1 row) + +SET pg_plan_advice.advice = 'HASH_JOIN(_)/***/'; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) + Supplied Plan Advice: + HASH_JOIN(_) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(/*x*/y)'; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) + Supplied Plan Advice: + HASH_JOIN(y) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(y//*x*/z)'; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) + Supplied Plan Advice: + HASH_JOIN(y/z) /* not matched */ +(3 rows) + +-- Unterminated comments. +SET pg_plan_advice.advice = '/*'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "/*" +DETAIL: Could not parse advice: unterminated comment at end of input +SET pg_plan_advice.advice = 'JOIN_ORDER("fOO") /* oops'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "JOIN_ORDER("fOO") /* oops" +DETAIL: Could not parse advice: unterminated comment at end of input +-- Nested comments are not supported, so the first of these is legal and +-- the second is not. +SET pg_plan_advice.advice = '/*/*/'; +EXPLAIN SELECT 1; + QUERY PLAN +------------------------------------------ + Result (cost=0.00..0.01 rows=1 width=4) +(1 row) + +SET pg_plan_advice.advice = '/*/* stuff */*/'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "/*/* stuff */*/" +DETAIL: Could not parse advice: syntax error at or near "*" +-- Foreign join requires multiple relation identifiers. +SET pg_plan_advice.advice = 'FOREIGN_JOIN(a)'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "FOREIGN_JOIN(a)" +DETAIL: Could not parse advice: FOREIGN_JOIN targets must contain more than one relation identifier at or near ")" +SET pg_plan_advice.advice = 'FOREIGN_JOIN((a))'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "FOREIGN_JOIN((a))" +DETAIL: Could not parse advice: FOREIGN_JOIN targets must contain more than one relation identifier at or near ")" diff --git a/contrib/pg_plan_advice/meson.build b/contrib/pg_plan_advice/meson.build new file mode 100644 index 0000000000..3452e5ad48 --- /dev/null +++ b/contrib/pg_plan_advice/meson.build @@ -0,0 +1,70 @@ +# Copyright (c) 2022-2024, PostgreSQL Global Development Group + +pg_plan_advice_sources = files( + 'pg_plan_advice.c', + 'pgpa_ast.c', + 'pgpa_collector.c', + 'pgpa_identifier.c', + 'pgpa_join.c', + 'pgpa_output.c', + 'pgpa_planner.c', + 'pgpa_scan.c', + 'pgpa_trove.c', + 'pgpa_walker.c', +) + +pgpa_scanner = custom_target('pgpa_scanner', + input: 'pgpa_scanner.l', + output: 'pgpa_scanner.c', + command: flex_cmd, +) +generated_sources += pgpa_scanner +pg_plan_advice_sources += pgpa_scanner + +pgpa_parser = custom_target('pgpa_parser', + input: 'pgpa_parser.y', + kwargs: bison_kw, +) +generated_sources += pgpa_parser.to_list() +pg_plan_advice_sources += pgpa_parser + +if host_system == 'windows' + pg_plan_advice_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'pg_plan_advice', + '--FILEDESC', 'pg_plan_advice - help the planner get the right plan',]) +endif + +pg_plan_advice = shared_module('pg_plan_advice', + pg_plan_advice_sources, + include_directories: include_directories('.'), + kwargs: contrib_mod_args, +) +contrib_targets += pg_plan_advice + +install_data( + 'pg_plan_advice--1.0.sql', + 'pg_plan_advice.control', + kwargs: contrib_data_args, +) + +tests += { + 'name': 'pg_plan_advice', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'gather', + 'join_order', + 'join_strategy', + 'local_collector', + 'partitionwise', + 'scan', + 'syntax', + ], + }, + 'tap': { + 'tests': [ + 't/001_regress.pl', + ], + }, +} diff --git a/contrib/pg_plan_advice/pg_plan_advice--1.0.sql b/contrib/pg_plan_advice/pg_plan_advice--1.0.sql new file mode 100644 index 0000000000..29f4f22486 --- /dev/null +++ b/contrib/pg_plan_advice/pg_plan_advice--1.0.sql @@ -0,0 +1,42 @@ +/* contrib/pg_plan_advice/pg_plan_advice--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION pg_plan_advice" to load this file. \quit + +CREATE FUNCTION pg_clear_collected_local_advice() +RETURNS void +AS 'MODULE_PATHNAME', 'pg_clear_collected_local_advice' +LANGUAGE C STRICT; + +CREATE FUNCTION pg_clear_collected_shared_advice() +RETURNS void +AS 'MODULE_PATHNAME', 'pg_clear_collected_shared_advice' +LANGUAGE C STRICT; + +CREATE FUNCTION pg_get_collected_local_advice( + OUT id bigint, + OUT userid oid, + OUT dbid oid, + OUT queryid bigint, + OUT collection_time timestamptz, + OUT query text, + OUT advice text +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_get_collected_local_advice' +LANGUAGE C STRICT; + +CREATE FUNCTION pg_get_collected_shared_advice( + OUT id bigint, + OUT userid oid, + OUT dbid oid, + OUT queryid bigint, + OUT collection_time timestamptz, + OUT query text, + OUT advice text +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_get_collected_shared_advice' +LANGUAGE C STRICT; + +REVOKE ALL ON FUNCTION pg_get_collected_shared_advice() FROM PUBLIC; diff --git a/contrib/pg_plan_advice/pg_plan_advice.c b/contrib/pg_plan_advice/pg_plan_advice.c new file mode 100644 index 0000000000..865931c960 --- /dev/null +++ b/contrib/pg_plan_advice/pg_plan_advice.c @@ -0,0 +1,455 @@ +/*------------------------------------------------------------------------- + * + * pg_plan_advice.c + * main entrypoints for generating and applying planner advice + * + * Copyright (c) 2016-2024, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pg_plan_advice.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pg_plan_advice.h" +#include "pgpa_ast.h" +#include "pgpa_collector.h" +#include "pgpa_identifier.h" +#include "pgpa_output.h" +#include "pgpa_planner.h" +#include "pgpa_trove.h" +#include "pgpa_walker.h" + +#include "commands/defrem.h" +#include "commands/explain.h" +#include "commands/explain_format.h" +#include "commands/explain_state.h" +#include "funcapi.h" +#include "optimizer/planner.h" +#include "storage/dsm_registry.h" +#include "utils/guc.h" + +PG_MODULE_MAGIC; + +static pgpa_shared_state *pgpa_state = NULL; +static dsa_area *pgpa_dsa_area = NULL; + +/* GUC variables */ +char *pg_plan_advice_advice = NULL; +static bool pg_plan_advice_always_explain_supplied_advice = true; +int pg_plan_advice_local_collection_limit = 0; +int pg_plan_advice_shared_collection_limit = 0; + +/* Saved hook value */ +static explain_per_plan_hook_type prev_explain_per_plan = NULL; + +/* Other file-level globals */ +static int es_extension_id; +static MemoryContext pgpa_memory_context = NULL; + +static void pg_plan_advice_explain_option_handler(ExplainState *es, + DefElem *opt, + ParseState *pstate); +static void pg_plan_advice_explain_per_plan_hook(PlannedStmt *plannedstmt, + IntoClause *into, + ExplainState *es, + const char *queryString, + ParamListInfo params, + QueryEnvironment *queryEnv); +static bool pg_plan_advice_advice_check_hook(char **newval, void **extra, + GucSource source); +static DefElem *find_defelem_by_defname(List *deflist, char *defname); + +/* + * Initialize this module. + */ +void +_PG_init(void) +{ + DefineCustomStringVariable("pg_plan_advice.advice", + "advice to apply during query planning", + NULL, + &pg_plan_advice_advice, + NULL, + PGC_USERSET, + 0, + pg_plan_advice_advice_check_hook, + NULL, + NULL); + + DefineCustomBoolVariable("pg_plan_advice.always_explain_supplied_advice", + "EXPLAIN output includes supplied advice even without EXPLAIN (PLAN_ADVICE)", + NULL, + &pg_plan_advice_always_explain_supplied_advice, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("pg_plan_advice.local_collection_limit", + "# of advice entries to retain in per-backend memory", + NULL, + &pg_plan_advice_local_collection_limit, + 0, + 0, INT_MAX, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("pg_plan_advice.shared_collection_limit", + "# of advice entries to retain in shared memory", + NULL, + &pg_plan_advice_shared_collection_limit, + 0, + 0, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL); + + MarkGUCPrefixReserved("pg_plan_advice"); + + /* Get an ID that we can use to cache data in an ExplainState. */ + es_extension_id = GetExplainExtensionId("pg_plan_advice"); + + /* Register the new EXPLAIN options implemented by this module. */ + RegisterExtensionExplainOption("plan_advice", + pg_plan_advice_explain_option_handler); + + /* Install hooks */ + pgpa_planner_install_hooks(); + prev_explain_per_plan = explain_per_plan_hook; + explain_per_plan_hook = pg_plan_advice_explain_per_plan_hook; +} + +/* + * Initialize shared state when first created. + */ +static void +pgpa_init_shared_state(void *ptr) +{ + pgpa_shared_state *state = (pgpa_shared_state *) ptr; + + LWLockInitialize(&state->lock, LWLockNewTrancheId("pg_plan_advice_lock")); + state->dsa_tranche = LWLockNewTrancheId("pg_plan_advice_dsa"); + state->area = DSA_HANDLE_INVALID; + state->shared_collector = InvalidDsaPointer; +} + +/* + * Return a pointer to a memory context where long-lived data managed by this + * module can be stored. + */ +MemoryContext +pg_plan_advice_get_mcxt(void) +{ + if (pgpa_memory_context == NULL) + pgpa_memory_context = AllocSetContextCreate(TopMemoryContext, + "pg_plan_advice", + ALLOCSET_DEFAULT_SIZES); + + return pgpa_memory_context; +} + +/* + * Get a pointer to our shared state. + * + * If no shared state exists, create and initialize it. If it does exist but + * this backend has not yet accessed it, attach to it. Otherwise, just return + * our cached pointer. + * + * Along the way, make sure the relevant LWLock tranches are registered. + */ +pgpa_shared_state * +pg_plan_advice_attach(void) +{ + if (pgpa_state == NULL) + { + bool found; + + pgpa_state = + GetNamedDSMSegment("pg_plan_advice", sizeof(pgpa_shared_state), + pgpa_init_shared_state, &found); + } + + return pgpa_state; +} + +/* + * Return a pointer to pg_plan_advice's DSA area, creating it if needed. + */ +dsa_area * +pg_plan_advice_dsa_area(void) +{ + if (pgpa_dsa_area == NULL) + { + pgpa_shared_state *state = pg_plan_advice_attach(); + dsa_handle area_handle; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(pg_plan_advice_get_mcxt()); + + LWLockAcquire(&state->lock, LW_EXCLUSIVE); + area_handle = state->area; + if (area_handle == DSA_HANDLE_INVALID) + { + pgpa_dsa_area = dsa_create(state->dsa_tranche); + dsa_pin(pgpa_dsa_area); + state->area = dsa_get_handle(pgpa_dsa_area); + LWLockRelease(&state->lock); + } + else + { + LWLockRelease(&state->lock); + pgpa_dsa_area = dsa_attach(area_handle); + } + + dsa_pin_mapping(pgpa_dsa_area); + + MemoryContextSwitchTo(oldcontext); + } + + return pgpa_dsa_area; +} + +/* + * Was the PLAN_ADVICE option specified and not set to false? + */ +bool +pg_plan_advice_should_explain(ExplainState *es) +{ + bool *plan_advice = NULL; + + if (es != NULL) + plan_advice = GetExplainExtensionState(es, es_extension_id); + return plan_advice != NULL && *plan_advice; +} + +/* + * Handler for EXPLAIN (PLAN_ADVICE). + */ +static void +pg_plan_advice_explain_option_handler(ExplainState *es, DefElem *opt, + ParseState *pstate) +{ + bool *plan_advice; + + plan_advice = GetExplainExtensionState(es, es_extension_id); + + if (plan_advice == NULL) + { + plan_advice = palloc0_object(bool); + SetExplainExtensionState(es, es_extension_id, plan_advice); + } + + *plan_advice = defGetBoolean(opt); +} + +/* + * Display a string that is likely to consist of multiple lines in EXPLAIN + * output. + */ +static void +pg_plan_advice_explain_text_multiline(ExplainState *es, char *qlabel, + char *value) +{ + char *s; + + /* For non-text formats, it's best not to add any special handling. */ + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyText(qlabel, value, es); + return; + } + + /* In text format, if there is no data, display nothing. */ + if (*qlabel == '\0') + return; + + /* + * It looks nicest to indent each line of the advice separately, beginning + * on the line below the label. + */ + ExplainIndentText(es); + appendStringInfo(es->str, "%s:\n", qlabel); + es->indent++; + while ((s = strchr(value, '\n')) != NULL) + { + ExplainIndentText(es); + appendBinaryStringInfo(es->str, value, (s - value) + 1); + value = s + 1; + } + + /* Don't interpret a terminal newline as a request for an empty line. */ + if (*value != '\0') + { + ExplainIndentText(es); + appendStringInfo(es->str, "%s\n", value); + } + + es->indent--; +} + +/* + * Add advice feedback to the EXPLAIN output. + */ +static void +pg_plan_advice_explain_feedback(ExplainState *es, List *feedback) +{ + StringInfoData buf; + + initStringInfo(&buf); + foreach_node(DefElem, item, feedback) + { + int flags = defGetInt32(item); + + appendStringInfo(&buf, "%s /* ", item->defname); + if ((flags & PGPA_TE_MATCH_FULL) != 0) + { + Assert((flags & PGPA_TE_MATCH_PARTIAL) != 0); + appendStringInfo(&buf, "matched"); + } + else if ((flags & PGPA_TE_MATCH_PARTIAL) != 0) + appendStringInfo(&buf, "partially matched"); + else + appendStringInfo(&buf, "not matched"); + if ((flags & PGPA_TE_INAPPLICABLE) != 0) + appendStringInfo(&buf, ", inapplicable"); + if ((flags & PGPA_TE_CONFLICTING) != 0) + appendStringInfo(&buf, ", conflicting"); + if ((flags & PGPA_TE_FAILED) != 0) + appendStringInfo(&buf, ", failed"); + appendStringInfo(&buf, " */\n"); + } + + pg_plan_advice_explain_text_multiline(es, "Supplied Plan Advice", + buf.data); +} + +/* + * Add relevant details, if any, to the EXPLAIN output for a single plan. + */ +static void +pg_plan_advice_explain_per_plan_hook(PlannedStmt *plannedstmt, + IntoClause *into, + ExplainState *es, + const char *queryString, + ParamListInfo params, + QueryEnvironment *queryEnv) +{ + bool should_explain; + DefElem *pgpa_item; + List *pgpa_list; + + if (prev_explain_per_plan) + prev_explain_per_plan(plannedstmt, into, es, queryString, params, + queryEnv); + + /* Should an advice string be part of the EXPLAIN output? */ + should_explain = pg_plan_advice_should_explain(es); + + /* Find any data pgpa_planner_shutdown stashed in the PlannedStmt. */ + pgpa_item = find_defelem_by_defname(plannedstmt->extension_state, + "pg_plan_advice"); + pgpa_list = pgpa_item == NULL ? NULL : (List *) pgpa_item->arg; + + /* + * By default, if there is a record of attempting to apply advice during + * query planning, we always output that information, but the user can set + * pg_plan_advice.always_explain_supplied_advice = false to suppress that + * behavior. If they do, we'll only display it when the PLAN_ADVICE option + * was specified and not set to false. + * + * NB: If we're explaining a query planned beforehand -- i.e. a prepared + * statement -- the application of query advice may not have been + * recorded, and therefore this won't be able to show anything. + */ + if (pgpa_list != NULL && (pg_plan_advice_always_explain_supplied_advice || + should_explain)) + { + DefElem *feedback; + + feedback = find_defelem_by_defname(pgpa_list, "feedback"); + if (feedback != NULL) + pg_plan_advice_explain_feedback(es, (List *) feedback->arg); + } + + /* + * If the PLAN_ADVICE option was specified -- and not sent to FALSE -- + * show generated advice. + */ + if (should_explain) + { + DefElem *advice_string_item; + char *advice_string = NULL; + + advice_string_item = + find_defelem_by_defname(pgpa_list, "advice_string"); + if (advice_string_item != NULL) + { + /* Advice has already been generated; we can reuse it. */ + advice_string = strVal(advice_string_item->arg); + } + if (advice_string != NULL && advice_string[0] != '\0') + pg_plan_advice_explain_text_multiline(es, "Generated Plan Advice", + advice_string); + } +} + +/* + * Check hook for pg_plan_advice.advice + */ +static bool +pg_plan_advice_advice_check_hook(char **newval, void **extra, GucSource source) +{ + MemoryContext oldcontext; + MemoryContext tmpcontext; + char *error; + + if (*newval == NULL) + return true; + + tmpcontext = AllocSetContextCreate(CurrentMemoryContext, + "pg_plan_advice.advice", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(tmpcontext); + + /* + * It would be nice to save the parse tree that we construct here for + * eventual use when planning with this advice, but *extra can only point + * to a single guc_malloc'd chunk, and our parse tree involves an + * arbitrary number of memory allocations. + */ + (void) pgpa_parse(*newval, &error); + + if (error != NULL) + { + GUC_check_errdetail("Could not parse advice: %s", error); + return false; + } + + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(tmpcontext); + + return true; +} + +/* + * Search a list of DefElem objects for a given defname. + */ +static DefElem * +find_defelem_by_defname(List *deflist, char *defname) +{ + foreach_node(DefElem, item, deflist) + { + if (strcmp(item->defname, defname) == 0) + return item; + } + + return NULL; +} diff --git a/contrib/pg_plan_advice/pg_plan_advice.control b/contrib/pg_plan_advice/pg_plan_advice.control new file mode 100644 index 0000000000..aa6fdc9e7b --- /dev/null +++ b/contrib/pg_plan_advice/pg_plan_advice.control @@ -0,0 +1,5 @@ +# pg_plan_advice extension +comment = 'help the planner get the right plan' +default_version = '1.0' +module_pathname = '$libdir/pg_plan_advice' +relocatable = true diff --git a/contrib/pg_plan_advice/pg_plan_advice.h b/contrib/pg_plan_advice/pg_plan_advice.h new file mode 100644 index 0000000000..02e65a3382 --- /dev/null +++ b/contrib/pg_plan_advice/pg_plan_advice.h @@ -0,0 +1,39 @@ +/*------------------------------------------------------------------------- + * + * pg_plan_advice.h + * main header file for pg_plan_advice contrib module + * + * Copyright (c) 2016-2024, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pg_plan_advice.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_PLAN_ADVICE_H +#define PG_PLAN_ADVICE_H + +#include "commands/explain_state.h" +#include "nodes/plannodes.h" +#include "storage/lwlock.h" +#include "utils/dsa.h" + +typedef struct pgpa_shared_state +{ + LWLock lock; + int dsa_tranche; + dsa_handle area; + dsa_pointer shared_collector; +} pgpa_shared_state; + +/* GUC variables */ +extern int pg_plan_advice_local_collection_limit; +extern int pg_plan_advice_shared_collection_limit; +extern char *pg_plan_advice_advice; + +/* Function prototypes */ +extern MemoryContext pg_plan_advice_get_mcxt(void); +extern pgpa_shared_state *pg_plan_advice_attach(void); +extern dsa_area *pg_plan_advice_dsa_area(void); +extern bool pg_plan_advice_should_explain(ExplainState *es); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_ast.c b/contrib/pg_plan_advice/pgpa_ast.c new file mode 100644 index 0000000000..5f822bce03 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_ast.c @@ -0,0 +1,392 @@ +/*------------------------------------------------------------------------- + * + * pgpa_ast.c + * additional supporting code related to plan advice parsing + * + * Copyright (c) 2016-2024, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_ast.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "pgpa_ast.h" + +#include "funcapi.h" +#include "utils/array.h" +#include "utils/builtins.h" + +static bool pgpa_identifiers_cover_target(int nrids, pgpa_identifier *rids, + pgpa_advice_target *target, + bool *rids_used); + +/* + * Get a C string that corresponds to the specified advice tag. + */ +char * +pgpa_cstring_advice_tag(pgpa_advice_tag_type advice_tag) +{ + switch (advice_tag) + { + case PGPA_TAG_BITMAP_HEAP_SCAN: + return "BITMAP_HEAP_SCAN"; + case PGPA_TAG_FOREIGN_JOIN: + return "FOREIGN_JOIN"; + case PGPA_TAG_GATHER: + return "GATHER"; + case PGPA_TAG_GATHER_MERGE: + return "GATHER_MERGE"; + case PGPA_TAG_HASH_JOIN: + return "HASH_JOIN"; + case PGPA_TAG_INDEX_ONLY_SCAN: + return "INDEX_ONLY_SCAN"; + case PGPA_TAG_INDEX_SCAN: + return "INDEX_SCAN"; + case PGPA_TAG_JOIN_ORDER: + return "JOIN_ORDER"; + case PGPA_TAG_MERGE_JOIN_MATERIALIZE: + return "MERGE_JOIN_MATERIALIZE"; + case PGPA_TAG_MERGE_JOIN_PLAIN: + return "MERGE_JOIN_PLAIN"; + case PGPA_TAG_NESTED_LOOP_MATERIALIZE: + return "NESTED_LOOP_MATERIALIZE"; + case PGPA_TAG_NESTED_LOOP_MEMOIZE: + return "NESTED_LOOP_MEMOIZE"; + case PGPA_TAG_NESTED_LOOP_PLAIN: + return "NESTED_LOOP_PLAIN"; + case PGPA_TAG_NO_GATHER: + return "NO_GATHER"; + case PGPA_TAG_PARTITIONWISE: + return "PARTITIONWISE"; + case PGPA_TAG_SEMIJOIN_NON_UNIQUE: + return "SEMIJOIN_NON_UNIQUE"; + case PGPA_TAG_SEMIJOIN_UNIQUE: + return "SEMIJOIN_UNIQUE"; + case PGPA_TAG_SEQ_SCAN: + return "SEQ_SCAN"; + case PGPA_TAG_TID_SCAN: + return "TID_SCAN"; + } + + pg_unreachable(); + return NULL; +} + +/* + * Convert an advice tag, formatted as a string that has already been + * downcased as appropriate, to a pgpa_advice_tag_type. + * + * If we succeed, set *fail = false and return the result; if we fail, + * set *fail = true and reurn an arbitrary value. + */ +pgpa_advice_tag_type +pgpa_parse_advice_tag(const char *tag, bool *fail) +{ + *fail = false; + + switch (tag[0]) + { + case 'b': + if (strcmp(tag, "bitmap_heap_scan") == 0) + return PGPA_TAG_BITMAP_HEAP_SCAN; + break; + case 'f': + if (strcmp(tag, "foreign_join") == 0) + return PGPA_TAG_FOREIGN_JOIN; + break; + case 'g': + if (strcmp(tag, "gather") == 0) + return PGPA_TAG_GATHER; + if (strcmp(tag, "gather_merge") == 0) + return PGPA_TAG_GATHER_MERGE; + break; + case 'h': + if (strcmp(tag, "hash_join") == 0) + return PGPA_TAG_HASH_JOIN; + break; + case 'i': + if (strcmp(tag, "index_scan") == 0) + return PGPA_TAG_INDEX_SCAN; + if (strcmp(tag, "index_only_scan") == 0) + return PGPA_TAG_INDEX_ONLY_SCAN; + break; + case 'j': + if (strcmp(tag, "join_order") == 0) + return PGPA_TAG_JOIN_ORDER; + break; + case 'm': + if (strcmp(tag, "merge_join_materialize") == 0) + return PGPA_TAG_MERGE_JOIN_MATERIALIZE; + if (strcmp(tag, "merge_join_plain") == 0) + return PGPA_TAG_MERGE_JOIN_PLAIN; + break; + case 'n': + if (strcmp(tag, "nested_loop_materialize") == 0) + return PGPA_TAG_NESTED_LOOP_MATERIALIZE; + if (strcmp(tag, "nested_loop_memoize") == 0) + return PGPA_TAG_NESTED_LOOP_MEMOIZE; + if (strcmp(tag, "nested_loop_plain") == 0) + return PGPA_TAG_NESTED_LOOP_PLAIN; + if (strcmp(tag, "no_gather") == 0) + return PGPA_TAG_NO_GATHER; + break; + case 'p': + if (strcmp(tag, "partitionwise") == 0) + return PGPA_TAG_PARTITIONWISE; + break; + case 's': + if (strcmp(tag, "semijoin_non_unique") == 0) + return PGPA_TAG_SEMIJOIN_NON_UNIQUE; + if (strcmp(tag, "semijoin_unique") == 0) + return PGPA_TAG_SEMIJOIN_UNIQUE; + if (strcmp(tag, "seq_scan") == 0) + return PGPA_TAG_SEQ_SCAN; + break; + case 't': + if (strcmp(tag, "tid_scan") == 0) + return PGPA_TAG_TID_SCAN; + break; + } + + /* didn't work out */ + *fail = true; + + /* return an arbitrary value to unwind the call stack */ + return PGPA_TAG_SEQ_SCAN; +} + +/* + * Format a pgpa_advice_target as a string and append result to a StringInfo. + */ +void +pgpa_format_advice_target(StringInfo str, pgpa_advice_target *target) +{ + if (target->ttype != PGPA_TARGET_IDENTIFIER) + { + bool first = true; + char *delims; + + if (target->ttype == PGPA_TARGET_UNORDERED_LIST) + delims = "{}"; + else + delims = "()"; + + appendStringInfoChar(str, delims[0]); + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + if (first) + first = false; + else + appendStringInfoChar(str, ' '); + pgpa_format_advice_target(str, child_target); + } + appendStringInfoChar(str, delims[1]); + } + else + { + const char *rt_identifier; + + rt_identifier = pgpa_identifier_string(&target->rid); + appendStringInfoString(str, rt_identifier); + } +} + +/* + * Format a pgpa_index_target as a string and append result to a StringInfo. + */ +void +pgpa_format_index_target(StringInfo str, pgpa_index_target *itarget) +{ + if (itarget->itype != PGPA_INDEX_NAME) + { + bool first = true; + + if (itarget->itype == PGPA_INDEX_AND) + appendStringInfoString(str, "&&("); + else + appendStringInfoString(str, "||("); + + foreach_ptr(pgpa_index_target, child_target, itarget->children) + { + if (first) + first = false; + else + appendStringInfoChar(str, ' '); + pgpa_format_index_target(str, child_target); + } + appendStringInfoChar(str, ')'); + } + else + { + if (itarget->indnamespace != NULL) + appendStringInfo(str, "%s.", + quote_identifier(itarget->indnamespace)); + appendStringInfoString(str, quote_identifier(itarget->indname)); + } +} + +/* + * Determine whether two pgpa_index_target objects are exactly identical. + */ +bool +pgpa_index_targets_equal(pgpa_index_target *i1, pgpa_index_target *i2) +{ + if (i1->itype != i2->itype) + return false; + + if (i1->itype == PGPA_INDEX_NAME) + { + /* indnamespace can be NULL, and two NULL values are equal */ + if ((i1->indnamespace != NULL || i2->indnamespace != NULL) && + (i1->indnamespace == NULL || i2->indnamespace == NULL || + strcmp(i1->indnamespace, i2->indnamespace) != 0)) + return false; + if (strcmp(i1->indname, i2->indname) != 0) + return false; + } + else + { + int i1_length = list_length(i1->children); + + if (i1_length != list_length(i2->children)) + return false; + for (int n = 0; n < i1_length; ++n) + { + pgpa_index_target *c1 = list_nth(i1->children, n); + pgpa_index_target *c2 = list_nth(i2->children, n); + + if (!pgpa_index_targets_equal(c1, c2)) + return false; + } + } + + return true; +} + +/* + * Check whether an identifier matches an any part of an advice target. + */ +bool +pgpa_identifier_matches_target(pgpa_identifier *rid, pgpa_advice_target *target) +{ + /* For non-identifiers, check all descendents. */ + if (target->ttype != PGPA_TARGET_IDENTIFIER) + { + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + if (pgpa_identifier_matches_target(rid, child_target)) + return true; + } + return false; + } + + /* + * If a relation identifer mentions a partition name, it should also specify + * a partition schema. + */ + Assert(rid->partnsp != NULL || rid->partrel == NULL); + + /* Straightforward comparisons of alias name and occcurrence number. */ + if (strcmp(rid->alias_name, target->rid.alias_name) != 0) + return false; + if (rid->occurrence != target->rid.occurrence) + return false; + + /* + * These fields can be NULL on either side, but NULL only matches another + * NULL. + */ + if (!strings_equal_or_both_null(rid->partnsp, target->rid.partnsp)) + return false; + if (!strings_equal_or_both_null(rid->partrel, target->rid.partrel)) + return false; + if (!strings_equal_or_both_null(rid->plan_name, target->rid.plan_name)) + return false; + + return true; +} + +/* + * Match identifiers to advice targets and return an enum value indicating + * the relationship between the set of keys and the set of targets. + * + * See the comments for pgpa_itm_type. + */ +pgpa_itm_type +pgpa_identifiers_match_target(int nrids, pgpa_identifier *rids, + pgpa_advice_target *target) +{ + bool all_rids_used = true; + bool any_rids_used = false; + bool all_targets_used; + bool *rids_used = palloc0_array(bool, nrids); + + all_targets_used = + pgpa_identifiers_cover_target(nrids, rids, target, rids_used); + + for (int i = 0; i < nrids; ++i) + { + if (rids_used[i]) + any_rids_used = true; + else + all_rids_used = false; + } + + if (all_rids_used) + { + if (all_targets_used) + return PGPA_ITM_EQUAL; + else + return PGPA_ITM_KEYS_ARE_SUBSET; + } + else + { + if (all_targets_used) + return PGPA_ITM_TARGETS_ARE_SUBSET; + else if (any_rids_used) + return PGPA_ITM_INTERSECTING; + else + return PGPA_ITM_DISJOINT; + } +} + +/* + * Returns true if every target or sub-target is matched by at least one + * identifier, and otherwise false. + * + * Also sets rids_used[i] = true for each idenifier that matches at least one + * target. + */ +static bool +pgpa_identifiers_cover_target(int nrids, pgpa_identifier *rids, + pgpa_advice_target *target, bool *rids_used) +{ + bool result = false; + + if (target->ttype != PGPA_TARGET_IDENTIFIER) + { + result = true; + + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + if (!pgpa_identifiers_cover_target(nrids, rids, child_target, + rids_used)) + result = false; + } + } + else + { + for (int i = 0; i < nrids; ++i) + { + if (pgpa_identifier_matches_target(&rids[i], target)) + { + rids_used[i] = true; + result = true; + } + } + } + + return result; +} diff --git a/contrib/pg_plan_advice/pgpa_ast.h b/contrib/pg_plan_advice/pgpa_ast.h new file mode 100644 index 0000000000..f6fe730a4d --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_ast.h @@ -0,0 +1,204 @@ +/*------------------------------------------------------------------------- + * + * pgpa_ast.h + * abstract syntax trees for plan advice, plus parser/scanner support + * + * Copyright (c) 2016-2024, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_ast.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_AST_H +#define PGPA_AST_H + +#include "pgpa_identifier.h" + +#include "nodes/pg_list.h" + +/* + * Advice items generally take the form SOME_TAG(item [...]), where an item + * can take various forms. The simplest case is a relation identifier, but + * some tags allow sublists, and JOIN_ORDER() allows both ordered and unordered + * sublists. + */ +typedef enum +{ + PGPA_TARGET_IDENTIFIER, /* relation identifier */ + PGPA_TARGET_ORDERED_LIST, /* (item ...) */ + PGPA_TARGET_UNORDERED_LIST /* {item ...} */ +} pgpa_target_type; + +/* + * When an advice item describes a bitmap index scan, it may need to describe + * the use of multiple indexes. + */ +typedef enum +{ + PGPA_INDEX_NAME, /* index schema + name */ + PGPA_INDEX_AND, /* &&(item ...) */ + PGPA_INDEX_OR /* ||(item ...) */ +} pgpa_index_type; + +/* + * An index specification. We use this for INDEX_SCAN, INDEX_ONLY_SCAN, + * and BITMAP_HEAP_SCAN advice, but in the former two cases, the target must + * be of type PGPA_INDEX_NAME. + */ +typedef struct pgpa_index_target +{ + pgpa_index_type itype; + + /* Index schem and name, when itype == PGPA_INDEX_NAME */ + char *indnamespace; + char *indname; + + /* List of pgpa_index_target objects, when itype != PGPA_INDEX_NAME */ + List *children; +} pgpa_index_target; + +/* + * A single item about which advice is being given, which could be either + * a relation identifier that we want to break out into its constituent fields, + * or a sublist of some kind. + */ +typedef struct pgpa_advice_target +{ + pgpa_target_type ttype; + + /* + * This field is meaningful when ttype is PGPA_TARGET_IDENTIFIER. + * + * All identifiers must have an alias name and an occurrence number; the + * remaining fields can be NULL. Note that it's possible to specify a + * partition name without a partition schema, but not the reverse. + */ + pgpa_identifier rid; + + /* + * This field is set when ttype is PPGA_TARGET_IDENTIFIER and the advice + * tag is PGPA_TAG_INDEX_SCAN, PGPA_TAG_INDEX_ONLY_SCAN, or + * PGPA_TAG_BITMAP_HEAP_SCAN. + */ + pgpa_index_target *itarget; + + /* + * When the ttype is PGPA_TARGET_<anything>_LIST, this field contains a + * list of additional pgpa_advice_target objects. Otherwise, it is unused. + */ + List *children; +} pgpa_advice_target; + +/* + * These are all the kinds of advice that we know how to parse. If a keyword + * is found at the top level, it must be in this list. + * + * If you change anything here, also update pgpa_parse_advice_tag and + * pgpa_cstring_advice_tag. + */ +typedef enum pgpa_advice_tag_type +{ + PGPA_TAG_BITMAP_HEAP_SCAN, + PGPA_TAG_FOREIGN_JOIN, + PGPA_TAG_GATHER, + PGPA_TAG_GATHER_MERGE, + PGPA_TAG_HASH_JOIN, + PGPA_TAG_INDEX_ONLY_SCAN, + PGPA_TAG_INDEX_SCAN, + PGPA_TAG_JOIN_ORDER, + PGPA_TAG_MERGE_JOIN_MATERIALIZE, + PGPA_TAG_MERGE_JOIN_PLAIN, + PGPA_TAG_NESTED_LOOP_MATERIALIZE, + PGPA_TAG_NESTED_LOOP_MEMOIZE, + PGPA_TAG_NESTED_LOOP_PLAIN, + PGPA_TAG_NO_GATHER, + PGPA_TAG_PARTITIONWISE, + PGPA_TAG_SEMIJOIN_NON_UNIQUE, + PGPA_TAG_SEMIJOIN_UNIQUE, + PGPA_TAG_SEQ_SCAN, + PGPA_TAG_TID_SCAN +} pgpa_advice_tag_type; + +/* + * An item of advice, meaning a tag and the list of all targets to which + * it is being applied. + * + * "targets" is a list of pgpa_advice_target objects. + * + * The List returned from pgpa_yyparse is list of pgpa_advice_item objects. + */ +typedef struct pgpa_advice_item +{ + pgpa_advice_tag_type tag; + List *targets; +} pgpa_advice_item; + +/* + * Result of comparing an array of pgpa_relation_identifier objects to a + * pgpa_advice_target. + * + * PGPA_ITM_EQUAL means all targets are matched by some identifier, and + * all identifiers were matched to a target. + * + * PGPA_ITM_KEYS_ARE_SUBSET means that all identifiers matched to a target, + * but there were leftover targets. Generally, this means that the advice is + * looking to apply to all of the rels we have plus some additional ones that + * we don't have. + * + * PGPA_ITM_TARGETS_ARE_SUBSET means that all targets are matched by an + * identifiers, but there were leftover identifiers. Generally, this means + * that the advice is looking to apply to some but not all of the rels we have. + * + * PGPA_ITM_INTERSECTING means that some identifeirs and targets were matched, + * but neither all identifiers nor all targets could be matched to items in + * the other set. + * + * PGPA_ITM_DISJOINT means that no matches between identifeirs and targets were + * found. + */ +typedef enum +{ + PGPA_ITM_EQUAL, + PGPA_ITM_KEYS_ARE_SUBSET, + PGPA_ITM_TARGETS_ARE_SUBSET, + PGPA_ITM_INTERSECTING, + PGPA_ITM_DISJOINT +} pgpa_itm_type; + +/* for pgpa_scanner.l and pgpa_parser.y */ +union YYSTYPE; +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void *yyscan_t; +#endif + +/* in pgpa_scanner.l */ +extern int pgpa_yylex(union YYSTYPE *yylval_param, List **result, + char **parse_error_msg_p, yyscan_t yyscanner); +extern void pgpa_yyerror(List **result, char **parse_error_msg_p, + yyscan_t yyscanner, + const char *message); +extern void pgpa_scanner_init(const char *str, yyscan_t *yyscannerp); +extern void pgpa_scanner_finish(yyscan_t yyscanner); + +/* in pgpa_parser.y */ +extern int pgpa_yyparse(List **result, char **parse_error_msg_p, + yyscan_t yyscanner); +extern List *pgpa_parse(const char *advice_string, char **error_p); + +/* in pgpa_ast.c */ +extern char *pgpa_cstring_advice_tag(pgpa_advice_tag_type advice_tag); +extern bool pgpa_identifier_matches_target(pgpa_identifier *rid, + pgpa_advice_target *target); +extern pgpa_itm_type pgpa_identifiers_match_target(int nrids, + pgpa_identifier *rids, + pgpa_advice_target *target); +extern bool pgpa_index_targets_equal(pgpa_index_target *i1, + pgpa_index_target *i2); +extern pgpa_advice_tag_type pgpa_parse_advice_tag(const char *tag, bool *fail); +extern void pgpa_format_advice_target(StringInfo str, + pgpa_advice_target *target); +extern void pgpa_format_index_target(StringInfo str, + pgpa_index_target *itarget); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_collector.c b/contrib/pg_plan_advice/pgpa_collector.c new file mode 100644 index 0000000000..7a45b5031f --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_collector.c @@ -0,0 +1,637 @@ +/*------------------------------------------------------------------------- + * + * pgpa_collector.c + * collect advice into backend-local or shared memory + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_collector.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "pg_plan_advice.h" +#include "pgpa_collector.h" + +#include "datatype/timestamp.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "nodes/pg_list.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/timestamp.h" + +PG_FUNCTION_INFO_V1(pg_clear_collected_local_advice); +PG_FUNCTION_INFO_V1(pg_clear_collected_shared_advice); +PG_FUNCTION_INFO_V1(pg_get_collected_local_advice); +PG_FUNCTION_INFO_V1(pg_get_collected_shared_advice); + +#define ADVICE_CHUNK_SIZE 1024 +#define ADVICE_CHUNK_ARRAY_SIZE 64 + +#define PG_GET_ADVICE_COLUMNS 7 + +/* + * Advice extracted from one query plan, together with the query string + * and various other identifying details. + */ +typedef struct pgpa_collected_advice +{ + Oid userid; /* user OID */ + Oid dbid; /* database OID */ + uint64 queryid; /* query identifier */ + TimestampTz timestamp; /* query timestamp */ + int advice_offset; /* start of advice in textual data */ + char textual_data[FLEXIBLE_ARRAY_MEMBER]; +} pgpa_collected_advice; + +/* + * A bunch of pointers to pgpa_collected_advice objects, stored in + * backend-local memory. + */ +typedef struct pgpa_local_advice_chunk +{ + pgpa_collected_advice *entries[ADVICE_CHUNK_SIZE]; +} pgpa_local_advice_chunk; + +/* + * Information about all of the pgpa_collected_advice objects that we're + * storing in local memory. + * + * We assign consecutive IDs, starting from 0, to each pgpa_collected_advice + * object that we store. The actual storage is an array of chunks, which + * helps keep memcpy() overhead low when we start discarding older data. + */ +typedef struct pgpa_local_advice +{ + uint64 next_id; + uint64 oldest_id; + uint64 base_id; + int chunk_array_allocated_size; + pgpa_local_advice_chunk **chunks; +} pgpa_local_advice; + +/* + * Just like pgpa_local_advice_chunk, but stored in a dynamic shared area, + * so we must use dsa_pointer instead of native pointers. + */ +typedef struct pgpa_shared_advice_chunk +{ + dsa_pointer entries[ADVICE_CHUNK_SIZE]; +} pgpa_shared_advice_chunk; + +/* + * Just like pgpa_local_advice, but stored in a dynamic shared area, so + * we must use dsa_pointer instead of native pointers. + */ +typedef struct pgpa_shared_advice +{ + uint64 next_id; + uint64 oldest_id; + uint64 base_id; + int chunk_array_allocated_size; + dsa_pointer chunks; +} pgpa_shared_advice; + +/* Pointers to local and shared collectors */ +static pgpa_local_advice *local_collector = NULL; +static pgpa_shared_advice *shared_collector = NULL; + +/* Static functions */ +static pgpa_collected_advice *pgpa_make_collected_advice(Oid userid, + Oid dbid, + uint64 queryId, + TimestampTz timestamp, + const char *query_string, + const char *advice_string, + dsa_area *area, + dsa_pointer *result); +static void pgpa_store_local_advice(pgpa_collected_advice *ca); +static void pgpa_trim_local_advice(int limit); +static void pgpa_store_shared_advice(dsa_pointer ca_pointer); +static void pgpa_trim_shared_advice(dsa_area *area, int limit); + +/* Helper function to extract the query string from pgpa_collected_advice */ +static inline const char * +query_string(pgpa_collected_advice *ca) +{ + return ca->textual_data; +} + +/* Helper function to extract the advice string from pgpa_collected_advice */ +static inline const char * +advice_string(pgpa_collected_advice *ca) +{ + return ca->textual_data + ca->advice_offset; +} + +/* + * Store collected query advice into the local or shared advice collector, + * as appropriate. + */ +void +pgpa_collect_advice(uint64 queryId, const char *query_string, + const char *advice_string) +{ + Oid userid = GetUserId(); + Oid dbid = MyDatabaseId; + TimestampTz now = GetCurrentTimestamp(); + + if (pg_plan_advice_local_collection_limit > 0) + { + pgpa_collected_advice *ca; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(pg_plan_advice_get_mcxt()); + ca = pgpa_make_collected_advice(userid, dbid, queryId, now, + query_string, advice_string, + NULL, NULL); + pgpa_store_local_advice(ca); + MemoryContextSwitchTo(oldcontext); + } + + if (pg_plan_advice_shared_collection_limit > 0) + { + dsa_area *area = pg_plan_advice_dsa_area(); + dsa_pointer ca_pointer = InvalidDsaPointer; /* placate compiler */ + + pgpa_make_collected_advice(userid, dbid, queryId, now, + query_string, advice_string, area, + &ca_pointer); + pgpa_store_shared_advice(ca_pointer); + } +} + +/* + * Allocate and fill a new pgpa_collected_advice object. + * + * If area != NULL, it is used to allocate the new object, and the resulting + * dsa_pointer is returned via *result. + * + * If area == NULL, the new object is allocated in the current memory context, + * and result is not examined or modified. + */ +static pgpa_collected_advice * +pgpa_make_collected_advice(Oid userid, Oid dbid, uint64 queryId, + TimestampTz timestamp, + const char *query_string, + const char *advice_string, + dsa_area *area, dsa_pointer *result) +{ + size_t query_string_length = strlen(query_string) + 1; + size_t advice_string_length = strlen(advice_string) + 1; + size_t total_length; + pgpa_collected_advice *ca; + + total_length = offsetof(pgpa_collected_advice, textual_data) + + query_string_length + advice_string_length; + + if (area == NULL) + ca = palloc(total_length); + else + { + *result = dsa_allocate(area, total_length); + ca = dsa_get_address(area, *result); + } + + ca->userid = GetUserId(); + ca->dbid = MyDatabaseId; + ca->queryid = queryId; + ca->timestamp = timestamp; + ca->advice_offset = query_string_length; + + memcpy(ca->textual_data, query_string, query_string_length); + memcpy(&ca->textual_data[ca->advice_offset], + advice_string, advice_string_length); + + return ca; +} + +/* + * Add a pg_collected_advice object to our backend-local advice collection. + * + * Caller is responsible for switching to the appropriate memory context; + * the provided object should have been allocated in that same context. + */ +static void +pgpa_store_local_advice(pgpa_collected_advice *ca) +{ + uint64 chunk_number; + uint64 chunk_offset; + pgpa_local_advice *la = local_collector; + + /* If the local advice collector isn't initialized yet, do that now. */ + if (la == NULL) + { + la = palloc0(sizeof(pgpa_local_advice)); + la->chunk_array_allocated_size = ADVICE_CHUNK_ARRAY_SIZE; + la->chunks = palloc0_array(pgpa_local_advice_chunk *, + la->chunk_array_allocated_size); + local_collector = la; + } + + /* Compute chunk and offset at which to store this advice. */ + chunk_number = (la->next_id - la->base_id) / ADVICE_CHUNK_SIZE; + chunk_offset = (la->next_id - la->base_id) % ADVICE_CHUNK_SIZE; + + /* Extend chunk array, if needed. */ + if (chunk_number >= la->chunk_array_allocated_size) + { + int new_size; + + new_size = la->chunk_array_allocated_size + ADVICE_CHUNK_ARRAY_SIZE; + la->chunks = repalloc0_array(la->chunks, + pgpa_local_advice_chunk *, + la->chunk_array_allocated_size, + new_size); + la->chunk_array_allocated_size = new_size; + } + + /* Allocate new chunk, if needed. */ + if (la->chunks[chunk_number] == NULL) + la->chunks[chunk_number] = palloc0_object(pgpa_local_advice_chunk); + + /* Save pointer and bump next-id counter. */ + Assert(la->chunks[chunk_number]->entries[chunk_offset] == NULL); + la->chunks[chunk_number]->entries[chunk_offset] = ca; + ++la->next_id; + + /* If we've exceeded the storage limit, discard old data. */ + pgpa_trim_local_advice(pg_plan_advice_local_collection_limit); +} + +/* + * Add a pg_collected_advice object to the shared advice collection. + * + * 'ca_pointer' should have been allocated from the pg_plan_advice DSA area + * and should point to an object of type pgpa_collected_advice. + */ +static void +pgpa_store_shared_advice(dsa_pointer ca_pointer) +{ + uint64 chunk_number; + uint64 chunk_offset; + pgpa_shared_state *state = pg_plan_advice_attach(); + dsa_area *area = pg_plan_advice_dsa_area(); + pgpa_shared_advice *sa = shared_collector; + dsa_pointer *chunk_array; + pgpa_shared_advice_chunk *chunk; + + /* Lock the shared state. */ + LWLockAcquire(&state->lock, LW_EXCLUSIVE); + + /* + * If we're not attached to the shared advice collector yet, fix that now. + * If we're the first ones to attach, we may need to create the object. + */ + if (sa == NULL) + { + if (state->shared_collector == InvalidDsaPointer) + state->shared_collector = + dsa_allocate0(area, sizeof(pgpa_shared_advice)); + shared_collector = sa = dsa_get_address(area, state->shared_collector); + } + + /* + * It's possible that some other backend may have succeeded in creating + * the main collector object but failed to allocate an initial chunk + * array, so we must be prepared to allocate the chunk array here whether + * or not we created the collector object. + */ + if (shared_collector->chunk_array_allocated_size == 0) + { + sa->chunks = + dsa_allocate0(area, + sizeof(dsa_pointer) * ADVICE_CHUNK_ARRAY_SIZE); + sa->chunk_array_allocated_size = ADVICE_CHUNK_ARRAY_SIZE; + } + + /* Compute chunk and offset at which to store this advice. */ + chunk_number = (sa->next_id - sa->base_id) / ADVICE_CHUNK_SIZE; + chunk_offset = (sa->next_id - sa->base_id) % ADVICE_CHUNK_SIZE; + + /* Get the address of the chunk array and, if needed, extend it. */ + if (chunk_number >= sa->chunk_array_allocated_size) + { + int new_size; + dsa_pointer new_chunks; + + /* + * DSA can't enlarge an existing allocation, so we must make a new + * allocation and copy data over. + */ + new_size = sa->chunk_array_allocated_size + ADVICE_CHUNK_ARRAY_SIZE; + new_chunks = dsa_allocate0(area, sizeof(dsa_pointer) * new_size); + chunk_array = dsa_get_address(area, new_chunks); + memcpy(chunk_array, dsa_get_address(area, sa->chunks), + sizeof(dsa_pointer) * sa->chunk_array_allocated_size); + dsa_free(area, sa->chunks); + sa->chunks = new_chunks; + sa->chunk_array_allocated_size = new_size; + } + else + chunk_array = dsa_get_address(area, sa->chunks); + + /* Get the address of the desired chunk, allocating it if needed. */ + if (chunk_array[chunk_number] == InvalidDsaPointer) + chunk_array[chunk_number] = + dsa_allocate0(area, sizeof(pgpa_shared_advice_chunk)); + chunk = dsa_get_address(area, chunk_array[chunk_number]); + + /* Save pointer and bump next-id counter. */ + Assert(chunk->entries[chunk_offset] == InvalidDsaPointer); + chunk->entries[chunk_offset] = ca_pointer; + ++sa->next_id; + + /* If we've exceeded the storage limit, discard old data. */ + pgpa_trim_shared_advice(area, pg_plan_advice_shared_collection_limit); + + /* Release lock on shared state. */ + LWLockRelease(&state->lock); +} + +/* + * Discard collected advice stored in backend-local memory in excess of the + * specified limit. + */ +static void +pgpa_trim_local_advice(int limit) +{ + pgpa_local_advice *la = local_collector; + uint64 current_count; + uint64 trim_count; + uint64 total_chunk_count; + uint64 trim_chunk_count; + uint64 remaining_chunk_count; + + /* If we haven't yet reached the limit, there's nothing to do. */ + current_count = la->next_id - la->oldest_id; + if (current_count <= limit) + return; + + /* Free enough entries to get us back down to the limit. */ + trim_count = current_count - limit; + while (trim_count > 0) + { + uint64 chunk_number; + uint64 chunk_offset; + + chunk_number = (la->oldest_id - la->base_id) / ADVICE_CHUNK_SIZE; + chunk_offset = (la->oldest_id - la->base_id) % ADVICE_CHUNK_SIZE; + + Assert(la->chunks[chunk_number]->entries[chunk_offset] != NULL); + pfree(la->chunks[chunk_number]->entries[chunk_offset]); + la->chunks[chunk_number]->entries[chunk_offset] = NULL; + ++la->oldest_id; + --trim_count; + } + + /* Free any chunks that are now entirely unused. */ + trim_chunk_count = (la->oldest_id - la->base_id) / ADVICE_CHUNK_SIZE; + for (uint64 n = 0; n < trim_chunk_count; ++n) + pfree(la->chunks[n]); + + /* Slide remaining chunk pointers back toward the base of the array. */ + total_chunk_count = (la->next_id - la->base_id + + ADVICE_CHUNK_SIZE - 1) / ADVICE_CHUNK_SIZE; + remaining_chunk_count = total_chunk_count - trim_chunk_count; + if (remaining_chunk_count > 0) + memmove(&la->chunks[0], &la->chunks[trim_chunk_count], + sizeof(pgpa_local_advice_chunk *) * remaining_chunk_count); + + /* Don't leave stale pointers around. */ + memset(&la->chunks[remaining_chunk_count], 0, + sizeof(pgpa_local_advice_chunk *) + * (total_chunk_count - remaining_chunk_count)); + + /* Adjust base ID value accordingly. */ + la->base_id += trim_chunk_count * ADVICE_CHUNK_SIZE; +} + +/* + * Discard collected advice stored in shared memory in excess of the + * specified limit. + */ +static void +pgpa_trim_shared_advice(dsa_area *area, int limit) +{ + pgpa_shared_advice *sa = shared_collector; + uint64 current_count; + uint64 trim_count; + uint64 total_chunk_count; + uint64 trim_chunk_count; + uint64 remaining_chunk_count; + dsa_pointer *chunk_array; + + /* If we haven't yet reached the limit, there's nothing to do. */ + current_count = sa->next_id - sa->oldest_id; + if (current_count <= limit) + return; + + /* Get a pointer to the chunk array. */ + chunk_array = dsa_get_address(area, sa->chunks); + + /* Free enough entries to get us back down to the limit. */ + trim_count = current_count - limit; + while (trim_count > 0) + { + uint64 chunk_number; + uint64 chunk_offset; + pgpa_shared_advice_chunk *chunk; + + chunk_number = (sa->oldest_id - sa->base_id) / ADVICE_CHUNK_SIZE; + chunk_offset = (sa->oldest_id - sa->base_id) % ADVICE_CHUNK_SIZE; + + chunk = dsa_get_address(area, chunk_array[chunk_number]); + Assert(chunk->entries[chunk_offset] != InvalidDsaPointer); + dsa_free(area, chunk->entries[chunk_offset]); + chunk->entries[chunk_offset] = InvalidDsaPointer; + ++sa->oldest_id; + --trim_count; + } + + /* Free any chunks that are now entirely unused. */ + trim_chunk_count = (sa->oldest_id - sa->base_id) / ADVICE_CHUNK_SIZE; + for (uint64 n = 0; n < trim_chunk_count; ++n) + dsa_free(area, chunk_array[n]); + + /* Slide remaining chunk pointers back toward the base of the array. */ + total_chunk_count = (sa->next_id - sa->base_id + + ADVICE_CHUNK_SIZE - 1) / ADVICE_CHUNK_SIZE; + remaining_chunk_count = total_chunk_count - trim_chunk_count; + if (remaining_chunk_count > 0) + memmove(&chunk_array[0], &chunk_array[trim_chunk_count], + sizeof(dsa_pointer) * remaining_chunk_count); + + /* Don't leave stale pointers around. */ + memset(&chunk_array[remaining_chunk_count], 0, + sizeof(pgpa_shared_advice_chunk *) + * (total_chunk_count - remaining_chunk_count)); + + /* Adjust base ID value accordingly. */ + sa->base_id += trim_chunk_count * ADVICE_CHUNK_SIZE; +} + +/* + * SQL-callable function to discard advice collected in backend-local memory + */ +Datum +pg_clear_collected_local_advice(PG_FUNCTION_ARGS) +{ + if (local_collector != NULL) + pgpa_trim_local_advice(0); + + PG_RETURN_VOID(); +} + +/* + * SQL-callable function to discard advice collected in backend-local memory + */ +Datum +pg_clear_collected_shared_advice(PG_FUNCTION_ARGS) +{ + pgpa_shared_state *state = pg_plan_advice_attach(); + dsa_area *area = pg_plan_advice_dsa_area(); + + LWLockAcquire(&state->lock, LW_EXCLUSIVE); + + /* + * If we're not attached to the shared advice collector yet, fix that now; + * but if the collector doesn't even exist, we can return without doing + * anything else. + */ + if (shared_collector == NULL) + { + if (state->shared_collector == InvalidDsaPointer) + { + LWLockRelease(&state->lock); + return (Datum) 0; + } + shared_collector = dsa_get_address(area, state->shared_collector); + } + + /* Do the real work */ + pgpa_trim_shared_advice(area, 0); + + LWLockRelease(&state->lock); + + PG_RETURN_VOID(); +} + +/* + * SQL-callable SRF to return advice collected in backend-local memory + */ +Datum +pg_get_collected_local_advice(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + pgpa_local_advice *la = local_collector; + Oid userid = GetUserId(); + + InitMaterializedSRF(fcinfo, 0); + + if (la == NULL) + return (Datum) 0; + + /* Loop over all entries. */ + for (uint64 id = la->oldest_id; id < la->next_id; ++id) + { + uint64 chunk_number; + uint64 chunk_offset; + pgpa_collected_advice *ca; + Datum values[PG_GET_ADVICE_COLUMNS]; + bool nulls[PG_GET_ADVICE_COLUMNS] = {0}; + + chunk_number = (id - la->base_id) / ADVICE_CHUNK_SIZE; + chunk_offset = (id - la->base_id) % ADVICE_CHUNK_SIZE; + + ca = la->chunks[chunk_number]->entries[chunk_offset]; + + if (!member_can_set_role(userid, ca->userid)) + continue; + + values[0] = UInt64GetDatum(id); + values[1] = ObjectIdGetDatum(ca->userid); + values[2] = ObjectIdGetDatum(ca->dbid); + values[3] = UInt64GetDatum(ca->queryid); + values[4] = TimestampGetDatum(ca->timestamp); + values[5] = CStringGetTextDatum(query_string(ca)); + values[6] = CStringGetTextDatum(advice_string(ca)); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + + return (Datum) 0; +} + +/* + * SQL-callable SRF to return advice collected in shared memory + */ +Datum +pg_get_collected_shared_advice(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + pgpa_shared_state *state = pg_plan_advice_attach(); + dsa_area *area = pg_plan_advice_dsa_area(); + dsa_pointer *chunk_array; + pgpa_shared_advice *sa = shared_collector; + + InitMaterializedSRF(fcinfo, 0); + + /* Lock the shared state. */ + LWLockAcquire(&state->lock, LW_SHARED); + + /* + * If we're not attached to the shared advice collector yet, fix that now; + * but if the collector doesn't even exist, we can return without doing + * anything else. + */ + if (sa == NULL) + { + if (state->shared_collector == InvalidDsaPointer) + { + LWLockRelease(&state->lock); + return (Datum) 0; + } + shared_collector = sa = dsa_get_address(area, state->shared_collector); + } + + /* Get a pointer to the chunk array. */ + chunk_array = dsa_get_address(area, sa->chunks); + + /* Loop over all entries. */ + for (uint64 id = sa->oldest_id; id < sa->next_id; ++id) + { + uint64 chunk_number; + uint64 chunk_offset; + pgpa_shared_advice_chunk *chunk; + pgpa_collected_advice *ca; + Datum values[PG_GET_ADVICE_COLUMNS]; + bool nulls[PG_GET_ADVICE_COLUMNS] = {0}; + + chunk_number = (id - sa->base_id) / ADVICE_CHUNK_SIZE; + chunk_offset = (id - sa->base_id) % ADVICE_CHUNK_SIZE; + + chunk = dsa_get_address(area, chunk_array[chunk_number]); + ca = dsa_get_address(area, chunk->entries[chunk_offset]); + + values[0] = UInt64GetDatum(id); + values[1] = ObjectIdGetDatum(ca->userid); + values[2] = ObjectIdGetDatum(ca->dbid); + values[3] = UInt64GetDatum(ca->queryid); + values[4] = TimestampGetDatum(ca->timestamp); + values[5] = CStringGetTextDatum(query_string(ca)); + values[6] = CStringGetTextDatum(advice_string(ca)); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + + /* Release lock on shared state. */ + LWLockRelease(&state->lock); + + return (Datum) 0; +} diff --git a/contrib/pg_plan_advice/pgpa_collector.h b/contrib/pg_plan_advice/pgpa_collector.h new file mode 100644 index 0000000000..b6e746a06d --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_collector.h @@ -0,0 +1,18 @@ +/*------------------------------------------------------------------------- + * + * pgpa_collector.h + * collect advice into backend-local or shared memory + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_collector.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_COLLECTOR_H +#define PGPA_COLLECTOR_H + +extern void pgpa_collect_advice(uint64 queryId, const char *query_string, + const char *advice_string); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_identifier.c b/contrib/pg_plan_advice/pgpa_identifier.c new file mode 100644 index 0000000000..a5fa77e083 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_identifier.c @@ -0,0 +1,476 @@ +/*------------------------------------------------------------------------- + * + * pgpa_identifier.c + * create appropriate identifiers for range table entries + * + * The goal of this module is to be able to produce identifiers for range + * table entries that are unique, understandable to human beings, and + * able to be reconstructed during future planning cycles. As an + * exception, we do not care about, or want to produce, identifiers for + * RTE_JOIN entries. This is because (1) we would end up with a ton of + * RTEs with unhelpful names like unnamed_join_17; (2) not all joins have + * RTEs; and (3) we intend to refer to joins by their constituent members + * rather than by reference to the join RTE. + * + * In general, we construct identifiers of the following form: + * + * alias_name#occurrence_number/child_table_name@subquery_name + * + * However, occurrence_number is omitted when it is the first occurrence + * within the same subquery, child_table_name is omitted for relations that + * are not child tables, and subquery_name is omitted for the topmost + * query level. Whenever an item is omitted, the preceding punctuation mark + * is also omitted. Identifier-style escaping is applied to alias_name and + * subquery_name. Whenever we include child_table_name, we always + * schema-qualified name, but writing their own plan advice are not required + * to do so. Identifier-style escaping is applied to the schema and to the + * relation names separately. + * + * The upshot of all of these rules is that in simple cases, the relation + * identifier is textually identical to the alias name, making life easier + * for users. However, even in complex cases, every relation identifier + * for a given query will be unique (or at least we hope so: if not, this + * code is buggy and the identifier format might need to be rethought). + * + * A key goal of this system is that we want to be able to reconstruct the + * same identifiers during a future planning cycle for the same query, so + * that if a certain behavior is specified for a certain identifier, we can + * properly identify the RTI for which that behavior is mandated. In order + * for this to work, subquery names must be unique and known before the + * subquery is planned, and the remainder of the identifier must not depend + * on any part of the query outside of the current subquery level. In + * particular, occurrence_number must be calculated relative to the range + * table for the relevant subquery, not the final flattened range table. + * + * Copyright (c) 2016-2024, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_identifier.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "pgpa_identifier.h" + +#include "parser/parsetree.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" + +static Index *pgpa_create_top_rti_map(Index rtable_length, List *rtable, + List *appinfos); +static int pgpa_occurrence_number(List *rtable, Index *top_rti_map, + SubPlanRTInfo *rtinfo, Index rti); + +/* + * Create a range table identifier from scratch. + * + * This function leaves the caller to do all the heavy lifting, so it's + * generally better to use one of the functions below instead. + * + * See the file header comments for more details on the format of an + * identifier. + */ +const char * +pgpa_identifier_string(const pgpa_identifier *rid) +{ + const char *result; + + Assert(rid->alias_name != NULL); + result = quote_identifier(rid->alias_name); + + Assert(rid->occurrence >= 0); + if (rid->occurrence > 1) + result = psprintf("%s#%d", result, rid->occurrence); + + if (rid->partrel != NULL) + { + if (rid->partnsp == NULL) + result = psprintf("%s/%s", result, + quote_identifier(rid->partrel)); + else + result = psprintf("%s/%s.%s", result, + quote_identifier(rid->partnsp), + quote_identifier(rid->partrel)); + } + + if (rid->plan_name != NULL) + result = psprintf("%s@%s", result, quote_identifier(rid->plan_name)); + + return result; +} + +/* + * Compute a relation identifier for a particular RTI. + * + * The caller provides root and rti, and gets the necessary details back via + * the remaining parameters. + */ +void +pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti, + pgpa_identifier *rid) +{ + Index top_rti = rti; + int occurrence = 1; + RangeTblEntry *rte; + RangeTblEntry *top_rte; + char *partnsp = NULL; + char *partrel = NULL; + + /* + * If this is a child RTE, find the topmost parent that is still of type + * RTE_RELATION. We do this because we identify children of partitioned + * tables by the name of the child table, but subqueries can also have + * child rels and we don't care about those here. + */ + for (;;) + { + AppendRelInfo *appinfo; + RangeTblEntry *parent_rte; + + /* append_rel_array can be NULL if there are no children */ + if (root->append_rel_array == NULL || + (appinfo = root->append_rel_array[top_rti]) == NULL) + break; + + parent_rte = planner_rt_fetch(appinfo->parent_relid, root); + if (parent_rte->rtekind != RTE_RELATION) + break; + + top_rti = appinfo->parent_relid; + } + + /* Get the range table entries for the RTI and top RTI. */ + rte = planner_rt_fetch(rti, root); + top_rte = planner_rt_fetch(top_rti, root); + Assert(rte->rtekind != RTE_JOIN); + Assert(top_rte->rtekind != RTE_JOIN); + + /* Work out the correct occurrence number. */ + for (Index prior_rti = 1; prior_rti < top_rti; ++prior_rti) + { + RangeTblEntry *prior_rte; + AppendRelInfo *appinfo; + + /* + * If this is a child rel of a parent that is a relation, skip it. + * + * Such range table entries are disambiguated by mentioning the schema + * and name of the table, not by counting them as separate occurrences + * of the same table. + * + * NB: append_rel_array can be NULL if there are no children + */ + if (root->append_rel_array != NULL && + (appinfo = root->append_rel_array[prior_rti]) != NULL) + { + RangeTblEntry *parent_rte; + + parent_rte = planner_rt_fetch(appinfo->parent_relid, root); + if (parent_rte->rtekind == RTE_RELATION) + continue; + } + + /* Skip NULL entries and joins. */ + prior_rte = planner_rt_fetch(prior_rti, root); + if (prior_rte == NULL || prior_rte->rtekind == RTE_JOIN) + continue; + + /* Skip if the alias name differs. */ + if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0) + continue; + + /* Looks like a true duplicate. */ + ++occurrence; + } + + /* If this is a child table, get the schema and relation names. */ + if (rti != top_rti) + { + partnsp = get_namespace_name_or_temp(get_rel_namespace(rte->relid)); + partrel = get_rel_name(rte->relid); + } + + /* OK, we have all the answers we need. Return them to the caller. */ + rid->alias_name = top_rte->eref->aliasname; + rid->occurrence = occurrence; + rid->partnsp = partnsp; + rid->partrel = partrel; + rid->plan_name = root->plan_name; +} + +/* + * Compute a relation identifier for a set of RTIs, except for any RTE_JOIN + * RTIs that may be present. + * + * RTE_JOIN entries are excluded because they cannot be mentioned by plan + * advice. + * + * The caller is responsible for making sure that the tkeys array is large + * enough to store the results. + * + * The return value is the number of identifiers computed. + */ +int +pgpa_compute_identifiers_by_relids(PlannerInfo *root, Bitmapset *relids, + pgpa_identifier *rids) +{ + int count = 0; + int rti = -1; + + while ((rti = bms_next_member(relids, rti)) >= 0) + { + RangeTblEntry *rte = planner_rt_fetch(rti, root); + + if (rte->rtekind == RTE_JOIN) + continue; + pgpa_compute_identifier_by_rti(root, rti, &rids[count++]); + } + + Assert(count > 0); + return count; +} + +/* + * Create an array of range table identifiers for all the non-NULL, + * non-RTE_JOIN entries in the PlannedStmt's range table. + */ +pgpa_identifier * +pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt) +{ + Index rtable_length = list_length(pstmt->rtable); + pgpa_identifier *result = palloc0_array(pgpa_identifier, rtable_length); + Index *top_rti_map; + int rtinfoindex = 0; + SubPlanRTInfo *rtinfo = NULL; + SubPlanRTInfo *nextrtinfo = NULL; + + /* + * Account for relations addded by inheritance expansion of partitioned + * tables. + */ + top_rti_map = pgpa_create_top_rti_map(rtable_length, pstmt->rtable, + pstmt->appendRelations); + + /* + * When we begin iterating, we're processing the portion of the range + * table that originated from the top-level PlannerInfo, so subrtinfo is + * NULL. Later, subrtinfo will be the SubPlanRTInfo for the subquery whose + * portion of the range table we are processing. nextrtinfo is always the + * SubPlanRTInfo that follows the current one, if any, so when we're + * processing the top-level query's portion of the range table, the next + * SubPlanRTInfo is the very first one. + */ + if (pstmt->subrtinfos != NULL) + nextrtinfo = linitial(pstmt->subrtinfos); + + /* Main loop over the range table. */ + for (Index rti = 1; rti <= rtable_length; rti++) + { + const char *plan_name; + Index top_rti; + RangeTblEntry *rte; + RangeTblEntry *top_rte; + char *partnsp = NULL; + char *partrel = NULL; + int occurrence; + pgpa_identifier *rid; + + /* + * Advance to the next SubPlanRTInfo, if it's time to do that. + * + * This loop probably shouldn't ever iterate more than once, because + * that would imply that a subquery was planned but added nothing to + * the range table; but let's be defensive and assume it can happen. + */ + while (nextrtinfo != NULL && rti > nextrtinfo->rtoffset) + { + rtinfo = nextrtinfo; + if (++rtinfoindex >= list_length(pstmt->subrtinfos)) + nextrtinfo = NULL; + else + nextrtinfo = list_nth(pstmt->subrtinfos, rtinfoindex); + } + + /* Fetch the range table entry, if any. */ + rte = rt_fetch(rti, pstmt->rtable); + + /* + * We can't and don't need to identify null entries, and we don't want + * to identify join entries. + */ + if (rte == NULL || rte->rtekind == RTE_JOIN) + continue; + + /* + * If this is not a relation added by partitioned table expansion, + * then the top RTI/RTE are just the same as this RTI/RTE. Otherwise, + * we need the information for the top RTI/RTE, and must also fetch + * the partition schema and name. + */ + top_rti = top_rti_map[rti - 1]; + if (rti == top_rti) + top_rte = rte; + else + { + top_rte = rt_fetch(top_rti, pstmt->rtable); + partnsp = + get_namespace_name_or_temp(get_rel_namespace(rte->relid)); + partrel = get_rel_name(rte->relid); + } + + /* Compute the correct occurrence number. */ + occurrence = pgpa_occurrence_number(pstmt->rtable, top_rti_map, + rtinfo, top_rti); + + /* Get the name of the current plan (NULL for toplevel query). */ + plan_name = rtinfo == NULL ? NULL : rtinfo->plan_name; + + /* Save all the details we've derived. */ + rid = &result[rti - 1]; + rid->alias_name = top_rte->eref->aliasname; + rid->occurrence = occurrence; + rid->partnsp = partnsp; + rid->partrel = partrel; + rid->plan_name = plan_name; + } + + return result; +} + +/* + * Search for a pgpa_identifier in the array of identifiers computed for the + * range table. If exactly one match is found, return the matching RTI; else + * return 0. + */ +Index +pgpa_compute_rti_from_identifier(int rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_identifier *rid) +{ + Index result = 0; + + for (Index rti = 1; rti <= rtable_length; ++rti) + { + pgpa_identifier *rti_rid = &rt_identifiers[rti - 1]; + + /* If there's no identifier for this RTI, skip it. */ + if (rti_rid->alias_name == NULL) + continue; + + /* + * If it matches, return this RTI. As usual, an omitted partition + * schema matches anything, but partition and plan names must either + * match exactly or be omitted on both sides. + */ + if (strcmp(rid->alias_name, rti_rid->alias_name) == 0 && + rid->occurrence == rti_rid->occurrence && + (rid->partnsp == NULL || rti_rid->partnsp == NULL || + strcmp(rid->partnsp, rti_rid->partnsp) == 0) && + strings_equal_or_both_null(rid->partrel, rti_rid->partrel) && + strings_equal_or_both_null(rid->plan_name, rti_rid->plan_name)) + { + if (result != 0) + { + /* Multiple matches were found. */ + return 0; + } + result = rti; + } + } + + return result; +} + +/* + * Build a mapping from each RTI to the RTI whose alias_name will be used to + * construct the range table identifier. + * + * For child relations, this is the topmost parent that is still of type + * RTE_RELATION. For other relations, it's just the original RTI. + * + * Since we're eventually going to need this information for every RTI in + * the range table, it's best to compute all the answers in a single pass over + * the AppendRelInfo list. Otherwise, we might end up searching through that + * list repeatedly for entries of interest. + * + * Note that the returned array is uses zero-based indexing, while RTIs use + * 1-based indexing, so subtract 1 from the RTI before looking it up in the + * array. + */ +static Index * +pgpa_create_top_rti_map(Index rtable_length, List *rtable, List *appinfos) +{ + Index *top_rti_map = palloc0_array(Index, rtable_length); + + /* Initially, make every RTI point to itself. */ + for (Index rti = 1; rti <= rtable_length; ++rti) + top_rti_map[rti - 1] = rti; + + /* Update the map for each AppendRelInfo object. */ + foreach_node(AppendRelInfo, appinfo, appinfos) + { + Index parent_rti = appinfo->parent_relid; + RangeTblEntry *parent_rte = rt_fetch(parent_rti, rtable); + + /* If the parent is not RTE_RELATION, ignore this entry. */ + if (parent_rte->rtekind != RTE_RELATION) + continue; + + /* + * Map the child to wherever we mapped the parent. Parents always + * precede their children in the AppendRelInfo list, so this should + * work out. + */ + top_rti_map[appinfo->child_relid - 1] = top_rti_map[parent_rti - 1]; + } + + return top_rti_map; +} + +/* + * Find the occurence number of a certain relation within a certain subquery. + * + * The same alias name can occur multiple times within a subquery, but we want + * to disambiguate by giving different occurrences different integer indexes. + * However, child tables are disambiguated by including the table name rather + * than by incrementing the occurrence number; and joins are not named and so + * shouldn't increment the occurence number either. + */ +static int +pgpa_occurrence_number(List *rtable, Index *top_rti_map, + SubPlanRTInfo *rtinfo, Index rti) +{ + Index rtoffset = (rtinfo == NULL) ? 0 : rtinfo->rtoffset; + int occurrence = 1; + RangeTblEntry *rte = rt_fetch(rti, rtable); + + for (Index prior_rti = rtoffset + 1; prior_rti < rti; ++prior_rti) + { + RangeTblEntry *prior_rte; + + /* + * If this is a child rel of a parent that is a relation, skip it. + * + * Such range table entries are disambiguated by mentioning the schema + * and name of the table, not by counting them as separate occurrences + * of the same table. + */ + if (top_rti_map[prior_rti - 1] != prior_rti) + break; + + /* Skip joins. */ + prior_rte = rt_fetch(prior_rti, rtable); + if (prior_rte->rtekind == RTE_JOIN) + continue; + + /* Skip if the alias name differs. */ + if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0) + continue; + + /* Looks like a true duplicate. */ + ++occurrence; + } + + return occurrence; +} diff --git a/contrib/pg_plan_advice/pgpa_identifier.h b/contrib/pg_plan_advice/pgpa_identifier.h new file mode 100644 index 0000000000..b000d2b708 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_identifier.h @@ -0,0 +1,52 @@ +/*------------------------------------------------------------------------- + * + * pgpa_identifier.h + * create appropriate identifiers for range table entries + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_identifier.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PGPA_IDENTIFIER_H +#define PGPA_IDENTIFIER_H + +#include "nodes/pathnodes.h" +#include "nodes/plannodes.h" + +typedef struct pgpa_identifier +{ + const char *alias_name; + int occurrence; + const char *partnsp; + const char *partrel; + const char *plan_name; +} pgpa_identifier; + +/* Convenience function for comparing possibly-NULL strings. */ +static inline bool +strings_equal_or_both_null(const char *a, const char *b) +{ + if (a == b) + return true; + else if (a == NULL || b == NULL) + return false; + else + return strcmp(a, b) == 0; +} + +extern const char *pgpa_identifier_string(const pgpa_identifier *rid); +extern void pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti, + pgpa_identifier *rid); +extern int pgpa_compute_identifiers_by_relids(PlannerInfo *root, + Bitmapset *relids, + pgpa_identifier *rids); +extern pgpa_identifier *pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt); + +extern Index pgpa_compute_rti_from_identifier(int rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_identifier *rid); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_join.c b/contrib/pg_plan_advice/pgpa_join.c new file mode 100644 index 0000000000..7d1ab72f11 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_join.c @@ -0,0 +1,629 @@ +/*------------------------------------------------------------------------- + * + * pgpa_join.c + * analysis of joins in Plan trees + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_join.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "pgpa_join.h" +#include "pgpa_scan.h" +#include "pgpa_walker.h" + +#include "nodes/pathnodes.h" +#include "nodes/print.h" +#include "parser/parsetree.h" + +/* + * Temporary object used when unrolling a join tree. + */ +struct pgpa_join_unroller +{ + unsigned nallocated; + unsigned nused; + Plan *outer_subplan; + ElidedNode *outer_elided_node; + bool outer_beneath_any_gather; + pgpa_join_strategy *strategy; + Plan **inner_subplans; + ElidedNode **inner_elided_nodes; + pgpa_join_unroller **inner_unrollers; + bool *inner_beneath_any_gather; +}; + +static pgpa_join_strategy pgpa_decompose_join(pgpa_plan_walker_context *walker, + Plan *plan, + Plan **realouter, + Plan **realinner, + ElidedNode **elidedrealouter, + ElidedNode **elidedrealinner, + bool *found_any_outer_gather, + bool *found_any_inner_gather); +static ElidedNode *pgpa_descend_node(PlannedStmt *pstmt, Plan **plan); +static ElidedNode *pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan, + bool *found_any_gather); +static bool pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan, + ElidedNode **elided_node); + +static bool is_result_node_with_child(Plan *plan); +static bool is_sorting_plan(Plan *plan); + +/* + * Create an initially-empty object for unrolling joins. + * + * This function creates a helper object that can later be used to create a + * pgpa_unrolled_join, after first calling pgpa_unroll_join one or more times. + */ +pgpa_join_unroller * +pgpa_create_join_unroller(void) +{ + pgpa_join_unroller *join_unroller; + + join_unroller = palloc0_object(pgpa_join_unroller); + join_unroller->nallocated = 4; + join_unroller->strategy = + palloc_array(pgpa_join_strategy, join_unroller->nallocated); + join_unroller->inner_subplans = + palloc_array(Plan *, join_unroller->nallocated); + join_unroller->inner_elided_nodes = + palloc_array(ElidedNode *, join_unroller->nallocated); + join_unroller->inner_unrollers = + palloc_array(pgpa_join_unroller *, join_unroller->nallocated); + join_unroller->inner_beneath_any_gather = + palloc_array(bool, join_unroller->nallocated); + + return join_unroller; +} + +/* + * Unroll one level of an unrollable join tree. + * + * Our basic goal here is to unroll join trees as they occur in the Plan + * tree into a simpler and more regular structure that we can more easily + * use for further processing. Unrolling is outer-deep, so if the plan tree + * has Join1(Join2(A,B),Join3(C,D)), the same join unroller object should be + * used for Join1 and Join2, but a different one will be needed for Join3, + * since that involves a join within the *inner* side of another join. + * + * pgpa_plan_walker creates a "top level" join unroller object when it + * encounters a join in a portion of the plan tree in which no join unroller + * is already active. From there, this function is responsible for determing + * to what portion of the plan tree that join unroller applies, and for + * creating any subordinate join unroller objects that are needed as a result + * of non-outer-deep join trees. We do this by returning the join unroller + * objects that should be used for further traversal of the outer and inner + * subtrees of the current plan node via *outer_join_unroller and + * *inner_join_unroller, respectively. + */ +void +pgpa_unroll_join(pgpa_plan_walker_context *walker, Plan *plan, + bool beneath_any_gather, + pgpa_join_unroller *join_unroller, + pgpa_join_unroller **outer_join_unroller, + pgpa_join_unroller **inner_join_unroller) +{ + pgpa_join_strategy strategy; + Plan *realinner, + *realouter; + ElidedNode *elidedinner, + *elidedouter; + int n; + bool found_any_outer_gather = false; + bool found_any_inner_gather = false; + + Assert(join_unroller != NULL); + + /* + * We need to pass the join_unroller object down through certain types of + * plan nodes -- anything that's considered part of the join strategy, and + * any other nodes that can occur in a join tree despite not being scans + * or joins. + * + * This includes: + * + * (1) Materialize, Memoize, and Hash nodes, which are part of the join + * strategy, + * + * (2) Gather and Gather Merge nodes, which can occur at any point in the + * join tree where the planner decided to initiate parallelism, + * + * (3) Sort and IncrementalSort nodes, which can occur beneath MergeJoin + * or GatherMerge, + * + * (4) Agg and Unique nodes, which can occur when we decide to make the + * nullable side of a semijoin unique and then join the result, and + * + * (5) Result nodes with children, which can be added either to project to + * enforce a one-time filter (but Result nodes without children are + * degenerate scans or joins). + */ + if (IsA(plan, Material) || IsA(plan, Memoize) || IsA(plan, Hash) + || IsA(plan, Gather) || IsA(plan, GatherMerge) + || is_sorting_plan(plan) || IsA(plan, Agg) || IsA(plan, Unique) + || is_result_node_with_child(plan)) + { + *outer_join_unroller = join_unroller; + return; + } + + /* + * Since we've already handled nodes that require pass-through treatment, + * this should be an unrollable join. + */ + strategy = pgpa_decompose_join(walker, plan, + &realouter, &realinner, + &elidedouter, &elidedinner, + &found_any_outer_gather, + &found_any_inner_gather); + + /* If our workspace is full, expand it. */ + if (join_unroller->nused >= join_unroller->nallocated) + { + join_unroller->nallocated *= 2; + join_unroller->strategy = + repalloc_array(join_unroller->strategy, + pgpa_join_strategy, + join_unroller->nallocated); + join_unroller->inner_subplans = + repalloc_array(join_unroller->inner_subplans, + Plan *, + join_unroller->nallocated); + join_unroller->inner_elided_nodes = + repalloc_array(join_unroller->inner_elided_nodes, + ElidedNode *, + join_unroller->nallocated); + join_unroller->inner_beneath_any_gather = + repalloc_array(join_unroller->inner_beneath_any_gather, + bool, + join_unroller->nallocated); + join_unroller->inner_unrollers = + repalloc_array(join_unroller->inner_unrollers, + pgpa_join_unroller *, + join_unroller->nallocated); + } + + /* + * Since we're flattening outer-deep join trees, it follows that if the + * outer side is still an unrollable join, it should be unrolled into this + * same object. Otherwise, we've reached the limit of what we can unroll + * into this object and must remember the outer side as the final outer + * subplan. + */ + if (elidedouter == NULL && pgpa_is_join(realouter)) + *outer_join_unroller = join_unroller; + else + { + join_unroller->outer_subplan = realouter; + join_unroller->outer_elided_node = elidedouter; + join_unroller->outer_beneath_any_gather = + beneath_any_gather || found_any_outer_gather; + } + + /* + * Store the inner subplan. If it's an unrollable join, it needs to be + * flattened in turn, but into a new unroller object, not this one. + */ + n = join_unroller->nused++; + join_unroller->strategy[n] = strategy; + join_unroller->inner_subplans[n] = realinner; + join_unroller->inner_elided_nodes[n] = elidedinner; + join_unroller->inner_beneath_any_gather[n] = + beneath_any_gather || found_any_inner_gather; + if (elidedinner == NULL && pgpa_is_join(realinner)) + *inner_join_unroller = pgpa_create_join_unroller(); + else + *inner_join_unroller = NULL; + join_unroller->inner_unrollers[n] = *inner_join_unroller; +} + +/* + * Use the data we've accumulated in a pgpa_join_unroller object to construct + * a pgpa_unrolled_join. + */ +pgpa_unrolled_join * +pgpa_build_unrolled_join(pgpa_plan_walker_context *walker, + pgpa_join_unroller *join_unroller) +{ + pgpa_unrolled_join *ujoin; + int i; + + /* + * We shouldn't have gone even so far as to create a join unroller unless + * we found at least one unrollable join. + */ + Assert(join_unroller->nused > 0); + + /* Allocate result structures. */ + ujoin = palloc0_object(pgpa_unrolled_join); + ujoin->ninner = join_unroller->nused; + ujoin->strategy = palloc0_array(pgpa_join_strategy, join_unroller->nused); + ujoin->inner = palloc0_array(pgpa_join_member, join_unroller->nused); + + /* Handle the outermost join. */ + ujoin->outer.plan = join_unroller->outer_subplan; + ujoin->outer.elided_node = join_unroller->outer_elided_node; + ujoin->outer.scan = + pgpa_build_scan(walker, ujoin->outer.plan, + ujoin->outer.elided_node, + join_unroller->outer_beneath_any_gather, + true); + + /* + * We want the joins from the deepest part of the plan tree to appear + * first in the result object, but the join unroller adds them in exactly + * the reverse of that order, so we need to flip the order of the arrays + * when constructing the final result. + */ + for (i = 0; i < join_unroller->nused; ++i) + { + int k = join_unroller->nused - i - 1; + + /* Copy strategy, Plan, and ElidedNode. */ + ujoin->strategy[i] = join_unroller->strategy[k]; + ujoin->inner[i].plan = join_unroller->inner_subplans[k]; + ujoin->inner[i].elided_node = join_unroller->inner_elided_nodes[k]; + + /* + * Fill in remaining details, using either the nested join unroller, + * or by deriving them from the plan and elided nodes. + */ + if (join_unroller->inner_unrollers[k] != NULL) + ujoin->inner[i].unrolled_join = + pgpa_build_unrolled_join(walker, + join_unroller->inner_unrollers[k]); + else + ujoin->inner[i].scan = + pgpa_build_scan(walker, ujoin->inner[i].plan, + ujoin->inner[i].elided_node, + join_unroller->inner_beneath_any_gather[i], + true); + } + + return ujoin; +} + +/* + * Free memory allocated for pgpa_join_unroller. + */ +void +pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller) +{ + pfree(join_unroller->strategy); + pfree(join_unroller->inner_subplans); + pfree(join_unroller->inner_elided_nodes); + pfree(join_unroller->inner_unrollers); + pfree(join_unroller); +} + +/* + * Identify the join strategy used by a join and the "real" inner and outer + * plans. + * + * For example, a Hash Join always has a Hash node on the inner side, but + * for all intents and purposes the real inner input is the Hash node's child, + * not the Hash node itself. + * + * Likewise, a Merge Join may have Sort note on the inner or outer side; if + * it does, the real input to the join is the Sort node's child, not the + * Sort node itself. + * + * In addition, with a Merge Join or a Nested Loop, the join planning code + * may add additional nodes such as Materialize or Memoize. We regard these + * as an aspect of the join strategy. As in the previous cases, the true input + * to the join is the underlying node. + * + * However, if any involved child node previously had a now-elided node stacked + * on top, then we can't "look through" that node -- indeed, what's going to be + * relevant for our purposes is the ElidedNode on top of that plan node, rather + * than the plan node itself. + * + * If there are multiple elided nodes, we want that one that would have been + * uppermost in the plan tree prior to setrefs processing; we expect to find + * that one last in the list of elided nodes. + * + * On return *realouter and *realinner will have been set to the real inner + * and real outer plans that we identified, and *elidedrealouter and + * *elidedrealinner to the last of any correspoding elided nodes. + * Additionally, *found_any_outer_gather and *found_any_inner_gather will + * be set to true if we looked through a Gather or Gather Merge node on + * that side of the join, and false otherwise. + */ +static pgpa_join_strategy +pgpa_decompose_join(pgpa_plan_walker_context *walker, Plan *plan, + Plan **realouter, Plan **realinner, + ElidedNode **elidedrealouter, ElidedNode **elidedrealinner, + bool *found_any_outer_gather, bool *found_any_inner_gather) +{ + PlannedStmt *pstmt = walker->pstmt; + JoinType jointype = ((Join *) plan)->jointype; + Plan *outerplan = plan->lefttree; + Plan *innerplan = plan->righttree; + ElidedNode *elidedouter; + ElidedNode *elidedinner; + pgpa_join_strategy strategy; + bool uniqueouter; + bool uniqueinner; + + elidedouter = pgpa_last_elided_node(pstmt, outerplan); + elidedinner = pgpa_last_elided_node(pstmt, innerplan); + *found_any_outer_gather = false; + *found_any_inner_gather = false; + + switch (nodeTag(plan)) + { + case T_MergeJoin: + + /* + * The planner may have chosen to place a Material node on the + * inner side of the MergeJoin; if this is present, we record it + * as part of the join strategy. + */ + if (elidedinner == NULL && IsA(innerplan, Material)) + { + elidedinner = pgpa_descend_node(pstmt, &innerplan); + strategy = JSTRAT_MERGE_JOIN_MATERIALIZE; + } + else + strategy = JSTRAT_MERGE_JOIN_PLAIN; + + /* + * For a MergeJoin, either the outer or the inner subplan, or + * both, may have needed to be sorted; we must disregard any Sort + * or IncrementalSort node to find the real inner or outer + * subplan. + */ + if (elidedouter == NULL && is_sorting_plan(outerplan)) + elidedouter = pgpa_descend_node(pstmt, &outerplan); + if (elidedinner == NULL && is_sorting_plan(innerplan)) + elidedinner = pgpa_descend_node(pstmt, &innerplan); + break; + + case T_NestLoop: + + /* + * The planner may have chosen to place a Material or Memoize node + * on the inner side of the NestLoop; if this is present, we + * record it as part of the join strategy. + */ + if (elidedinner == NULL && IsA(innerplan, Material)) + { + elidedinner = pgpa_descend_node(pstmt, &innerplan); + strategy = JSTRAT_NESTED_LOOP_MATERIALIZE; + } + else if (elidedinner == NULL && IsA(innerplan, Memoize)) + { + elidedinner = pgpa_descend_node(pstmt, &innerplan); + strategy = JSTRAT_NESTED_LOOP_MEMOIZE; + } + else + strategy = JSTRAT_NESTED_LOOP_PLAIN; + break; + + case T_HashJoin: + + /* + * The inner subplan of a HashJoin is always a Hash node; the real + * inner subplan is the Hash node's child. + */ + Assert(IsA(innerplan, Hash)); + Assert(elidedinner == NULL); + elidedinner = pgpa_descend_node(pstmt, &innerplan); + strategy = JSTRAT_HASH_JOIN; + break; + + default: + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(plan)); + } + + /* + * The planner may have decided to implement a semijoin by first making + * the nullable side of the plan unique, and then performing a normal join + * against the result. Therefore, we might need to descend through a + * unique node on either side of the plan. + */ + uniqueouter = pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter); + uniqueinner = pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner); + + /* + * The planner may have decided to parallelize part of the join tree, so + * we could find a Gather or Gather Merge node here. Note that, if + * present, this will appear below nodes we considered as part of the join + * strategy, but we could find another uniqueness-enforcing node below the + * Gather or Gather Merge, if present. + */ + if (elidedouter == NULL) + { + elidedouter = pgpa_descend_any_gather(pstmt, &outerplan, + found_any_outer_gather); + if (found_any_outer_gather && + pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter)) + uniqueouter = true; + } + if (elidedinner == NULL) + { + elidedinner = pgpa_descend_any_gather(pstmt, &innerplan, + found_any_inner_gather); + if (found_any_inner_gather && + pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner)) + uniqueinner = true; + } + + /* + * It's possible that Result node has been inserted either to project a + * target list or to implement a one-time filter. If so, we can descend + * throught it. Note that a result node without a child would be a + * degenerate scan or join, and not something we could descend through. + * + * XXX. I suspect it's possible for this to happen above the Gather or + * Gather Merge node, too, but apparently we have no test case for that + * scenario. + */ + if (elidedouter == NULL && is_result_node_with_child(outerplan)) + elidedouter = pgpa_descend_node(pstmt, &outerplan); + if (elidedinner == NULL && is_result_node_with_child(innerplan)) + elidedinner = pgpa_descend_node(pstmt, &innerplan); + + /* + * If this is a semijoin that was converted to an inner join by making one + * side or the other unique, make a note that the inner or outer subplan, + * as appropriate, should be treated as a query plan feature when the main + * tree traversal reaches it. + * + * Conversely, if the planner could have made one side of the join unique + * and thereby converted it to an inner join, and chose not to do so, that + * is also worth noting. + * + * NB: This code could appear slightly higher up in in this function, but + * none of the nodes through which we just descended should have + * associated RTIs. + * + * NB: This seems like a somewhat hacky way of passing information up to + * the main tree walk, but I don't currently have a better idea. + */ + if (uniqueouter) + pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_UNIQUE, outerplan); + else if (jointype == JOIN_RIGHT_SEMI) + pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_NON_UNIQUE, outerplan); + if (uniqueinner) + pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_UNIQUE, innerplan); + else if (jointype == JOIN_SEMI) + pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_NON_UNIQUE, innerplan); + + /* Set output parameters. */ + *realouter = outerplan; + *realinner = innerplan; + *elidedrealouter = elidedouter; + *elidedrealinner = elidedinner; + return strategy; +} + +/* + * Descend through a Plan node in a join tree that the caller has determined + * to be irrelevant. + * + * Updates *plan, and returns the last of any elided nodes pertaining to the + * new plan node. + */ +static ElidedNode * +pgpa_descend_node(PlannedStmt *pstmt, Plan **plan) +{ + *plan = (*plan)->lefttree; + return pgpa_last_elided_node(pstmt, *plan); +} + +/* + * Descend through a Gather or Gather Merge node, if present, and any Sort + * or IncrementalSort node occurring under a Gather Merge. + * + * Caller should have verified that there is no ElidedNode pertaining to + * the initial value of *plan. + * + * Updates *plan, and returns the last of any elided nodes pertaining to the + * new plan node. Sets *found_any_gather = true if either Gather or + * Gather Merge was found, and otherwise leaves it unchanged. + */ +static ElidedNode * +pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan, + bool *found_any_gather) +{ + if (IsA(*plan, Gather)) + { + *found_any_gather = true; + return pgpa_descend_node(pstmt, plan); + } + + if (IsA(*plan, GatherMerge)) + { + ElidedNode *elided = pgpa_descend_node(pstmt, plan); + + if (elided == NULL && is_sorting_plan(*plan)) + elided = pgpa_descend_node(pstmt, plan); + + *found_any_gather = true; + return elided; + } + + return NULL; +} + +/* + * If *plan is an Agg or Unique node, we want to descend through it, unless + * it has a corresponding elided node. If its immediate child is a Sort or + * IncrementalSort, we also want to descend through that, unless it has a + * corresponding elided node. + * + * On entry, *elided_node must be the last of any elided nodes corresponding + * to *plan; on exit, this will still be true, but *plan may have been updated. + * + * The reason we don't want to descend through elided nodes is that a single + * join tree can't cross through any sort of elided node: subqueries are + * planned separately, and planning inside an Append or MergeAppend is + * separate from planning outside of it. + * + * The return value is true if we descend through a node that we believe is + * making one side of a semijoin unique, and otherwise false. + */ +static bool +pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan, + ElidedNode **elided_node) +{ + bool descend = false; + bool sjunique = false; + + if (*elided_node != NULL) + return sjunique; + + if (IsA(*plan, Unique)) + { + descend = true; + sjunique = true; + } + else if (IsA(*plan, Agg)) + { + /* + * If this is a simple Agg node, then assume it's here to implement + * semijoin uniqueness. Otherwise, assume it's completing an eager + * aggregation or partitionwise aggregation operation that began at a + * higher level of the plan tree. + * + * XXX. I suspect this logic does not cover all cases: couldn't SJ + * uniqueness be implemented in two steps with an intermediate Gather? + */ + descend = true; + sjunique = (((Agg *) *plan)->aggsplit == AGGSPLIT_SIMPLE); + } + + if (descend) + { + *elided_node = pgpa_descend_node(pstmt, plan); + + if (*elided_node == NULL && is_sorting_plan(*plan)) + *elided_node = pgpa_descend_node(pstmt, plan); + } + + return sjunique; +} + +/* + * Is this a Result node that has a child? + */ +static bool +is_result_node_with_child(Plan *plan) +{ + return IsA(plan, Result) && plan->lefttree != NULL; +} + +/* + * Is this a Plan node whose purpose is put the data in a certain order? + */ +static bool +is_sorting_plan(Plan *plan) +{ + return IsA(plan, Sort) || IsA(plan, IncrementalSort); +} diff --git a/contrib/pg_plan_advice/pgpa_join.h b/contrib/pg_plan_advice/pgpa_join.h new file mode 100644 index 0000000000..4dc72986a7 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_join.h @@ -0,0 +1,105 @@ +/*------------------------------------------------------------------------- + * + * pgpa_join.h + * analysis of joins in Plan trees + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_join.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_JOIN_H +#define PGPA_JOIN_H + +#include "nodes/plannodes.h" + +typedef struct pgpa_plan_walker_context pgpa_plan_walker_context; +typedef struct pgpa_join_unroller pgpa_join_unroller; +typedef struct pgpa_unrolled_join pgpa_unrolled_join; + +/* + * Although there are three main join strategies, we try to classify things + * more precisely here: merge joins have the option of using materialization + * on the inner side, and nested loops can use either materialization or + * memoization. + */ +typedef enum +{ + JSTRAT_MERGE_JOIN_PLAIN = 0, + JSTRAT_MERGE_JOIN_MATERIALIZE, + JSTRAT_NESTED_LOOP_PLAIN, + JSTRAT_NESTED_LOOP_MATERIALIZE, + JSTRAT_NESTED_LOOP_MEMOIZE, + JSTRAT_HASH_JOIN + /* update NUM_PGPA_JOIN_STRATEGY if you add anything here */ +} pgpa_join_strategy; + +#define NUM_PGPA_JOIN_STRATEGY ((int) JSTRAT_HASH_JOIN + 1) + +/* + * In an outer-deep join tree, every member of an unrolled join will be a scan, + * but join trees with other shapes can contain unrolled joins. + * + * The plan node we store here will be the inner or outer child of the join + * node, as appropriate, except that we look through subnodes that we regard as + * part of the join method itself. For instance, for a Nested Loop that + * materializes the inner input, we'll store the child of the Materialize node, + * not the Materialize node itself. + * + * If setrefs processing elided one or more nodes from the plan tree, then + * we'll store details about the topmost of those in elided_node; otherwise, + * it will be NULL. + * + * Exactly one of scan and unrolled_join will be non-NULL. + */ +typedef struct +{ + Plan *plan; + ElidedNode *elided_node; + struct pgpa_scan *scan; + pgpa_unrolled_join *unrolled_join; +} pgpa_join_member; + +/* + * We convert outer-deep join trees to a flat structure; that is, ((A JOIN B) + * JOIN C) JOIN D gets converted to outer = A, inner = <B C D>. When joins + * aren't outer-deep, substructure is required, e.g. (A JOIN B) JOIN (C JOIN D) + * is represented as outer = A, inner = <B X>, where X is a pgpa_unrolled_join + * covering C-D. + */ +struct pgpa_unrolled_join +{ + /* Outermost member; must not itself be an unrolled join. */ + pgpa_join_member outer; + + /* Number of inner members. Length of the strategy and inner arrays. */ + unsigned ninner; + + /* Array of strategies, one per non-outermost member. */ + pgpa_join_strategy *strategy; + + /* Array of members, excluding the outermost. Deepest first. */ + pgpa_join_member *inner; +}; + +/* + * Does this plan node inherit from Join? + */ +static inline bool +pgpa_is_join(Plan *plan) +{ + return IsA(plan, NestLoop) || IsA(plan, MergeJoin) || IsA(plan, HashJoin); +} + +extern pgpa_join_unroller *pgpa_create_join_unroller(void); +extern void pgpa_unroll_join(pgpa_plan_walker_context *walker, + Plan *plan, bool beneath_any_gather, + pgpa_join_unroller *join_unroller, + pgpa_join_unroller **outer_join_unroller, + pgpa_join_unroller **inner_join_unroller); +extern pgpa_unrolled_join *pgpa_build_unrolled_join(pgpa_plan_walker_context *walker, + pgpa_join_unroller *join_unroller); +extern void pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_output.c b/contrib/pg_plan_advice/pgpa_output.c new file mode 100644 index 0000000000..89a675ff93 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_output.c @@ -0,0 +1,628 @@ +/*------------------------------------------------------------------------- + * + * pgpa_output.c + * produce textual output from the results of a plan tree walk + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_output.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "pgpa_output.h" +#include "pgpa_scan.h" + +#include "nodes/parsenodes.h" +#include "parser/parsetree.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" + +/* + * Context object for textual advice generation. + * + * rt_identifiers is the caller-provided array of range table identifiers. + * See the comments at the top of pgpa_identifier.c for more details. + * + * buf is the caller-provided output buffer. + * + * wrap_column is the wrap column, so that we don't create output that is + * too wide. See pgpa_maybe_linebreak() and comments in pgpa_output_advice. + */ +typedef struct pgpa_output_context +{ + const char **rid_strings; + StringInfo buf; + int wrap_column; +} pgpa_output_context; + +static void pgpa_output_unrolled_join(pgpa_output_context *context, + pgpa_unrolled_join *join); +static void pgpa_output_join_member(pgpa_output_context *context, + pgpa_join_member *member); +static void pgpa_output_scan_strategy(pgpa_output_context *context, + pgpa_scan_strategy strategy, + List *scans); +static void pgpa_output_bitmap_index_details(pgpa_output_context *context, + Plan *plan); +static void pgpa_output_relation_name(pgpa_output_context *context, Oid relid); +static void pgpa_output_query_feature(pgpa_output_context *context, + pgpa_qf_type type, + List *query_features); +static void pgpa_output_simple_strategy(pgpa_output_context *context, + char *strategy, + List *relid_sets); +static void pgpa_output_no_gather(pgpa_output_context *context, + Bitmapset *relids); +static void pgpa_output_relations(pgpa_output_context *context, StringInfo buf, + Bitmapset *relids); + +static char *pgpa_cstring_join_strategy(pgpa_join_strategy strategy); +static char *pgpa_cstring_scan_strategy(pgpa_scan_strategy strategy); +static char *pgpa_cstring_query_feature_type(pgpa_qf_type type); + +static void pgpa_maybe_linebreak(StringInfo buf, int wrap_column); + +/* + * Append query advice to the provided buffer. + * + * Before calling this function, 'walker' must be used to iterate over the + * main plan tree and all subplans from the PlannedStmt. + * + * 'rt_identifiers' is a table of unique identifiers, one for each RTI. + * See pgpa_create_identifiers_for_planned_stmt(). + * + * Results will be appended to 'buf'. + */ +void +pgpa_output_advice(StringInfo buf, pgpa_plan_walker_context *walker, + pgpa_identifier *rt_identifiers) +{ + Index rtable_length = list_length(walker->pstmt->rtable); + ListCell *lc; + pgpa_output_context context; + + /* Basic initialization. */ + memset(&context, 0, sizeof(pgpa_output_context)); + context.buf = buf; + + /* + * Convert identifiers to string form. Note that the loop variable here is + * not an RTI, because RTIs are 1-based. Some RTIs will have no + * identifier, either because the reloptkind is RTE_JOIN or because that + * portion of the query didn't make it into the final plan. + */ + context.rid_strings = palloc0_array(const char *, rtable_length); + for (int i = 0; i < rtable_length; ++i) + if (rt_identifiers[i].alias_name != NULL) + context.rid_strings[i] = pgpa_identifier_string(&rt_identifiers[i]); + + /* + * If the user chooses to use EXPLAIN (PLAN_ADVICE) in an 80-column window + * from a psql client with default settings, psql will add one space to + * the left of the output and EXPLAIN will add two more to the left of the + * advice. Thus, lines of more than 77 characters will wrap. We set the + * wrap limit to 76 here so that the output won't reach all the way to the + * very last column of the terminal. + * + * Of course, this is fairly arbitrary set of assumptions, and one could + * well make an argument for a different wrap limit, or for a configurable + * one. + */ + context.wrap_column = 76; + + /* + * Each piece of JOIN_ORDER() advice fully describes the join order for a + * a single unrolled join. Merging is not permitted, because that would + * change the meaning, e.g. SEQ_SCAN(a b c d) means simply that sequential + * scans should be used for all of those relations, and is thus equivalent + * to SEQ_SCAN(a b) SEQ_SCAN(c d), but JOIN_ORDER(a b c d) means that "a" + * is the driving table which is then joined to "b" then "c" then "d", + * which is totally different from JOIN_ORDER(a b) and JOIN_ORDER(c d). + */ + foreach(lc, walker->toplevel_unrolled_joins) + { + pgpa_unrolled_join *ujoin = lfirst(lc); + + if (buf->len > 0) + appendStringInfoChar(buf, '\n'); + appendStringInfo(context.buf, "JOIN_ORDER("); + pgpa_output_unrolled_join(&context, ujoin); + appendStringInfoChar(context.buf, ')'); + pgpa_maybe_linebreak(context.buf, context.wrap_column); + } + + /* Emit join strategy advice. */ + for (int s = 0; s < NUM_PGPA_JOIN_STRATEGY; ++s) + { + char *strategy = pgpa_cstring_join_strategy(s); + + pgpa_output_simple_strategy(&context, + strategy, + walker->join_strategies[s]); + } + + /* + * Emit scan strategy advice (but not for ordinary scans, which are + * definitionally uninteresting). + */ + for (int c = 0; c < NUM_PGPA_SCAN_STRATEGY; ++c) + if (c != PGPA_SCAN_ORDINARY) + pgpa_output_scan_strategy(&context, c, walker->scans[c]); + + /* Emit query feature advice. */ + for (int t = 0; t < NUM_PGPA_QF_TYPES; ++t) + pgpa_output_query_feature(&context, t, walker->query_features[t]); + + /* Emit NO_GATHER advice. */ + pgpa_output_no_gather(&context, walker->no_gather_scans); +} + +/* + * Output the members of an unrolled join, first the outermost member, and + * then the inner members one by one, as part of JOIN_ORDER() advice. + */ +static void +pgpa_output_unrolled_join(pgpa_output_context *context, + pgpa_unrolled_join *join) +{ + pgpa_output_join_member(context, &join->outer); + + for (int k = 0; k < join->ninner; ++k) + { + pgpa_join_member *member = &join->inner[k]; + + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + pgpa_output_join_member(context, member); + } +} + +/* + * Output a single member of an unrolled join as part of JOIN_ORDER() advice. + */ +static void +pgpa_output_join_member(pgpa_output_context *context, + pgpa_join_member *member) +{ + if (member->unrolled_join != NULL) + { + appendStringInfoChar(context->buf, '('); + pgpa_output_unrolled_join(context, member->unrolled_join); + appendStringInfoChar(context->buf, ')'); + } + else + { + pgpa_scan *scan = member->scan; + + Assert(scan != NULL); + if (bms_membership(scan->relids) == BMS_SINGLETON) + pgpa_output_relations(context, context->buf, scan->relids); + else + { + appendStringInfoChar(context->buf, '{'); + pgpa_output_relations(context, context->buf, scan->relids); + appendStringInfoChar(context->buf, '}'); + } + } +} + +/* + * Output advice for a List of pgpa_scan objects. + * + * All the scans must use the strategy specified by the "strategy" argument. + */ +static void +pgpa_output_scan_strategy(pgpa_output_context *context, + pgpa_scan_strategy strategy, + List *scans) +{ + bool first = true; + + if (scans == NIL) + return; + + if (context->buf->len > 0) + appendStringInfoChar(context->buf, '\n'); + appendStringInfo(context->buf, "%s(", + pgpa_cstring_scan_strategy(strategy)); + + foreach_ptr(pgpa_scan, scan, scans) + { + Plan *plan = scan->plan; + + if (first) + first = false; + else + { + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + } + + /* Output the relation identifiers. */ + if (bms_membership(scan->relids) == BMS_SINGLETON) + pgpa_output_relations(context, context->buf, scan->relids); + else + { + appendStringInfoChar(context->buf, '('); + pgpa_output_relations(context, context->buf, scan->relids); + appendStringInfoChar(context->buf, ')'); + } + + /* For scans involving indexes, output index information. */ + if (strategy == PGPA_SCAN_INDEX) + { + Assert(IsA(plan, IndexScan)); + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + pgpa_output_relation_name(context, ((IndexScan *) plan)->indexid); + } + else if (strategy == PGPA_SCAN_INDEX_ONLY) + { + Assert(IsA(plan, IndexOnlyScan)); + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + pgpa_output_relation_name(context, + ((IndexOnlyScan *) plan)->indexid); + } + else if (strategy == PGPA_SCAN_BITMAP_HEAP) + { + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + pgpa_output_bitmap_index_details(context, plan->lefttree); + } + } + + appendStringInfoChar(context->buf, ')'); + pgpa_maybe_linebreak(context->buf, context->wrap_column); +} + +/* + * Output information about which index or indexes power a BitmapHeapScan. + * + * We emit &&(i1 i2 i3) for a BitmapAnd between indexes i1, i2, and i3; + * and likewise ||(i1 i2 i3) for a similar BitmapOr operation. + */ +static void +pgpa_output_bitmap_index_details(pgpa_output_context *context, Plan *plan) +{ + char *operator; + List *bitmapplans; + bool first = true; + + if (IsA(plan, BitmapIndexScan)) + { + BitmapIndexScan *bitmapindexscan = (BitmapIndexScan *) plan; + + pgpa_output_relation_name(context, bitmapindexscan->indexid); + return; + } + + if (IsA(plan, BitmapOr)) + { + operator = "||"; + bitmapplans = ((BitmapOr *) plan)->bitmapplans; + } + else if (IsA(plan, BitmapAnd)) + { + operator = "&&"; + bitmapplans = ((BitmapAnd *) plan)->bitmapplans; + } + else + elog(ERROR, "unexpected node type: %d", (int) nodeTag(plan)); + + appendStringInfo(context->buf, "%s(", operator); + foreach_ptr(Plan, child_plan, bitmapplans) + { + if (first) + first = false; + else + { + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + } + pgpa_output_bitmap_index_details(context, child_plan); + } + appendStringInfoChar(context->buf, ')'); +} + +/* + * Output a schema-qualified relation name. + */ +static void +pgpa_output_relation_name(pgpa_output_context *context, Oid relid) +{ + Oid nspoid = get_rel_namespace(relid); + char *relnamespace = get_namespace_name_or_temp(nspoid); + char *relname = get_rel_name(relid); + + appendStringInfoString(context->buf, quote_identifier(relnamespace)); + appendStringInfoChar(context->buf, '.'); + appendStringInfoString(context->buf, quote_identifier(relname)); +} + +/* + * Output advice for a List of pgpa_query_feature objects. + * + * All features must be of the type specified by the "type" argument. + */ +static void +pgpa_output_query_feature(pgpa_output_context *context, pgpa_qf_type type, + List *query_features) +{ + bool first = true; + + if (query_features == NIL) + return; + + if (context->buf->len > 0) + appendStringInfoChar(context->buf, '\n'); + appendStringInfo(context->buf, "%s(", + pgpa_cstring_query_feature_type(type)); + + foreach_ptr(pgpa_query_feature, qf, query_features) + { + if (first) + first = false; + else + { + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + } + + if (bms_membership(qf->relids) == BMS_SINGLETON) + pgpa_output_relations(context, context->buf, qf->relids); + else + { + appendStringInfoChar(context->buf, '('); + pgpa_output_relations(context, context->buf, qf->relids); + appendStringInfoChar(context->buf, ')'); + } + } + + appendStringInfoChar(context->buf, ')'); + pgpa_maybe_linebreak(context->buf, context->wrap_column); +} + +/* + * Output "simple" advice for a List of Bitmapset objects each of which + * contains one or more RTIs. + * + * By simple, we just mean that the advice emitted follows the most + * straightforward pattern: the strategy name, followed by a list of items + * separated by spaces and surrounded by parentheses. Individual items in + * the list are a single relation identifier for a Bitmapset that contains + * just one member, or a sub-list again separated by spaces and surrounded + * by parentheses for a Bitmapset with multiple members. Bitmapsets with + * no members probably shouldn't occur here, but if they do they'll be + * rendered as an empty sub-list. + */ +static void +pgpa_output_simple_strategy(pgpa_output_context *context, char *strategy, + List *relid_sets) +{ + bool first = true; + + if (relid_sets == NIL) + return; + + if (context->buf->len > 0) + appendStringInfoChar(context->buf, '\n'); + appendStringInfo(context->buf, "%s(", strategy); + + foreach_node(Bitmapset, relids, relid_sets) + { + if (first) + first = false; + else + { + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + } + + if (bms_membership(relids) == BMS_SINGLETON) + pgpa_output_relations(context, context->buf, relids); + else + { + appendStringInfoChar(context->buf, '('); + pgpa_output_relations(context, context->buf, relids); + appendStringInfoChar(context->buf, ')'); + } + } + + appendStringInfoChar(context->buf, ')'); + pgpa_maybe_linebreak(context->buf, context->wrap_column); +} + +/* + * Output NO_GATHER advice for all relations not appearing beneath any + * Gather or Gather Merge node. + */ +static void +pgpa_output_no_gather(pgpa_output_context *context, Bitmapset *relids) +{ + if (relids == NULL) + return; + if (context->buf->len > 0) + appendStringInfoChar(context->buf, '\n'); + appendStringInfoString(context->buf, "NO_GATHER("); + pgpa_output_relations(context, context->buf, relids); + appendStringInfoChar(context->buf, ')'); +} + +/* + * Output the identifiers for each RTI in the provided set. + * + * Identifiers are separated by spaces, and a line break is possible after + * each one. + */ +static void +pgpa_output_relations(pgpa_output_context *context, StringInfo buf, + Bitmapset *relids) +{ + int rti = -1; + bool first = true; + + while ((rti = bms_next_member(relids, rti)) >= 0) + { + const char *rid_string = context->rid_strings[rti - 1]; + + if (rid_string == NULL) + elog(ERROR, "no identifier for RTI %d", rti); + + if (first) + { + first = false; + appendStringInfoString(buf, rid_string); + } + else + { + pgpa_maybe_linebreak(buf, context->wrap_column); + appendStringInfo(buf, " %s", rid_string); + } + } +} + +/* + * Get a C string that corresponds to the specified join strategy. + */ +static char * +pgpa_cstring_join_strategy(pgpa_join_strategy strategy) +{ + switch (strategy) + { + case JSTRAT_MERGE_JOIN_PLAIN: + return "MERGE_JOIN_PLAIN"; + case JSTRAT_MERGE_JOIN_MATERIALIZE: + return "MERGE_JOIN_MATERIALIZE"; + case JSTRAT_NESTED_LOOP_PLAIN: + return "NESTED_LOOP_PLAIN"; + case JSTRAT_NESTED_LOOP_MATERIALIZE: + return "NESTED_LOOP_MATERIALIZE"; + case JSTRAT_NESTED_LOOP_MEMOIZE: + return "NESTED_LOOP_MEMOIZE"; + case JSTRAT_HASH_JOIN: + return "HASH_JOIN"; + } + + pg_unreachable(); + return NULL; +} + +/* + * Get a C string that corresponds to the specified scan strategy. + */ +static char * +pgpa_cstring_scan_strategy(pgpa_scan_strategy strategy) +{ + switch (strategy) + { + case PGPA_SCAN_ORDINARY: + return "ORDINARY_SCAN"; + case PGPA_SCAN_SEQ: + return "SEQ_SCAN"; + case PGPA_SCAN_BITMAP_HEAP: + return "BITMAP_HEAP_SCAN"; + case PGPA_SCAN_FOREIGN: + return "FOREIGN_JOIN"; + case PGPA_SCAN_INDEX: + return "INDEX_SCAN"; + case PGPA_SCAN_INDEX_ONLY: + return "INDEX_ONLY_SCAN"; + case PGPA_SCAN_PARTITIONWISE: + return "PARTITIONWISE"; + case PGPA_SCAN_TID: + return "TID_SCAN"; + } + + pg_unreachable(); + return NULL; +} + +/* + * Get a C string that corresponds to the specified scan strategy. + */ +static char * +pgpa_cstring_query_feature_type(pgpa_qf_type type) +{ + switch (type) + { + case PGPAQF_GATHER: + return "GATHER"; + case PGPAQF_GATHER_MERGE: + return "GATHER_MERGE"; + case PGPAQF_SEMIJOIN_NON_UNIQUE: + return "SEMIJOIN_NON_UNIQUE"; + case PGPAQF_SEMIJOIN_UNIQUE: + return "SEMIJOIN_UNIQUE"; + } + + + pg_unreachable(); + return NULL; +} + +/* + * Insert a line break into the StringInfoData, if needed. + * + * If wrap_column is zero or negative, this does nothing. Otherwise, we + * consider inserting a newline. We only insert a newline if the length of + * the last line in the buffer exceeds wrap_column, and not if we'd be + * inserting a newline at or before the beginning of the current line. + * + * The position at which the newline is inserted is simply wherever the + * buffer ended the last time this function was called. In other words, + * the caller is expected to call this function every time we reach a good + * place for a line break. + */ +static void +pgpa_maybe_linebreak(StringInfo buf, int wrap_column) +{ + char *trailing_nl; + int line_start; + int save_cursor; + + /* If line wrapping is disabled, exit quickly. */ + if (wrap_column <= 0) + return; + + /* + * Set line_start to the byte offset within buf->data of the first + * character of the current line, where the current line means the last + * one in the buffer. Note that line_start could be the offset of the + * trailing '\0' if the last character in the buffer is a line break. + */ + trailing_nl = strrchr(buf->data, '\n'); + if (trailing_nl == NULL) + line_start = 0; + else + line_start = (trailing_nl - buf->data) + 1; + + /* + * Remember that the current end of the buffer is a potential location to + * insert a line break on a future call to this function. + */ + save_cursor = buf->cursor; + buf->cursor = buf->len; + + /* If we haven't passed the wrap column, we don't need a newline. */ + if (buf->len - line_start <= wrap_column) + return; + + /* + * It only makes sense to insert a newline at a position later than the + * beginning of the current line. + */ + if (buf->cursor <= line_start) + return; + + /* Insert a newline at the previous cursor location. */ + enlargeStringInfo(buf, 1); + memmove(&buf->data[save_cursor] + 1, &buf->data[save_cursor], + buf->len - save_cursor); + ++buf->cursor; + buf->data[++buf->len] = '\0'; + buf->data[save_cursor] = '\n'; +} diff --git a/contrib/pg_plan_advice/pgpa_output.h b/contrib/pg_plan_advice/pgpa_output.h new file mode 100644 index 0000000000..47496d76f5 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_output.h @@ -0,0 +1,22 @@ +/*------------------------------------------------------------------------- + * + * pgpa_output.h + * produce textual output from the results of a plan tree walk + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_output.c + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_OUTPUT_H +#define PGPA_OUTPUT_H + +#include "pgpa_identifier.h" +#include "pgpa_walker.h" + +extern void pgpa_output_advice(StringInfo buf, + pgpa_plan_walker_context *walker, + pgpa_identifier *rt_identifiers); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_parser.y b/contrib/pg_plan_advice/pgpa_parser.y new file mode 100644 index 0000000000..4617e7f2f6 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_parser.y @@ -0,0 +1,337 @@ +%{ +/* + * Parser for plan advice + * + * Copyright (c) 2000-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_parser.y + */ + +#include "postgres.h" + +#include <float.h> +#include <math.h> + +#include "fmgr.h" +#include "nodes/miscnodes.h" +#include "utils/builtins.h" +#include "utils/float.h" + +#include "pgpa_ast.h" +#include "pgpa_parser.h" + +/* + * Bison doesn't allocate anything that needs to live across parser calls, + * so we can easily have it use palloc instead of malloc. This prevents + * memory leaks if we error out during parsing. + */ +#define YYMALLOC palloc +#define YYFREE pfree +%} + +/* BISON Declarations */ +%parse-param {List **result} +%parse-param {char **parse_error_msg_p} +%parse-param {yyscan_t yyscanner} +%lex-param {List **result} +%lex-param {char **parse_error_msg_p} +%lex-param {yyscan_t yyscanner} +%pure-parser +%expect 0 +%name-prefix="pgpa_yy" + +%union +{ + char *str; + int integer; + List *list; + pgpa_advice_item *item; + pgpa_advice_target *target; + pgpa_index_target *itarget; +} +%token <str> TOK_IDENT TOK_TAG_JOIN_ORDER TOK_TAG_BITMAP TOK_TAG_INDEX +%token <str> TOK_TAG_SIMPLE TOK_TAG_GENERIC +%token <integer> TOK_INTEGER +%token TOK_OR TOK_AND + +%type <integer> opt_ri_occurrence +%type <item> advice_item +%type <list> advice_item_list bitmap_sublist bitmap_target_list generic_target_list +%type <list> index_target_list join_order_target_list +%type <list> opt_partition simple_target_list +%type <str> identifier opt_plan_name +%type <target> generic_sublist join_order_sublist +%type <target> relation_identifier +%type <itarget> bitmap_target_item index_name + +%start parse_toplevel + +/* Grammar follows */ +%% + +parse_toplevel: advice_item_list + { + (void) yynerrs; /* suppress compiler warning */ + *result = $1; + } + ; + +advice_item_list: advice_item_list advice_item + { $$ = lappend($1, $2); } + | + { $$ = NIL; } + ; + +advice_item: TOK_TAG_JOIN_ORDER '(' join_order_target_list ')' + { + $$ = palloc0_object(pgpa_advice_item); + $$->tag = PGPA_TAG_JOIN_ORDER; + $$->targets = $3; + } + | TOK_TAG_INDEX '(' index_target_list ')' + { + $$ = palloc0_object(pgpa_advice_item); + if (strcmp($1, "index_only_scan") == 0) + $$->tag = PGPA_TAG_INDEX_ONLY_SCAN; + else if (strcmp($1, "index_scan") == 0) + $$->tag = PGPA_TAG_INDEX_SCAN; + else + elog(ERROR, "tag parsing failed: %s", $1); + $$->targets = $3; + } + | TOK_TAG_BITMAP '(' bitmap_target_list ')' + { + $$ = palloc0_object(pgpa_advice_item); + $$->tag = PGPA_TAG_BITMAP_HEAP_SCAN; + $$->targets = $3; + } + | TOK_TAG_SIMPLE '(' simple_target_list ')' + { + $$ = palloc0_object(pgpa_advice_item); + if (strcmp($1, "no_gather") == 0) + $$->tag = PGPA_TAG_NO_GATHER; + else if (strcmp($1, "seq_scan") == 0) + $$->tag = PGPA_TAG_SEQ_SCAN; + else if (strcmp($1, "tid_scan") == 0) + $$->tag = PGPA_TAG_TID_SCAN; + else + elog(ERROR, "tag parsing failed: %s", $1); + $$->targets = $3; + } + | TOK_TAG_GENERIC '(' generic_target_list ')' + { + bool fail; + + $$ = palloc0_object(pgpa_advice_item); + $$->tag = pgpa_parse_advice_tag($1, &fail); + if (fail) + { + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "unrecognized advice tag"); + } + + if ($$->tag == PGPA_TAG_FOREIGN_JOIN) + { + foreach_ptr(pgpa_advice_target, target, $3) + { + if (target->ttype == PGPA_TARGET_IDENTIFIER || + list_length(target->children) == 1) + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "FOREIGN_JOIN targets must contain more than one relation identifier"); + } + } + + $$->targets = $3; + } + ; + +relation_identifier: identifier opt_ri_occurrence opt_partition opt_plan_name + { + $$ = palloc0_object(pgpa_advice_target); + $$->ttype = PGPA_TARGET_IDENTIFIER; + $$->rid.alias_name = $1; + $$->rid.occurrence = $2; + if (list_length($3) == 2) + { + $$->rid.partnsp = linitial($3); + $$->rid.partrel = lsecond($3); + } + else if ($3 != NIL) + $$->rid.partrel = linitial($3); + $$->rid.plan_name = $4; + } + ; + +index_name: identifier + { + $$ = palloc0_object(pgpa_index_target); + $$->itype = PGPA_INDEX_NAME; + $$->indname = $1; + } + | identifier '.' identifier + { + $$ = palloc0_object(pgpa_index_target); + $$->itype = PGPA_INDEX_NAME; + $$->indnamespace = $1; + $$->indname = $3; + } + ; + +opt_ri_occurrence: + '#' TOK_INTEGER + { + if ($2 <= 0) + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "only positive occurrence numbers are permitted"); + $$ = $2; + } + | + { + /* The default occurrence number is 1. */ + $$ = 1; + } + ; + +identifier: TOK_IDENT + | TOK_TAG_JOIN_ORDER + | TOK_TAG_INDEX + | TOK_TAG_BITMAP + | TOK_TAG_SIMPLE + | TOK_TAG_GENERIC + ; + +/* + * When generating advice, we always schema-qualify the partition name, but + * when parsing advice, we accept a specification that lacks one. + */ +opt_partition: + '/' TOK_IDENT '.' TOK_IDENT + { $$ = list_make2($2, $4); } + | '/' TOK_IDENT + { $$ = list_make1($2); } + | + { $$ = NIL; } + ; + +opt_plan_name: + '@' TOK_IDENT + { $$ = $2; } + | + { $$ = NULL; } + ; + +bitmap_target_list: bitmap_target_list relation_identifier bitmap_target_item + { + $2->itarget = $3; + $$ = lappend($1, $2); + } + | + { $$ = NIL; } + ; + +bitmap_target_item: index_name + { $$ = $1; } + | TOK_OR '(' bitmap_sublist ')' + { + $$ = palloc0_object(pgpa_index_target); + $$->itype = PGPA_INDEX_OR; + $$->children = $3; + } + | TOK_AND '(' bitmap_sublist ')' + { + $$ = palloc0_object(pgpa_index_target); + $$->itype = PGPA_INDEX_AND; + $$->children = $3; + } + ; + +bitmap_sublist: bitmap_sublist bitmap_target_item + { $$ = lappend($1, $2); } + | bitmap_target_item + { $$ = list_make1($1); } + ; + +generic_target_list: generic_target_list relation_identifier + { $$ = lappend($1, $2); } + | generic_target_list generic_sublist + { $$ = lappend($1, $2); } + | + { $$ = NIL; } + ; + +generic_sublist: '(' generic_target_list ')' + { + $$ = palloc0_object(pgpa_advice_target); + $$->ttype = PGPA_TARGET_ORDERED_LIST; + $$->children = $2; + } + ; + +index_target_list: + index_target_list relation_identifier index_name + { + $2->itarget = $3; + $$ = lappend($1, $2); + } + | + { $$ = NIL; } + ; + +join_order_target_list: join_order_target_list relation_identifier + { $$ = lappend($1, $2); } + | join_order_target_list join_order_sublist + { $$ = lappend($1, $2); } + | + { $$ = NIL; } + ; + +join_order_sublist: + '(' join_order_target_list ')' + { + $$ = palloc0_object(pgpa_advice_target); + $$->ttype = PGPA_TARGET_ORDERED_LIST; + $$->children = $2; + } + | '{' simple_target_list '}' + { + $$ = palloc0_object(pgpa_advice_target); + $$->ttype = PGPA_TARGET_UNORDERED_LIST; + $$->children = $2; + } + ; + +simple_target_list: simple_target_list relation_identifier + { $$ = lappend($1, $2); } + | + { $$ = NIL; } + ; + +%% + +/* + * Parse an advice_string and return the resulting list of pgpa_advice_item + * objects. If a parse error occurs, instead return NULL. + * + * If the return value is NULL, *error_p will be set to the error message; + * otherwise, *error_p will be set to NULL. + */ +List * +pgpa_parse(const char *advice_string, char **error_p) +{ + yyscan_t scanner; + List *result; + char *error = NULL; + + pgpa_scanner_init(advice_string, &scanner); + pgpa_yyparse(&result, &error, scanner); + pgpa_scanner_finish(scanner); + + if (error != NULL) + { + *error_p = error; + return NULL; + } + + *error_p = NULL; + return result; +} diff --git a/contrib/pg_plan_advice/pgpa_planner.c b/contrib/pg_plan_advice/pgpa_planner.c new file mode 100644 index 0000000000..5c82f266b1 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_planner.c @@ -0,0 +1,1764 @@ +/*------------------------------------------------------------------------- + * + * pgpa_planner.c + * planner hooks + * + * Copyright (c) 2016-2024, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_planner.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pg_plan_advice.h" +#include "pgpa_collector.h" +#include "pgpa_identifier.h" +#include "pgpa_output.h" +#include "pgpa_planner.h" +#include "pgpa_trove.h" +#include "pgpa_walker.h" + +#include "common/hashfn_unstable.h" +#include "nodes/makefuncs.h" +#include "optimizer/extendplan.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/plancat.h" +#include "optimizer/planner.h" +#include "parser/parsetree.h" +#include "utils/lsyscache.h" + +#ifdef USE_ASSERT_CHECKING + +/* + * When assertions are enabled, we try generating relation identifiers during + * planning, saving them in a hash table, and then cross-checking them against + * the ones generated after planning is complete. + */ +typedef struct pgpa_ri_checker_key +{ + char *plan_name; + Index rti; +} pgpa_ri_checker_key; + +typedef struct pgpa_ri_checker +{ + pgpa_ri_checker_key key; + uint32 status; + const char *rid_string; +} pgpa_ri_checker; + +static uint32 pgpa_ri_checker_hash_key(pgpa_ri_checker_key key); + +static inline bool +pgpa_ri_checker_compare_key(pgpa_ri_checker_key a, pgpa_ri_checker_key b) +{ + if (a.rti != b.rti) + return false; + if (a.plan_name == NULL) + return (b.plan_name == NULL); + if (b.plan_name == NULL) + return false; + return strcmp(a.plan_name, b.plan_name) == 0; +} + +#define SH_PREFIX pgpa_ri_check +#define SH_ELEMENT_TYPE pgpa_ri_checker +#define SH_KEY_TYPE pgpa_ri_checker_key +#define SH_KEY key +#define SH_HASH_KEY(tb, key) pgpa_ri_checker_hash_key(key) +#define SH_EQUAL(tb, a, b) pgpa_ri_checker_compare_key(a, b) +#define SH_SCOPE static inline +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" + +#endif + +typedef struct pgpa_planner_state +{ + ExplainState *explain_state; + bool generate_advice_string; + pgpa_trove *trove; + MemoryContext trove_cxt; + List *sj_unique_rels; + +#ifdef USE_ASSERT_CHECKING + pgpa_ri_check_hash *ri_check_hash; +#endif +} pgpa_planner_state; + +typedef struct pgpa_join_state +{ + /* Most-recently-considered outer rel. */ + RelOptInfo *outerrel; + + /* Most-recently-considered inner rel. */ + RelOptInfo *innerrel; + + /* + * Array of relation identifiers for all members of this joinrel, with + * outerrel idenifiers before innerrel identifiers. + */ + pgpa_identifier *rids; + + /* Number of outer rel identifiers. */ + int outer_count; + + /* Number of inner rel identifiers. */ + int inner_count; + + /* + * Trove lookup results. + * + * join_entries and rel_entries are arrays of entries, and join_indexes + * and rel_indexes are the integer offsets within those arrays of entries + * potentially relevant to us. The "join" fields correspond to a lookup + * using PGPA_TROVE_LOOKUP_JOIN and the "rel" fields to a lookup using + * PGPA_TROVE_LOOKUP_REL. + */ + pgpa_trove_entry *join_entries; + Bitmapset *join_indexes; + pgpa_trove_entry *rel_entries; + Bitmapset *rel_indexes; +} pgpa_join_state; + +/* Saved hook values */ +static get_relation_info_hook_type prev_get_relation_info = NULL; +static join_path_setup_hook_type prev_join_path_setup = NULL; +static joinrel_setup_hook_type prev_joinrel_setup = NULL; +static planner_setup_hook_type prev_planner_setup = NULL; +static planner_shutdown_hook_type prev_planner_shutdown = NULL; + +/* Other global variabes */ +static int planner_extension_id = -1; + +/* Function prototypes. */ +static void pgpa_get_relation_info(PlannerInfo *root, + Oid relationObjectId, + bool inhparent, + RelOptInfo *rel); +static void pgpa_joinrel_setup(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + SpecialJoinInfo *sjinfo, + List *restrictlist); +static void pgpa_join_path_setup(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra); +static void pgpa_planner_setup(PlannerGlobal *glob, Query *parse, + const char *query_string, + double *tuple_fraction, + ExplainState *es); +static void pgpa_planner_shutdown(PlannerGlobal *glob, Query *parse, + const char *query_string, PlannedStmt *pstmt); +static void pgpa_planner_apply_joinrel_advice(uint64 *pgs_mask_p, + char *plan_name, + pgpa_join_state *pjs); +static void pgpa_planner_apply_join_path_advice(JoinType jointype, + uint64 *pgs_mask_p, + char *plan_name, + pgpa_join_state *pjs); +static void pgpa_planner_apply_scan_advice(RelOptInfo *rel, + pgpa_trove_entry *scan_entries, + Bitmapset *scan_indexes, + pgpa_trove_entry *rel_entries, + Bitmapset *rel_indexes); +static uint64 pgpa_join_strategy_mask_from_advice_tag(pgpa_advice_tag_type tag); +static bool pgpa_join_order_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry); +static bool pgpa_join_method_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry, + bool *restrict_method); +static bool pgpa_opaque_join_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry, + bool *restrict_method); + +static List *pgpa_planner_append_feedback(List *list, pgpa_trove *trove, + pgpa_trove_lookup_type type, + pgpa_identifier *rt_identifiers, + pgpa_plan_walker_context *walker); + +static inline void pgpa_ri_checker_save(pgpa_planner_state *pps, + PlannerInfo *root, + RelOptInfo *rel); +static void pgpa_ri_checker_validate(pgpa_planner_state *pps, + PlannedStmt *pstmt); + +/* + * Install planner-related hooks. + */ +void +pgpa_planner_install_hooks(void) +{ + planner_extension_id = GetPlannerExtensionId("pg_plan_advice"); + prev_get_relation_info = get_relation_info_hook; + get_relation_info_hook = pgpa_get_relation_info; + prev_joinrel_setup = joinrel_setup_hook; + joinrel_setup_hook = pgpa_joinrel_setup; + prev_join_path_setup = join_path_setup_hook; + join_path_setup_hook = pgpa_join_path_setup; + prev_planner_setup = planner_setup_hook; + planner_setup_hook = pgpa_planner_setup; + prev_planner_shutdown = planner_shutdown_hook; + planner_shutdown_hook = pgpa_planner_shutdown; +} + +/* + * Hook function for get_relation_info(). + * + * We can apply scan advice at this opint, and we also usee this as an + * opportunity to do range-table identifier cross-checking in assert-enabled + * builds. + */ +static void +pgpa_get_relation_info(PlannerInfo *root, Oid relationObjectId, + bool inhparent, RelOptInfo *rel) +{ + pgpa_planner_state *pps; + + /* Fetch our private state, set up by pgpa_planner_setup(). */ + pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id); + + /* Save details needed for range table identifier cross-checking. */ + if (pps != NULL) + pgpa_ri_checker_save(pps, root, rel); + + /* If query advice was provided, search for relevant entries. */ + if (pps != NULL && pps->trove != NULL) + { + pgpa_identifier rid; + pgpa_trove_result tresult_scan; + pgpa_trove_result tresult_rel; + + /* Search for scan advice and general rel advice. */ + pgpa_compute_identifier_by_rti(root, rel->relid, &rid); + pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_SCAN, 1, &rid, + &tresult_scan); + pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_REL, 1, &rid, + &tresult_rel); + + /* If relevant entries were found, apply them. */ + if (tresult_scan.indexes != NULL || tresult_rel.indexes != NULL) + pgpa_planner_apply_scan_advice(rel, + tresult_scan.entries, + tresult_scan.indexes, + tresult_rel.entries, + tresult_rel.indexes); + } + + /* Pass call to previous hook. */ + if (prev_get_relation_info) + (*prev_get_relation_info) (root, relationObjectId, inhparent, rel); +} + +/* + * Search for advice pertaining to a proposed join. + */ +static pgpa_join_state * +pgpa_get_join_state(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel) +{ + pgpa_planner_state *pps; + pgpa_join_state *pjs; + bool new_pjs = false; + + /* Fetch our private state, set up by pgpa_planner_setup(). */ + pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id); + if (pps == NULL || pps->trove == NULL) + { + /* No advice applies to this query, hence none to this joinrel. */ + return NULL; + } + + /* + * See whether we've previously associated a pgpa_join_state with this + * joinrel. If we have not, we need to try to construct one. If we have, + * then there are two cases: (a) if innerrel and outerrel are unchanged, + * we can simply use it, and (b) if they have changed, we need to rejigger + * the array of identifiers but can still skip the trove lookup. + */ + pjs = GetRelOptInfoExtensionState(joinrel, planner_extension_id); + if (pjs != NULL) + { + if (pjs->join_indexes == NULL && pjs->rel_indexes == NULL) + { + /* + * If there's no potentially relevant advice, then the presence of + * this pgpa_join_state acts like a negative cache entry: it tells + * us not to bother searching the trove for advice, because we + * will not find any. + */ + return NULL; + } + + if (pjs->outerrel == outerrel && pjs->innerrel == innerrel) + { + /* No updates required, so just return. */ + /* XXX. Does this need to do something different under GEQO? */ + return pjs; + } + } + + /* + * If there's no pgpa_join_state yet, we need to allocate one. Trove keys + * will not get built for RTE_JOIN RTEs, so the array may end up being + * larger than needed. It's not worth trying to compute a perfectly + * accurate count here. + */ + if (pjs == NULL) + { + int pessimistic_count = bms_num_members(joinrel->relids); + + pjs = palloc0_object(pgpa_join_state); + pjs->rids = palloc_array(pgpa_identifier, pessimistic_count); + new_pjs = true; + } + + /* + * Either we just allocated a new pgpa_join_state, or the existing one + * needs reconfiguring for a new innerrel and outerrel. The required array + * size can't change, so we can overwrite the existing one. + */ + pjs->outerrel = outerrel; + pjs->innerrel = innerrel; + pjs->outer_count = + pgpa_compute_identifiers_by_relids(root, outerrel->relids, pjs->rids); + pjs->inner_count = + pgpa_compute_identifiers_by_relids(root, innerrel->relids, + pjs->rids + pjs->outer_count); + + /* + * If we allocated a new pgpa_join_state, search our trove of advice for + * relevant entries. The trove lookup will return the same results for + * every outerrel/innerrel combination, so we don't need to repeat that + * work every time. + */ + if (new_pjs) + { + pgpa_trove_result tresult; + + /* Find join entries. */ + pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_JOIN, + pjs->outer_count + pjs->inner_count, + pjs->rids, &tresult); + pjs->join_entries = tresult.entries; + pjs->join_indexes = tresult.indexes; + + /* Find rel entries. */ + pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_REL, + pjs->outer_count + pjs->inner_count, + pjs->rids, &tresult); + pjs->rel_entries = tresult.entries; + pjs->rel_indexes = tresult.indexes; + + /* Now that the new pgpa_join_state is fully valid, save a pointer. */ + SetRelOptInfoExtensionState(joinrel, planner_extension_id, pjs); + + /* + * If there was no relevant advice found, just return NULL. This + * pgpa_join_state will stick around as a sort of negative cache + * entry, so that future calls for this same joinrel quickly return + * NULL. + */ + if (pjs->join_indexes == NULL && pjs->rel_indexes == NULL) + return NULL; + } + + return pjs; +} + +/* + * Enforce any provided advice that is relevant to any method of implementing + * this join. + * + * Although we're passed the outerrel and innerrel here, those are just + * whatever values happened to prompt the creation of this joinrel; they + * shouldn't really influence our choice of what advice to apply. + */ +static void +pgpa_joinrel_setup(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + SpecialJoinInfo *sjinfo, List *restrictlist) +{ + pgpa_join_state *pjs; + + Assert(bms_membership(joinrel->relids) == BMS_MULTIPLE); + + /* Get our private state information for this join. */ + pjs = pgpa_get_join_state(root, joinrel, outerrel, innerrel); + + /* If there is relevant advice, call a helper function to apply it. */ + if (pjs != NULL) + pgpa_planner_apply_joinrel_advice(&joinrel->pgs_mask, + root->plan_name, + pjs); + + /* Pass call to previous hook. */ + if (prev_joinrel_setup) + (*prev_joinrel_setup) (root, joinrel, outerrel, innerrel, + sjinfo, restrictlist); +} + +/* + * Enforce any provided advice that is relevant to this particular method of + * implementing this particular join. + */ +static void +pgpa_join_path_setup(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + JoinType jointype, JoinPathExtraData *extra) +{ + pgpa_join_state *pjs; + + Assert(bms_membership(joinrel->relids) == BMS_MULTIPLE); + + /* + * If we're considering implementing a semijoin by making one side unique, + * make a note of it in the pgpa_planner_state. See comments for + * pgpa_sj_unique_rel for why we do this. + */ + if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER) + { + pgpa_planner_state *pps; + RelOptInfo *uniquerel; + + uniquerel = jointype == JOIN_UNIQUE_OUTER ? outerrel : innerrel; + pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id); + if (pps->generate_advice_string) + { + bool found = false; + + /* Avoid adding duplicates. */ + foreach_ptr(pgpa_sj_unique_rel, ur, pps->sj_unique_rels) + { + /* + * We should always use the same pointer for the same plan + * name, so we need not use strcmp() here. + */ + if (root->plan_name == ur->plan_name && + bms_equal(uniquerel->relids, ur->relids)) + { + found = true; + break; + } + } + + /* If not a duplicate, append to the list. */ + if (!found) + { + pgpa_sj_unique_rel *ur = palloc_object(pgpa_sj_unique_rel); + + ur->plan_name = root->plan_name; + ur->relids = uniquerel->relids; + pps->sj_unique_rels = lappend(pps->sj_unique_rels, ur); + } + } + } + + /* Get our private state information for this join. */ + pjs = pgpa_get_join_state(root, joinrel, outerrel, innerrel); + + /* If there is relevant advice, call a helper function to apply it. */ + if (pjs != NULL) + pgpa_planner_apply_join_path_advice(jointype, + &extra->pgs_mask, + root->plan_name, + pjs); + + /* Pass call to previous hook. */ + if (prev_join_path_setup) + (*prev_join_path_setup) (root, joinrel, outerrel, innerrel, + jointype, extra); +} + +/* + * Carry out whatever setup work we need to do before planning. + */ +static void +pgpa_planner_setup(PlannerGlobal *glob, Query *parse, const char *query_string, + double *tuple_fraction, ExplainState *es) +{ + pgpa_trove *trove = NULL; + pgpa_planner_state *pps; + bool generate_advice_string = false; + bool needs_pps = false; + + /* + * Decide whether we need to generate an advice string. We must do this if + * at least one collector is enabled or if the user has requested it using + * the EXPLAIN (PLAN_ADVICE) option. + */ + generate_advice_string = (pg_plan_advice_local_collection_limit > 0 || + pg_plan_advice_shared_collection_limit > 0 || + pg_plan_advice_should_explain(es)); + if (generate_advice_string) + needs_pps = true; + + /* + * If any advice was provided, build a trove of advice for use during + * planning. + */ + if (pg_plan_advice_advice != NULL && pg_plan_advice_advice[0] != '\0') + { + List *advice_items; + char *error; + + /* + * Parsing shouldn't fail here, because we must have previously parsed + * successfully in pg_plan_advice_advice_check_hook, but if it does, + * emit a warning. + */ + advice_items = pgpa_parse(pg_plan_advice_advice, &error); + if (error) + elog(WARNING, "could not parse advice: %s", error); + + /* + * It's possible that the advice string was non-empty but contained no + * actual advice, e.g. it was all whitespace. + */ + if (advice_items != NIL) + { + trove = pgpa_build_trove(advice_items); + needs_pps = true; + } + } + +#ifdef USE_ASSERT_CHECKING + + /* + * If asserts are enabled, always build a private state object for + * cross-checks. + */ + needs_pps = true; +#endif + + /* + * We only create and initialize a private state object if it's needed for + * some purpose. That could be (1) recording that we will need to generate + * an advice string, (2) storing a trove of supplied advice, or (3) + * facilitating debugging cross-checks when asserts are enabled. + */ + if (needs_pps) + { + pps = palloc0_object(pgpa_planner_state); + pps->explain_state = es; + pps->generate_advice_string = generate_advice_string; + pps->trove = trove; +#ifdef USE_ASSERT_CHECKING + pps->ri_check_hash = + pgpa_ri_check_create(CurrentMemoryContext, 1024, NULL); +#endif + SetPlannerGlobalExtensionState(glob, planner_extension_id, pps); + } +} + +/* + * Carry out whatever work we want to do after planning is complete. + */ +static void +pgpa_planner_shutdown(PlannerGlobal *glob, Query *parse, + const char *query_string, PlannedStmt *pstmt) +{ + pgpa_planner_state *pps; + pgpa_trove *trove = NULL; + ExplainState *es = NULL; + pgpa_plan_walker_context walker = {0}; /* placate compiler */ + bool do_advice_feedback; + bool generate_advice_string = false; + List *pgpa_items = NIL; + pgpa_identifier *rt_identifiers = NULL; + + /* Fetch our private state, set up by pgpa_planner_setup(). */ + pps = GetPlannerGlobalExtensionState(glob, planner_extension_id); + if (pps != NULL) + { + trove = pps->trove; + es = pps->explain_state; + generate_advice_string = pps->generate_advice_string; + } + + /* + * If any advice was specified, and if we're running under EXPLAIN, then + * we should try try to provide advice feedback. + * + * If we're trying to generate an advice string or if we're trying to + * provide advice feedback, then we will need to create range table + * identifiers. + */ + do_advice_feedback = (trove != NULL && es != NULL); + if (generate_advice_string || do_advice_feedback) + { + pgpa_plan_walker(&walker, pstmt, pps->sj_unique_rels); + rt_identifiers = pgpa_create_identifiers_for_planned_stmt(pstmt); + } + + /* Generate the advice string, if we need to do so. */ + if (generate_advice_string) + { + char *advice_string; + StringInfoData buf; + + /* Generate a textual advice string. */ + initStringInfo(&buf); + pgpa_output_advice(&buf, &walker, rt_identifiers); + advice_string = buf.data; + + /* If the advice string is empty, don't bother collecting it. */ + if (advice_string[0] != '\0') + pgpa_collect_advice(pstmt->queryId, query_string, advice_string); + + /* + * If we've gone to the trouble of generating an advice string, and if + * we're inside EXPLAIN, save the string so we don't need to + * regenerate it. + */ + if (es != NULL) + pgpa_items = lappend(pgpa_items, + makeDefElem("advice_string", + (Node *) makeString(advice_string), + -1)); + } + + /* + * If we are planning within EXPLAIN, make arrangements to allow EXPLAIN + * to tell the user what has happened with the provided advice. + * + * NB: If EXPLAIN is used on a prepared is a prepared statement, planning + * will have already happened happened without recording these details. We + * could consider adding a GUC to cater to that scenario; or we could do + * this work all the time, but that seems like too much overhead. + */ + if (do_advice_feedback) + { + List *feedback = NIL; + + /* + * Inject a Node-tree representation of all the trove-entry flags into + * the PlannedStmt. + */ + feedback = pgpa_planner_append_feedback(feedback, + trove, + PGPA_TROVE_LOOKUP_SCAN, + rt_identifiers, &walker); + feedback = pgpa_planner_append_feedback(feedback, + trove, + PGPA_TROVE_LOOKUP_JOIN, + rt_identifiers, &walker); + feedback = pgpa_planner_append_feedback(feedback, + trove, + PGPA_TROVE_LOOKUP_REL, + rt_identifiers, &walker); + + pgpa_items = lappend(pgpa_items, makeDefElem("feedback", + (Node *) feedback, + -1)); + } + + /* Push whatever data we're saving into the PlannedStmt. */ + if (pgpa_items != NIL) + pstmt->extension_state = + lappend(pstmt->extension_state, + makeDefElem("pg_plan_advice", (Node *) pgpa_items, -1)); + + /* + * If assertions are enabled, cross-check the generated range table + * identifiers. + */ + if (pps != NULL) + pgpa_ri_checker_validate(pps, pstmt); +} + +/* + * Enforce overall restrictions on a join relation that apply uniformly + * regardless of the choice of inner and outer rel. + */ +static void +pgpa_planner_apply_joinrel_advice(uint64 *pgs_mask_p, char *plan_name, + pgpa_join_state *pjs) +{ + int i = -1; + int flags; + bool gather_conflict = false; + uint64 gather_mask = 0; + Bitmapset *gather_partial_match = NULL; + Bitmapset *gather_full_match = NULL; + bool partitionwise_conflict = false; + int partitionwise_outcome = 0; + Bitmapset *partitionwise_partial_match = NULL; + Bitmapset *partitionwise_full_match = NULL; + + /* Iterate over all possibly-relevant advice. */ + while ((i = bms_next_member(pjs->rel_indexes, i)) >= 0) + { + pgpa_trove_entry *entry = &pjs->rel_entries[i]; + pgpa_itm_type itm; + bool full_match = false; + uint64 my_gather_mask = 0; + int my_partitionwise_outcome = 0; /* >0 yes, <0 no */ + + /* + * For GATHER and GATHER_MERGE, if the specified relations exactly + * match this joinrel, do whatever the advice says; otherwise, don't + * allow Gather or Gather Merge at this level. For NO_GATHER, there + * must be a single target relation which must be included in this + * joinrel, so just don't allow Gather or Gather Merge here, full + * stop. + */ + if (entry->tag == PGPA_TAG_NO_GATHER) + { + my_gather_mask = PGS_CONSIDER_NONPARTIAL; + full_match = true; + } + else + { + int total_count; + + total_count = pjs->outer_count + pjs->inner_count; + itm = pgpa_identifiers_match_target(total_count, pjs->rids, + entry->target); + Assert(itm != PGPA_ITM_DISJOINT); + + if (itm == PGPA_ITM_EQUAL) + { + full_match = true; + if (entry->tag == PGPA_TAG_PARTITIONWISE) + my_partitionwise_outcome = 1; + else if (entry->tag == PGPA_TAG_GATHER) + my_gather_mask = PGS_GATHER; + else if (entry->tag == PGPA_TAG_GATHER_MERGE) + my_gather_mask = PGS_GATHER_MERGE; + else + elog(ERROR, "unexpected advice tag: %d", + (int) entry->tag); + } + else + { + if (entry->tag == PGPA_TAG_PARTITIONWISE) + { + my_partitionwise_outcome = -1; + my_gather_mask = PGS_CONSIDER_NONPARTIAL; + } + else if (entry->tag == PGPA_TAG_GATHER || + entry->tag == PGPA_TAG_GATHER_MERGE) + { + my_partitionwise_outcome = -1; + my_gather_mask = PGS_CONSIDER_NONPARTIAL; + } + else + elog(ERROR, "unexpected advice tag: %d", + (int) entry->tag); + } + } + + /* + * If we set my_gather_mask up above, then we (1) make a note if the + * advice conflicted, (2) remember the mask value, and (3) remember + * whether this was a full or partial match. + */ + if (my_gather_mask != 0) + { + if (gather_mask != 0 && gather_mask != my_gather_mask) + gather_conflict = true; + gather_mask = my_gather_mask; + if (full_match) + gather_full_match = bms_add_member(gather_full_match, i); + else + gather_partial_match = bms_add_member(gather_partial_match, i); + } + + /* + * Likewise, if we set my_partitionwise_outcome up above, then we (1) + * make a note if the advice conflicted, (2) remember what the desired + * outcome was, and (3) remember whether this was a full or partial + * match. + */ + if (my_partitionwise_outcome != 0) + { + if (partitionwise_outcome != 0 && + partitionwise_outcome != my_partitionwise_outcome) + partitionwise_conflict = true; + partitionwise_outcome = my_partitionwise_outcome; + if (full_match) + partitionwise_full_match = + bms_add_member(partitionwise_full_match, i); + else + partitionwise_partial_match = + bms_add_member(partitionwise_partial_match, i); + } + } + + /* + * Mark every Gather-related piece of advice as partially matched, and if + * the set of targets exactly matched this relation, fully matched. If + * there was a conflict, mark them all as conflicting. + */ + flags = PGPA_TE_MATCH_PARTIAL; + if (gather_conflict) + flags |= PGPA_TE_CONFLICTING; + pgpa_trove_set_flags(pjs->rel_entries, gather_partial_match, flags); + flags |= PGPA_TE_MATCH_FULL; + pgpa_trove_set_flags(pjs->rel_entries, gather_full_match, flags); + + /* Likewise for partitionwise advice. */ + flags = PGPA_TE_MATCH_PARTIAL; + if (partitionwise_conflict) + flags |= PGPA_TE_CONFLICTING; + pgpa_trove_set_flags(pjs->rel_entries, partitionwise_partial_match, flags); + flags |= PGPA_TE_MATCH_FULL; + pgpa_trove_set_flags(pjs->rel_entries, partitionwise_full_match, flags); + + /* If there is a non-conflicting gather specification, enforce it. */ + if (gather_mask != 0 && !gather_conflict) + { + *pgs_mask_p &= + ~(PGS_GATHER | PGS_GATHER_MERGE | PGS_CONSIDER_NONPARTIAL); + *pgs_mask_p |= gather_mask; + } + + /* + * If there is a non-conflicting partitionwise specification, enforce. + * + * To force a partitionwise join, we disable all the ordinary means of + * performing a join, and instead only Append and MergeAppend paths here. + * To prevent one, we just disable Append and MergeAppend. Note that we + * must not unset PGS_CONSIDER_PARTITIONWISE even when we don't want a + * partitionwise join here, because we might want one at a higher level + * that is constructing using paths from this level. + */ + if (partitionwise_outcome != 0 && !partitionwise_conflict) + { + if (partitionwise_outcome > 0) + *pgs_mask_p = (*pgs_mask_p & ~PGS_JOIN_ANY) | + PGS_APPEND | PGS_MERGE_APPEND | PGS_CONSIDER_PARTITIONWISE; + else + *pgs_mask_p &= ~(PGS_APPEND | PGS_MERGE_APPEND); + } +} + +/* + * Enforce restrictions on the join order or join method. + * + * Note that, although it is possible to view PARTITIONWISE advice as + * controlling the join method, we can't enforce it here, because the code + * path where this executes only deals with join paths that are built directly + * from a single outer path and a single inner path. + */ +static void +pgpa_planner_apply_join_path_advice(JoinType jointype, uint64 *pgs_mask_p, + char *plan_name, + pgpa_join_state *pjs) +{ + int i = -1; + Bitmapset *jo_permit_indexes = NULL; + Bitmapset *jo_deny_indexes = NULL; + Bitmapset *jm_indexes = NULL; + bool jm_conflict = false; + uint32 join_mask = 0; + + /* Iterate over all possibly-relevant advice. */ + while ((i = bms_next_member(pjs->join_indexes, i)) >= 0) + { + pgpa_trove_entry *entry = &pjs->join_entries[i]; + uint32 my_join_mask; + + /* Handle join order advice. */ + if (entry->tag == PGPA_TAG_JOIN_ORDER) + { + if (pgpa_join_order_permits_join(pjs->outer_count, + pjs->inner_count, + pjs->rids, + entry)) + jo_permit_indexes = bms_add_member(jo_permit_indexes, i); + else + jo_deny_indexes = bms_add_member(jo_deny_indexes, i); + continue; + } + + /* Handle join strategy advice. */ + my_join_mask = pgpa_join_strategy_mask_from_advice_tag(entry->tag); + if (my_join_mask != 0) + { + bool permit; + bool restrict_method; + + if (entry->tag == PGPA_TAG_FOREIGN_JOIN) + permit = pgpa_opaque_join_permits_join(pjs->outer_count, + pjs->inner_count, + pjs->rids, + entry, + &restrict_method); + else + permit = pgpa_join_method_permits_join(pjs->outer_count, + pjs->inner_count, + pjs->rids, + entry, + &restrict_method); + if (!permit) + jo_deny_indexes = bms_add_member(jo_deny_indexes, i); + else if (restrict_method) + { + jo_permit_indexes = bms_add_member(jo_permit_indexes, i); + jm_indexes = bms_add_member(jo_permit_indexes, i); + if (join_mask != 0 && join_mask != my_join_mask) + jm_conflict = true; + join_mask = my_join_mask; + } + continue; + } + + /* Handle semijoin uniqueness advice. */ + if (entry->tag == PGPA_TAG_SEMIJOIN_UNIQUE || + entry->tag == PGPA_TAG_SEMIJOIN_NON_UNIQUE) + { + bool advice_unique; + bool jt_unique; + bool jt_non_unique; + bool restrict_method; + + /* Advice wants to unique-ify and use a regular join? */ + advice_unique = (entry->tag == PGPA_TAG_SEMIJOIN_UNIQUE); + + /* Planner is trying to unique-ify and use a regular join? */ + jt_unique = (jointype == JOIN_UNIQUE_INNER || + jointype == JOIN_UNIQUE_OUTER); + + /* Planner is trying a semi-join, without unique-ifying? */ + jt_non_unique = (jointype == JOIN_SEMI || + jointype == JOIN_RIGHT_SEMI); + + /* + * These advice tags behave very much like join method advice, in + * that they want the inner side of the semijoin to match the + * relations listed in the advice. Hence, we test whether join + * method advice would enforce a join order restriction here, and + * disallow the join if not. + * + * XXX. Think harder about right semijoins. + */ + if (!pgpa_join_method_permits_join(pjs->outer_count, + pjs->inner_count, + pjs->rids, + entry, + &restrict_method)) + jo_deny_indexes = bms_add_member(jo_deny_indexes, i); + else if (restrict_method) + { + if (!jt_unique && !jt_non_unique) + { + /* + * This doesn't seem to be a semijoin to which SJ_UNIQUE + * or SJ_NON_UNIQUE can be applied. + */ + entry->flags |= PGPA_TE_INAPPLICABLE; + } + else if (advice_unique != jt_unique) + jo_deny_indexes = bms_add_member(jo_deny_indexes, i); + else + jo_permit_indexes = bms_add_member(jo_permit_indexes, i); + } + continue; + } + } + + /* + * If the advice indicates both that this join order is permissible and + * also that it isn't, then mark advice related to the join order as + * conflicting. + */ + if (jo_permit_indexes != NULL && jo_deny_indexes != NULL) + { + pgpa_trove_set_flags(pjs->join_entries, jo_permit_indexes, + PGPA_TE_CONFLICTING); + pgpa_trove_set_flags(pjs->join_entries, jo_deny_indexes, + PGPA_TE_CONFLICTING); + } + + /* + * If more than one join method specification is relevant here and they + * differ, mark them all as conflicting. + */ + if (jm_conflict) + pgpa_trove_set_flags(pjs->join_entries, jm_indexes, + PGPA_TE_CONFLICTING); + + /* + * If we were advised to deny this join order, then do so. However, if we + * were also advised to permit it, then do nothing, since the advice + * conflicts. + */ + if (jo_deny_indexes != NULL && jo_permit_indexes == NULL) + *pgs_mask_p = 0; + + /* + * If we were advised to restrict the join method, then do so. However, if + * we got conflicting join method advice or were also advised to reject + * this join order completely, then instead do nothing. + */ + if (join_mask != 0 && !jm_conflict && jo_deny_indexes == NULL) + *pgs_mask_p = (*pgs_mask_p & ~PGS_JOIN_ANY) | join_mask; +} + +/* + * Translate an advice tag into a path generation strategy mask. + * + * This function can be called with tag types that don't represent join + * strategies. In such cases, we just return 0, which can't be confused with + * a valid mask. + */ +static uint64 +pgpa_join_strategy_mask_from_advice_tag(pgpa_advice_tag_type tag) +{ + switch (tag) + { + case PGPA_TAG_FOREIGN_JOIN: + return PGS_FOREIGNJOIN; + case PGPA_TAG_MERGE_JOIN_PLAIN: + return PGS_MERGEJOIN_PLAIN; + case PGPA_TAG_MERGE_JOIN_MATERIALIZE: + return PGS_MERGEJOIN_MATERIALIZE; + case PGPA_TAG_NESTED_LOOP_PLAIN: + return PGS_NESTLOOP_PLAIN; + case PGPA_TAG_NESTED_LOOP_MATERIALIZE: + return PGS_NESTLOOP_MATERIALIZE; + case PGPA_TAG_NESTED_LOOP_MEMOIZE: + return PGS_NESTLOOP_MEMOIZE; + case PGPA_TAG_HASH_JOIN: + return PGS_HASHJOIN; + default: + return 0; + } +} + +/* + * Does a certain item of join order advice permit a certain join? + */ +static bool +pgpa_join_order_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry) +{ + bool loop = true; + bool sublist = false; + int length; + int outer_length; + pgpa_advice_target *target = entry->target; + pgpa_advice_target *prefix_target; + + /* We definitely have at least a partial match for this trove entry. */ + entry->flags |= PGPA_TE_MATCH_PARTIAL; + + /* + * Find the innermost sublist that contains all keys; if no sublist does, + * then continue processing with the toplevel list. + * + * For example, if the advice says JOIN_ORDER(t1 t2 (t3 t4 t5)), then we + * should evaluate joins that only involve t3, t4, and/or t5 against the + * (t3 t4 t5) sublist, and others against the full list. + * + * Note that (1) outermost sublist is always ordered and (2) whenever we + * zoom into an unordered sublist, we instantly accept the proposed join. + * If the advice says JOIN_ORDER(t1 t2 {t3 t4 t5}), any approach to + * joining t3, t4, and/or t5 is acceptable. + */ + while (loop) + { + Assert(target->ttype == PGPA_TARGET_ORDERED_LIST); + + loop = false; + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + pgpa_itm_type itm; + + if (child_target->ttype == PGPA_TARGET_IDENTIFIER) + continue; + + itm = pgpa_identifiers_match_target(outer_count + inner_count, + rids, child_target); + if (itm == PGPA_ITM_EQUAL || itm == PGPA_ITM_KEYS_ARE_SUBSET) + { + if (child_target->ttype == PGPA_TARGET_ORDERED_LIST) + { + target = child_target; + sublist = true; + loop = true; + break; + } + else + { + Assert(child_target->ttype == PGPA_TARGET_UNORDERED_LIST); + return true; + } + } + } + } + + /* + * Try to find a prefix of the selected join order list that is exactly + * equal to the outer side of the proposed join. + */ + length = list_length(target->children); + prefix_target = palloc0_object(pgpa_advice_target); + prefix_target->ttype = PGPA_TARGET_ORDERED_LIST; + for (outer_length = 1; outer_length <= length; ++outer_length) + { + pgpa_itm_type itm; + + /* Avoid leaking memory in every loop iteration. */ + if (prefix_target->children != NULL) + list_free(prefix_target->children); + prefix_target->children = list_copy_head(target->children, + outer_length); + + /* Search, hoping to find an exact match. */ + itm = pgpa_identifiers_match_target(outer_count, rids, prefix_target); + if (itm == PGPA_ITM_EQUAL) + break; + + /* + * If the prefix of the join order list that we're considering + * includes some but not all of the outer rels, we can make the prefix + * longer to find an exact match. But the advice hasn't mentioned + * everything that's part of our outer rel yet, but has mentioned + * things that are not, then this join doesn't match the join order + * list. + */ + if (itm != PGPA_ITM_TARGETS_ARE_SUBSET) + return false; + } + + /* + * If the previous looped stopped before the prefix_target included the + * entire join order list, then the next member of the join order list + * must exactly match the inner side of the join. + * + * Example: Given JOIN_ORDER(t1 t2 (t3 t4 t5)), if the outer side of the + * current join includes only t1, then the inner side must be exactly t2; + * if the outer side includes both t1 and t2, then the inner side must + * include exactly t3, t4, and t5. + */ + if (outer_length < length) + { + pgpa_advice_target *inner_target; + pgpa_itm_type itm; + + inner_target = list_nth(target->children, outer_length); + + itm = pgpa_identifiers_match_target(inner_count, rids + outer_count, + inner_target); + + /* + * Before returning, consider whether we need to mark this entry as + * fully matched. If we found every item but one on the lefthand side + * of the join and the last item on the righthand side of the join, + * then the answer is yes. + */ + if (outer_length + 1 == length && itm == PGPA_ITM_EQUAL) + entry->flags |= PGPA_TE_MATCH_FULL; + + return (itm == PGPA_ITM_EQUAL); + } + + /* + * If we get here, then the outer side of the join includes the entirety + * of the join order list. In this case, we behave differently depending + * on whether we're looking at the top-level join order list or sublist. + * At the top-level, we treat the specified list as mandating that the + * actual join order has the given list as a prefix, but a sublist + * requires an exact match. + * + * Exmaple: Given JOIN_ORDER(t1 t2 (t3 t4 t5)), we must start by joining + * all five of those relations and in that sequence, but once that is + * done, it's OK to join any other rels that are part of the join problem. + * This allows a user to specify the driving table and perhaps the first + * few things to which it should be joined while leaving the rest of the + * join order up the optimizer. But it seems like it would be surprising, + * given that specification, if the user could add t6 to the (t3 t4 t5) + * sub-join, so we don't allow that. If we did want to allow it, the logic + * earlier in this function would require substantial adjustment: we could + * allow the t3-t4-t5-t6 join to be built here, but the next step of + * joining t1-t2 to the result would still be rejected. + */ + return !sublist; +} + +/* + * Does a certain item of join method advice permit a certain join? + * + * Advice such as HASH_JOIN((x y)) means that there should be a hash join with + * exactly x and y on the inner side. Obviously, this means that if we are + * considering a join with exactly x and y on the inner side, we should enforce + * the use of a hash join. However, it also means that we must reject some + * incompatible join orders entirely. For example, a join with exactly x + * and y on the outer side shouldn't be allowed, because such paths might win + * over the advice-driven path on cost. + * + * To accommodate these requirements, this function returns true if the join + * should be allowed and false if it should not. Furthermore, *restrict_method + * is set to true if the join method should be enforced and false if not. + */ +static bool +pgpa_join_method_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry, + bool *restrict_method) +{ + pgpa_advice_target *target = entry->target; + pgpa_itm_type inner_itm; + pgpa_itm_type outer_itm; + pgpa_itm_type join_itm; + + /* We definitely have at least a partial match for this trove entry. */ + entry->flags |= PGPA_TE_MATCH_PARTIAL; + + *restrict_method = false; + + /* + * If our inner rel mentions exactly the same relations as the advice + * target, allow the join and enforce the join method restriction. + * + * If our inner rel mentions a superset of the target relations, allow the + * join. The join we care about has already taken place, and this advice + * imposes no further restrictions. + */ + inner_itm = pgpa_identifiers_match_target(inner_count, + rids + outer_count, + target); + if (inner_itm == PGPA_ITM_EQUAL) + { + entry->flags |= PGPA_TE_MATCH_FULL; + *restrict_method = true; + return true; + } + else if (inner_itm == PGPA_ITM_TARGETS_ARE_SUBSET) + return true; + + /* + * If our outer rel mentions a supserset of the relations in the advice + * target, no restrictions apply. The join we care has already taken + * place, and this advice imposes no further restrictions. + * + * On the other hand, if our outer rel mentions exactly the relations + * mentioned in the advice target, the planner is trying to reverse the + * sides of the join as compared with our desired outcome. Reject that. + */ + outer_itm = pgpa_identifiers_match_target(outer_count, + rids, target); + if (outer_itm == PGPA_ITM_TARGETS_ARE_SUBSET) + return true; + else if (outer_itm == PGPA_ITM_EQUAL) + return false; + + /* + * If the advice target mentions only a single relation, the test below + * cannot ever pass, so save some work by exiting now. + */ + if (target->ttype == PGPA_TARGET_IDENTIFIER) + return false; + + /* + * If everything in the joinrel is appears in the advice target, we're + * below the level of the join we want to control. + * + * For example, HASH_JOIN((x y)) doesn't restrict how x and y can be + * joined. + * + * This lookup shouldn't return PGPA_ITM_DISJOINT, because any such advice + * should not have been returned from the trove in the first place. + */ + join_itm = pgpa_identifiers_match_target(outer_count + inner_count, + rids, target); + Assert(join_itm != PGPA_ITM_DISJOINT); + if (join_itm == PGPA_ITM_KEYS_ARE_SUBSET || + join_itm == PGPA_ITM_EQUAL) + return true; + + /* + * We've already permitted all allowable cases, so reject this. + * + * If we reach this point, then the advice overlaps with this join but + * isn't entirely contained within either side, and there's also at least + * one relation present in the join that isn't mentioned by the advice. + * + * For instance, in the HASH_JOIN((x y)) example, we would reach here if x + * were on one side of the join, y on the other, and at least one of the + * two sides also included some other relation, say t. In that case, + * accepting this join would allow the (x y t) joinrel to contain + * non-disabled paths that do not put (x y) on the inner side of a hash + * join; we could instead end up with something like (x JOIN t) JOIN y. + */ + return false; +} + +/* + * Does advice concerning an opaque join permit a certain join? + * + * By an opaque join, we mean one where the exact mechanism by which the + * join is performed is not visible to PostgreSQL. Currently this is the + * case only for foreign joins: FOREIGN_JOIN((x y z)) means that x, y, and + * z are joined on the remote side, but we know nothing about the join order + * or join methods used over there. + */ +static bool +pgpa_opaque_join_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry, + bool *restrict_method) +{ + pgpa_advice_target *target = entry->target; + pgpa_itm_type join_itm; + + /* We definitely have at least a partial match for this trove entry. */ + entry->flags |= PGPA_TE_MATCH_PARTIAL; + + *restrict_method = false; + + join_itm = pgpa_identifiers_match_target(outer_count + inner_count, + rids, target); + if (join_itm == PGPA_ITM_EQUAL) + { + /* + * We have an exact match, and should therefore allow the join and + * enforce the use of the relevant opaque join method. + */ + entry->flags |= PGPA_TE_MATCH_FULL; + *restrict_method = true; + return true; + } + + if (join_itm == PGPA_ITM_KEYS_ARE_SUBSET || + join_itm == PGPA_ITM_TARGETS_ARE_SUBSET) + { + /* + * If join_itm == PGPA_ITM_TARGETS_ARE_SUBSET, then the join we care + * about has already taken place and no further restrictions apply. + * + * If join_itm == PGPA_ITM_KEYS_ARE_SUBSET, we're still building up to + * the join we care about and have not introduced any extraneous + * relations not named in the advice. Note that ForeignScan paths for + * joins are built up from ForeignScan paths from underlying joins and + * scans, so we must not disable this join when considering a subset + * of the relations we ultimately want. + */ + return true; + } + + /* + * The advice overlaps the join, but at least one relation is present in + * the join that isn't mentioned by the advice. We want to disable such + * paths so that we actually push down the join as intended. + */ + return false; +} + +/* + * Apply scan advice to a RelOptInfo. + * + * XXX. For bitmap heap scans, we're just ignoring the index information from + * the advice. That's not cool. + */ +static void +pgpa_planner_apply_scan_advice(RelOptInfo *rel, + pgpa_trove_entry *scan_entries, + Bitmapset *scan_indexes, + pgpa_trove_entry *rel_entries, + Bitmapset *rel_indexes) +{ + bool gather_conflict = false; + Bitmapset *gather_partial_match = NULL; + Bitmapset *gather_full_match = NULL; + int i = -1; + pgpa_trove_entry *scan_entry = NULL; + int flags; + bool scan_type_conflict = false; + Bitmapset *scan_type_indexes = NULL; + Bitmapset *scan_type_rel_indexes = NULL; + uint64 gather_mask = 0; + uint64 scan_type = 0; + + /* Scrutinize available scan advice. */ + while ((i = bms_next_member(scan_indexes, i)) >= 0) + { + pgpa_trove_entry *my_entry = &scan_entries[i]; + uint64 my_scan_type = 0; + + /* Translate our advice tags to a scan strategy advice value. */ + if (my_entry->tag == PGPA_TAG_BITMAP_HEAP_SCAN) + my_scan_type = PGS_BITMAPSCAN; + else if (my_entry->tag == PGPA_TAG_INDEX_ONLY_SCAN) + my_scan_type = PGS_INDEXONLYSCAN | PGS_CONSIDER_INDEXONLY; + else if (my_entry->tag == PGPA_TAG_INDEX_SCAN) + my_scan_type = PGS_INDEXSCAN; + else if (my_entry->tag == PGPA_TAG_SEQ_SCAN) + my_scan_type = PGS_SEQSCAN; + else if (my_entry->tag == PGPA_TAG_TID_SCAN) + my_scan_type = PGS_TIDSCAN; + + /* + * If this is understandable scan advice, hang on to the entry, the + * inferred scan type type, and the index at which we found it. + * + * Also make a note if we see conflicting scan type advice. Note that + * we regard two index specifications as conflicting unless they match + * exactly. In theory, perhaps we could regard INDEX_SCAN(a c) and + * INDEX_SCAN(a b.c) as non-conflicting if it happens that the only + * index named c is in schema b, but it doesn't seem worth the code. + */ + if (my_scan_type != 0) + { + if (scan_type != 0 && scan_type != my_scan_type) + scan_type_conflict = true; + if (!scan_type_conflict && scan_entry != NULL && + my_entry->target->itarget != NULL && + scan_entry->target->itarget != NULL && + !pgpa_index_targets_equal(scan_entry->target->itarget, + my_entry->target->itarget)) + scan_type_conflict = true; + scan_entry = my_entry; + scan_type = my_scan_type; + scan_type_indexes = bms_add_member(scan_type_indexes, i); + } + } + + /* Scrutinize available gather-related and partitionwise advice. */ + i = -1; + while ((i = bms_next_member(rel_indexes, i)) >= 0) + { + pgpa_trove_entry *my_entry = &rel_entries[i]; + uint64 my_gather_mask = 0; + bool just_one_rel; + + just_one_rel = my_entry->target->ttype == PGPA_TARGET_IDENTIFIER + || list_length(my_entry->target->children) == 1; + + /* + * PARTITIONWISE behaves like a scan type, except that if there's more + * than one relation targeted, it has no effect at this level. + */ + if (my_entry->tag == PGPA_TAG_PARTITIONWISE) + { + if (just_one_rel) + { + const uint64 my_scan_type = PGS_APPEND | PGS_MERGE_APPEND; + + if (scan_type != 0 && scan_type != my_scan_type) + scan_type_conflict = true; + scan_entry = my_entry; + scan_type = my_scan_type; + scan_type_rel_indexes = + bms_add_member(scan_type_rel_indexes, i); + } + continue; + } + + /* + * GATHER and GATHER_MERGE applied to a single rel mean that we should + * use the correspondings strategy here, while applying either to more + * than one rel means we should not use those strategies here, but + * rather at the level of the joinrel that corresponds to what was + * specified. NO_GATHER can only be applied to single rels. + * + * Note that setting PGS_CONSIDER_NONPARTIAL in my_gather_mask is + * equivalent to allowing the non-use of either form of Gather here. + */ + if (my_entry->tag == PGPA_TAG_GATHER || + my_entry->tag == PGPA_TAG_GATHER_MERGE) + { + if (!just_one_rel) + my_gather_mask = PGS_CONSIDER_NONPARTIAL; + else if (my_entry->tag == PGPA_TAG_GATHER) + my_gather_mask = PGS_GATHER; + else + my_gather_mask = PGS_GATHER_MERGE; + } + else if (my_entry->tag == PGPA_TAG_NO_GATHER) + { + Assert(just_one_rel); + my_gather_mask = PGS_CONSIDER_NONPARTIAL; + } + + /* + * If we set my_gather_mask up above, then we (1) make a note if the + * advice conflicted, (2) remember the mask value, and (3) remember + * whether this was a full or partial match. + */ + if (my_gather_mask != 0) + { + if (gather_mask != 0 && gather_mask != my_gather_mask) + gather_conflict = true; + gather_mask = my_gather_mask; + if (just_one_rel) + gather_full_match = bms_add_member(gather_full_match, i); + else + gather_partial_match = bms_add_member(gather_partial_match, i); + } + } + + /* Enforce choice of index. */ + if (scan_entry != NULL && !scan_type_conflict && + (scan_entry->tag == PGPA_TAG_INDEX_SCAN || + scan_entry->tag == PGPA_TAG_INDEX_ONLY_SCAN)) + { + pgpa_index_target *itarget = scan_entry->target->itarget; + IndexOptInfo *matched_index = NULL; + + Assert(itarget->itype == PGPA_INDEX_NAME); + + foreach_node(IndexOptInfo, index, rel->indexlist) + { + char *relname = get_rel_name(index->indexoid); + Oid nspoid = get_rel_namespace(index->indexoid); + char *relnamespace = get_namespace_name(nspoid); + + if (strcmp(itarget->indname, relname) == 0 && + (itarget->indnamespace == NULL || + strcmp(itarget->indnamespace, relnamespace) == 0)) + { + matched_index = index; + break; + } + } + + if (matched_index == NULL) + { + /* Don't force the scan type if the index doesn't exist. */ + scan_type = 0; + + /* Mark advice as inapplicable. */ + pgpa_trove_set_flags(scan_entries, scan_type_indexes, + PGPA_TE_INAPPLICABLE); + } + else + { + /* Retain this index and discard the rest. */ + rel->indexlist = list_make1(matched_index); + } + } + + /* + * Mark all the scan method entries as fully matched; and if they specify + * different things, mark them all as conflicting. + */ + flags = PGPA_TE_MATCH_PARTIAL | PGPA_TE_MATCH_FULL; + if (scan_type_conflict) + flags |= PGPA_TE_CONFLICTING; + pgpa_trove_set_flags(scan_entries, scan_type_indexes, flags); + pgpa_trove_set_flags(rel_entries, scan_type_rel_indexes, flags); + + /* + * Mark every Gather-related piece of advice as partially matched. Mark + * the ones that included this relation as a target by itself as fully + * matched. If there was a conflict, mark them all as conflicting. + */ + flags = PGPA_TE_MATCH_PARTIAL; + if (gather_conflict) + flags |= PGPA_TE_CONFLICTING; + pgpa_trove_set_flags(rel_entries, gather_partial_match, flags); + flags |= PGPA_TE_MATCH_FULL; + pgpa_trove_set_flags(rel_entries, gather_full_match, flags); + + /* If there is a non-conflicting scan specification, enforce it. */ + if (scan_type != 0 && !scan_type_conflict) + { + rel->pgs_mask &= + ~(PGS_SCAN_ANY | PGS_APPEND | PGS_MERGE_APPEND | + PGS_CONSIDER_INDEXONLY); + rel->pgs_mask |= scan_type; + } + + /* If there is a non-conflicting gather specification, enforce it. */ + if (gather_mask != 0 && !gather_conflict) + { + rel->pgs_mask &= + ~(PGS_GATHER | PGS_GATHER_MERGE | PGS_CONSIDER_NONPARTIAL); + rel->pgs_mask |= gather_mask; + } +} + +/* + * Add feedback entries to for one trove slice to the provided list and + * return the resulting list. + * + * Feedback entries are generated from the trove entry's flags. It's assumed + * that the caller has already set all relevant flags with the exception of + * PGPA_TE_FAILED. We set that flag here if appropriate. + */ +static List * +pgpa_planner_append_feedback(List *list, pgpa_trove *trove, + pgpa_trove_lookup_type type, + pgpa_identifier *rt_identifiers, + pgpa_plan_walker_context *walker) +{ + pgpa_trove_entry *entries; + int nentries; + StringInfoData buf; + + initStringInfo(&buf); + pgpa_trove_lookup_all(trove, type, &entries, &nentries); + for (int i = 0; i < nentries; ++i) + { + pgpa_trove_entry *entry = &entries[i]; + DefElem *item; + + /* + * If this entry was fully matched, check whether generating advice + * from this plan would produce such an entry. If not, label the entry + * as failed. + */ + if ((entry->flags & PGPA_TE_MATCH_FULL) != 0 && + !pgpa_walker_would_advise(walker, rt_identifiers, + entry->tag, entry->target)) + entry->flags |= PGPA_TE_FAILED; + + item = makeDefElem(pgpa_cstring_trove_entry(entry), + (Node *) makeInteger(entry->flags), -1); + list = lappend(list, item); + } + + return list; +} + +#ifdef USE_ASSERT_CHECKING + +/* + * Fast hash function for a key consisting of an RTI and plan name. + */ +static uint32 +pgpa_ri_checker_hash_key(pgpa_ri_checker_key key) +{ + fasthash_state hs; + int sp_len; + + fasthash_init(&hs, 0); + + hs.accum = key.rti; + fasthash_combine(&hs); + + /* plan_name can be NULL */ + if (key.plan_name == NULL) + sp_len = 0; + else + sp_len = fasthash_accum_cstring(&hs, key.plan_name); + + /* hashfn_unstable.h recommends using string length as tweak */ + return fasthash_final32(&hs, sp_len); +} + +#endif + +/* + * Save the range table identifier for one relation for future cross-checking. + */ +static void +pgpa_ri_checker_save(pgpa_planner_state *pps, PlannerInfo *root, + RelOptInfo *rel) +{ +#ifdef USE_ASSERT_CHECKING + pgpa_ri_checker_key key; + pgpa_ri_checker *check; + pgpa_identifier rid; + const char *rid_string; + bool found; + + key.rti = bms_singleton_member(rel->relids); + key.plan_name = root->plan_name; + pgpa_compute_identifier_by_rti(root, key.rti, &rid); + rid_string = pgpa_identifier_string(&rid); + check = pgpa_ri_check_insert(pps->ri_check_hash, key, &found); + Assert(!found || strcmp(check->rid_string, rid_string) == 0); + check->rid_string = rid_string; +#endif +} + +/* + * Validate that the range table identifiers we were able to generate during + * planning match the ones we generated from the final plan. + */ +static void +pgpa_ri_checker_validate(pgpa_planner_state *pps, PlannedStmt *pstmt) +{ +#ifdef USE_ASSERT_CHECKING + pgpa_identifier *rt_identifiers; + pgpa_ri_check_iterator it; + pgpa_ri_checker *check; + + /* Create identifiers from the planned statement. */ + rt_identifiers = pgpa_create_identifiers_for_planned_stmt(pstmt); + + /* Iterate over identifiers created during planning, so we can compare. */ + pgpa_ri_check_start_iterate(pps->ri_check_hash, &it); + while ((check = pgpa_ri_check_iterate(pps->ri_check_hash, &it)) != NULL) + { + int rtoffset = 0; + const char *rid_string; + Index flat_rti; + + /* + * If there's no plan name associated with this entry, then the + * rtoffset is 0. Otherwise, we can search the SubPlanRTInfo list to + * find the rtoffset. + */ + if (check->key.plan_name != NULL) + { + foreach_node(SubPlanRTInfo, rtinfo, pstmt->subrtinfos) + { + /* + * If rtinfo->dummy is set, then the subquery's range table + * will only have been partially copied to the final range + * table. Specifically, only RTE_RELATION entries and + * RTE_SUBQUERY entries that were once RTE_RELATION entries + * will be copied, as per add_rtes_to_flat_rtable. Therefore, + * there's no fixed rtoffset that we can apply to the RTIs + * used during planning to locate the corresponding relations + * in the final rtable. + * + * With more complex logic, we could work around that problem + * by remembering the whole contents of the subquery's rtable + * during planning, determining which of those would have been + * copied to the final rtable, and matching them up. But it + * doesn't seem like a worthwhile endeavor for right now, + * because RTIs from such subqueries won't appear in the plan + * tree itself, just in the range table. Hence, we can neither + * generate nor accept advice for them. + */ + if (strcmp(check->key.plan_name, rtinfo->plan_name) == 0 + && !rtinfo->dummy) + { + rtoffset = rtinfo->rtoffset; + Assert(rtoffset > 0); + break; + } + } + + /* + * It's not an error if we don't find the plan name: that just + * means that we planned a subplan by this name but it ended up + * being a dummy subplan and so wasn't included in the final plan + * tree. + */ + if (rtoffset == 0) + continue; + } + + /* + * check->key.rti is the RTI that we saw prior to range-table + * flattening, so we must add the appropriate RT offset to get the + * final RTI. + */ + flat_rti = check->key.rti + rtoffset; + Assert(flat_rti <= list_length(pstmt->rtable)); + + /* Assert that the string we compute now matches the previous one. */ + rid_string = pgpa_identifier_string(&rt_identifiers[flat_rti - 1]); + Assert(strcmp(rid_string, check->rid_string) == 0); + } +#endif +} diff --git a/contrib/pg_plan_advice/pgpa_planner.h b/contrib/pg_plan_advice/pgpa_planner.h new file mode 100644 index 0000000000..7d40b910b0 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_planner.h @@ -0,0 +1,17 @@ +/*------------------------------------------------------------------------- + * + * pgpa_planner.h + * planner hooks + * + * Copyright (c) 2016-2024, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_planner.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_PLANNER_H +#define PGPA_PLANNER_H + +extern void pgpa_planner_install_hooks(void); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_scan.c b/contrib/pg_plan_advice/pgpa_scan.c new file mode 100644 index 0000000000..4156f5209a --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_scan.c @@ -0,0 +1,303 @@ +/*------------------------------------------------------------------------- + * + * pgpa_scan.c + * analysis of scans in Plan trees + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_scan.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pgpa_scan.h" +#include "pgpa_walker.h" + +#include "nodes/parsenodes.h" +#include "parser/parsetree.h" + +static pgpa_scan *pgpa_make_scan(pgpa_plan_walker_context *walker, Plan *plan, + pgpa_scan_strategy strategy, + Bitmapset *relids, + bool needs_no_gather); + + +static RTEKind unique_nonjoin_rtekind(Bitmapset *relids, List *rtable); + +/* + * Build a pgpa_scan object for a Plan node and update the plan walker + * context as appopriate. If this is an Append or MergeAppend scan, also + * build pgpa_scan for any scans that were consolidated into this one by + * Append/MergeAppend pull-up. + * + * If there is at least one ElidedNode for this plan node, pass the uppermost + * one as elided_node, else pass NULL. + * + * Set the 'beneath_any_gather' node if we are underneath a Gather or + * Gather Merge node. + * + * Set the 'within_join_problem' flag if we're inside of a join problem and + * not otherwise. + */ +pgpa_scan * +pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan, + ElidedNode *elided_node, + bool beneath_any_gather, bool within_join_problem) +{ + pgpa_scan_strategy strategy = PGPA_SCAN_ORDINARY; + Bitmapset *relids = NULL; + int rti = -1; + bool needs_no_gather = !beneath_any_gather; + List *child_append_relid_sets = NIL; + + if (elided_node != NULL) + { + NodeTag elided_type = elided_node->elided_type; + + /* + * If setrefs processing elided an Append or MergeAppend node that had + * only one surviving child, then this is a partitionwise "scan" -- + * which may really be a partitionwise join, but there's no need to + * distinguish. + * + * If it's a trivial SubqueryScan that was elided, then this is an + * "ordinary" scan i.e. one for which we need to generate advice + * because the planner has not made any meaningful choice. + */ + relids = elided_node->relids; + if (elided_type == T_Append || elided_type == T_MergeAppend) + strategy = PGPA_SCAN_PARTITIONWISE; + else + strategy = PGPA_SCAN_ORDINARY; + + /* + * If this is an elided Append or MergeAppend node, we don't need to + * emit NO_GATHER() because we'll also emit it for the underlying + * scan, which is good enough. + * + * If it's an elided SubqueryScan, the same argument is likely to + * apply, because the subquery probably contains references to tables, + * and if it doesn't, then planner isn't likely to want to do it in + * parallel anyway. But also, we can't implement NO_GATHER() advice + * for a non-relation RTEKind, because get_relation_info() isn't + * called in such cases. That should probably be fixed at some point, + * but until then we shouldn't emit it. + */ + needs_no_gather = false; + + /* Join RTIs can be present, but advice never refers to them. */ + relids = pgpa_filter_out_join_relids(relids, walker->pstmt->rtable); + } + else if ((rti = pgpa_scanrelid(plan)) != 0) + { + RangeTblEntry *rte; + + relids = bms_make_singleton(rti); + + switch (nodeTag(plan)) + { + case T_SeqScan: + strategy = PGPA_SCAN_SEQ; + break; + case T_BitmapHeapScan: + strategy = PGPA_SCAN_BITMAP_HEAP; + break; + case T_IndexScan: + strategy = PGPA_SCAN_INDEX; + break; + case T_IndexOnlyScan: + strategy = PGPA_SCAN_INDEX_ONLY; + break; + case T_TidScan: + case T_TidRangeScan: + strategy = PGPA_SCAN_TID; + break; + default: + + /* + * This case includes a ForeignScan targeting a single + * relation; no other strategy is possible in that case, but + * see below, where things are different in multi-relation + * cases. + */ + strategy = PGPA_SCAN_ORDINARY; + + /* + * We can't handle NO_GATHER() for non-relation RTEs because + * get_relation_info won't be called. + */ + rte = rt_fetch(rti, walker->pstmt->rtable); + if (rte->rtekind != RTE_RELATION) + needs_no_gather = false; + + break; + } + } + else if ((relids = pgpa_relids(plan)) != NULL) + { + switch (nodeTag(plan)) + { + case T_ForeignScan: + + /* + * If multiple relations are being targeted by a single + * foreign scan, then the foreign join has been pushed to the + * remote side, and we want that to be reflected in the + * generated advice. + */ + strategy = PGPA_SCAN_FOREIGN; + break; + case T_Append: + + /* + * Append nodes can represent partitionwise scans of a a + * relation, but when they implement a set operation, they are + * just ordinary scans. + */ + if (unique_nonjoin_rtekind(relids, walker->pstmt->rtable) + == RTE_RELATION) + strategy = PGPA_SCAN_PARTITIONWISE; + else + strategy = PGPA_SCAN_ORDINARY; + + /* NO_GATHER() should be emitted for underlying rels only. */ + needs_no_gather = false; + + /* Be sure to account for pulled-up scans. */ + child_append_relid_sets = + ((Append *) plan)->child_append_relid_sets; + break; + case T_MergeAppend: + /* Some logic here as for Append, above. */ + if (unique_nonjoin_rtekind(relids, walker->pstmt->rtable) + == RTE_RELATION) + strategy = PGPA_SCAN_PARTITIONWISE; + else + strategy = PGPA_SCAN_ORDINARY; + + /* NO_GATHER() should be emitted for underlying rels only. */ + needs_no_gather = false; + + /* Be sure to account for pulled-up scans. */ + child_append_relid_sets = + ((MergeAppend *) plan)->child_append_relid_sets; + break; + default: + strategy = PGPA_SCAN_ORDINARY; + + /* + * We can't handle NO_GATHER() for single non-relation RTEs + * because get_relation_info won't be called. + */ + if (bms_membership(relids) == BMS_SINGLETON) + { + RangeTblEntry *rte; + + rte = rt_fetch(bms_singleton_member(relids), + walker->pstmt->rtable); + if (rte->rtekind != RTE_RELATION) + needs_no_gather = false; + } + + break; + } + + + /* Join RTIs can be present, but advice never refers to them. */ + relids = pgpa_filter_out_join_relids(relids, walker->pstmt->rtable); + } + + /* + * If this is an Append or MergeAppend node into which subordinate Append + * or MergeAppend paths were merged, each of those merged paths is + * effectively another scan for which we need to account. + */ + foreach_node(Bitmapset, child_relids, child_append_relid_sets) + { + Bitmapset *child_nonjoin_relids; + + child_nonjoin_relids = + pgpa_filter_out_join_relids(child_relids, + walker->pstmt->rtable); + (void) pgpa_make_scan(walker, plan, strategy, + child_nonjoin_relids, false); + } + + /* + * If this plan node has no associated RTIs, it's not a scan. When the + * 'within_join_problem' flag is set, that's unexpected, so throw an + * error, else return quietly. + */ + if (relids == NULL) + { + if (within_join_problem) + elog(ERROR, "plan node has no RTIs: %d", (int) nodeTag(plan)); + return NULL; + } + + return pgpa_make_scan(walker, plan, strategy, relids, needs_no_gather); +} + +/* + * Create a single pgpa_scan object and update the pgpa_plan_walker_context. + */ +static pgpa_scan * +pgpa_make_scan(pgpa_plan_walker_context *walker, Plan *plan, + pgpa_scan_strategy strategy, Bitmapset *relids, + bool needs_no_gather) +{ + pgpa_scan *scan; + + /* Create the scan object. */ + scan = palloc(sizeof(pgpa_scan)); + scan->plan = plan; + scan->strategy = strategy; + scan->relids = relids; + + /* Add it to the appropriate list. */ + walker->scans[scan->strategy] = lappend(walker->scans[scan->strategy], + scan); + + /* Caller tells us whether NO_GATHER() advice for this scan is needed. */ + if (needs_no_gather) + walker->no_gather_scans = bms_add_members(walker->no_gather_scans, + scan->relids); + + return scan; +} + +/* + * Determine the unique rtekind of a set of relids. + */ +static RTEKind +unique_nonjoin_rtekind(Bitmapset *relids, List *rtable) +{ + int rti = -1; + bool first = true; + RTEKind rtekind; + + Assert(relids != NULL); + + while ((rti = bms_next_member(relids, rti)) >= 0) + { + RangeTblEntry *rte = rt_fetch(rti, rtable); + + if (rte->rtekind == RTE_JOIN) + continue; + + if (first) + { + rtekind = rte->rtekind; + first = false; + } + else if (rtekind != rte->rtekind) + elog(ERROR, "rtekind mismatch: %d vs. %d", + rtekind, rte->rtekind); + } + + if (first) + elog(ERROR, "no non-RTE_JOIN RTEs found"); + + return rtekind; +} diff --git a/contrib/pg_plan_advice/pgpa_scan.h b/contrib/pg_plan_advice/pgpa_scan.h new file mode 100644 index 0000000000..3bb8726ff1 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_scan.h @@ -0,0 +1,85 @@ +/*------------------------------------------------------------------------- + * + * pgpa_scan.h + * analysis of scans in Plan trees + * + * For purposes of this module, a "scan" includes (1) single plan nodes that + * scan multiple RTIs, such as a degenerate Result node that replaces what + * would otherwise have been a join, and (2) Append and MergeAppend nodes + * implementing a partitionwise scan or a partitionwise join. Said + * differently, scans are the leaves of the join tree for a single join + * problem. + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_scan.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_SCAN_H +#define PGPA_SCAN_H + +#include "nodes/plannodes.h" + +typedef struct pgpa_plan_walker_context pgpa_plan_walker_context; + +/* + * Scan strategies. + * + * PGPA_SCAN_ORDINARY is any scan strategy that isn't interesting to us + * because there is no meaningful planner decision involved. For example, + * the only way to scan a subquery is a SubqueryScan, and the only way to + * scan a VALUES construct is a ValuesScan. We need not care exactly which + * type of planner node was used in such cases, because the same thing will + * happen when replanning. + * + * PGPA_SCAN_ORDINARY also includes Result nodes that correspond to scans + * or even joins that are proved empty. We don't know whether or not the scan + * or join will still be provably empty at replanning time, but if it is, + * then no scan-type advice is needed, and if it's not, we can't recommend + * a scan type based on the current plan. + * + * PGPA_SCAN_PARTITIONWISE also lumps together scans and joins: this can + * be either a partitionwise scan of a partitioned table or a partitionwise + * join between several partitioned tables. Note that all decisions about + * whether or not to use partitionwise join are meaningful: no matter what + * we decided this time, we could do more or fewer things partitionwise the + * next time. + * + * PGPA_SCAN_FOREIGN is only used when there's more than one relation involved; + * a single-table foreign scan is classified as ordinary, since there is no + * decision to make in that case. + * + * Other scan strategies map one-to-one to plan nodes. + */ +typedef enum +{ + PGPA_SCAN_ORDINARY = 0, + PGPA_SCAN_SEQ, + PGPA_SCAN_BITMAP_HEAP, + PGPA_SCAN_FOREIGN, + PGPA_SCAN_INDEX, + PGPA_SCAN_INDEX_ONLY, + PGPA_SCAN_PARTITIONWISE, + PGPA_SCAN_TID + /* update NUM_PGPA_SCAN_STRATEGY if you add anything here */ +} pgpa_scan_strategy; + +#define NUM_PGPA_SCAN_STRATEGY ((int) PGPA_SCAN_TID + 1) + +/* + * All of the details we need regarding a scan. + */ +typedef struct pgpa_scan +{ + Plan *plan; + pgpa_scan_strategy strategy; + Bitmapset *relids; +} pgpa_scan; + +extern pgpa_scan *pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan, + ElidedNode *elided_node, + bool beneath_any_gather, + bool within_join_problem); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_scanner.l b/contrib/pg_plan_advice/pgpa_scanner.l new file mode 100644 index 0000000000..c49b29d906 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_scanner.l @@ -0,0 +1,302 @@ +%top{ +/* + * Scanner for plan advice + * + * Copyright (c) 2000-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_scanner.l + */ +#include "postgres.h" + +#include "common/string.h" +#include "nodes/miscnodes.h" +#include "parser/scansup.h" + +#include "pgpa_ast.h" +#include "pgpa_parser.h" + +/* + * Extra data that we pass around when during scanning. + * + * 'litbuf' is used to implement the <xd> exclusive state, which handles + * double-quoted identifiers. + */ +typedef struct pgpa_yy_extra_type +{ + StringInfoData litbuf; +} pgpa_yy_extra_type; + +} + +%{ +/* LCOV_EXCL_START */ + +#define YY_DECL \ + extern int pgpa_yylex(union YYSTYPE *yylval_param, List **result, \ + char **parse_error_msg_p, yyscan_t yyscanner) + +/* No reason to constrain amount of data slurped */ +#define YY_READ_BUF_SIZE 16777216 + +/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */ +#undef fprintf +#define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg) + +static void +fprintf_to_ereport(const char *fmt, const char *msg) +{ + ereport(ERROR, (errmsg_internal("%s", msg))); +} +%} + +%option reentrant +%option bison-bridge +%option 8bit +%option never-interactive +%option nodefault +%option noinput +%option nounput +%option noyywrap +%option noyyalloc +%option noyyrealloc +%option noyyfree +%option warn +%option prefix="pgpa_yy" +%option extra-type="pgpa_yy_extra_type *" + +/* + * What follows is a severely stripped-down version of the core scanner. We + * only care about recognizing identifiers with or without identifier quoting + * (i.e. double-quoting), decimal integers, and a small handful of other + * things. Keep these rules in sync with src/backend/parser/scan.l. As in that + * file, we use an exclusive state called 'xc' for C-style comments, and an + * exclusive state called 'xd' for double-quoted identifiers. + */ +%x xc +%x xd + +ident_start [A-Za-z\200-\377_] +ident_cont [A-Za-z\200-\377_0-9\$] + +identifier {ident_start}{ident_cont}* + +decdigit [0-9] +decinteger {decdigit}(_?{decdigit})* + +space [ \t\n\r\f\v] +whitespace {space}+ + +dquote \" +xdstart {dquote} +xdstop {dquote} +xddouble {dquote}{dquote} +xdinside [^"]+ + +xcstart \/\* +xcstop \*+\/ +xcinside [^*/]+ + +%% + +{whitespace} { /* ignore */ } + +{identifier} { + char *str; + bool fail; + pgpa_advice_tag_type tag; + + /* + * Unlike the core scanner, we don't truncate identifiers + * here. There is no obvious reason to do so. + */ + str = downcase_identifier(yytext, yyleng, false, false); + yylval->str = str; + + /* + * If it's not a tag, just return TOK_IDENT; else, return + * a token type based on how further parsing should + * proceed. + */ + tag = pgpa_parse_advice_tag(str, &fail); + if (fail) + return TOK_IDENT; + else if (tag == PGPA_TAG_JOIN_ORDER) + return TOK_TAG_JOIN_ORDER; + else if (tag == PGPA_TAG_INDEX_SCAN || + tag == PGPA_TAG_INDEX_ONLY_SCAN) + return TOK_TAG_INDEX; + else if (tag == PGPA_TAG_BITMAP_HEAP_SCAN) + return TOK_TAG_BITMAP; + else if (tag == PGPA_TAG_SEQ_SCAN || + tag == PGPA_TAG_TID_SCAN || + tag == PGPA_TAG_NO_GATHER) + return TOK_TAG_SIMPLE; + else + return TOK_TAG_GENERIC; + } + +{decinteger} { + char *endptr; + + errno = 0; + yylval->integer = strtoint(yytext, &endptr, 10); + if (*endptr != '\0' || errno == ERANGE) + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "integer out of range"); + return TOK_INTEGER; + } + +{xcstart} { + BEGIN(xc); + } + +{xdstart} { + BEGIN(xd); + resetStringInfo(&yyextra->litbuf); + } + +"||" { return TOK_OR; } + +"&&" { return TOK_AND; } + +. { return yytext[0]; } + +<xc>{xcstop} { + BEGIN(INITIAL); + } + +<xc>{xcinside} { + /* discard multiple characters without slash or asterisk */ + } + +<xc>. { + /* + * Discard any single character. flex prefers longer + * matches, so this rule will never be picked when we could + * have matched xcstop. + * + * NB: At present, we don't bother to support nested + * C-style comments here, but this logic could be extended + * if that restriction poses a problem. + */ + } + +<xc><<EOF>> { + BEGIN(INITIAL); + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "unterminated comment"); + } + +<xd>{xdstop} { + BEGIN(INITIAL); + if (yyextra->litbuf.len == 0) + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "zero-length delimited identifier"); + yylval->str = pstrdup(yyextra->litbuf.data); + return TOK_IDENT; + } + +<xd>{xddouble} { + appendStringInfoChar(&yyextra->litbuf, '"'); + } + +<xd>{xdinside} { + appendBinaryStringInfo(&yyextra->litbuf, yytext, yyleng); + } + +<xd><<EOF>> { + BEGIN(INITIAL); + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "unterminated quoted identifier"); + } + +%% + +/* LCOV_EXCL_STOP */ + +/* + * Handler for errors while scanning or parsing advice. + * + * bison passes the error message to us via 'message', and the context is + * available via the 'yytext' macro. We assemble those values into a final + * error text and then arrange to pass it back to the caller of pgpa_yyparse() + * by storing it into *parse_error_msg_p. + */ +void +pgpa_yyerror(List **result, char **parse_error_msg_p, yyscan_t yyscanner, + const char *message) +{ + struct yyguts_t *yyg = (struct yyguts_t *) yyscanner; /* needed for yytext + * macro */ + + + /* report only the first error in a parse operation */ + if (*parse_error_msg_p) + return; + + if (yytext[0]) + *parse_error_msg_p = psprintf("%s at or near \"%s\"", message, yytext); + else + *parse_error_msg_p = psprintf("%s at end of input", message); +} + +/* + * Initialize the advice scanner. + * + * This should be called before parsing begins. + */ +void +pgpa_scanner_init(const char *str, yyscan_t *yyscannerp) +{ + yyscan_t yyscanner; + pgpa_yy_extra_type *yyext = palloc0_object(pgpa_yy_extra_type); + + if (yylex_init(yyscannerp) != 0) + elog(ERROR, "yylex_init() failed: %m"); + + yyscanner = *yyscannerp; + + initStringInfo(&yyext->litbuf); + pgpa_yyset_extra(yyext, yyscanner); + + yy_scan_string(str, yyscanner); +} + + +/* + * Shut down the advice scanner. + * + * This should be called after parsing is complete. + */ +void +pgpa_scanner_finish(yyscan_t yyscanner) +{ + yylex_destroy(yyscanner); +} + +/* + * Interface functions to make flex use palloc() instead of malloc(). + * It'd be better to make these static, but flex insists otherwise. + */ + +void * +yyalloc(yy_size_t size, yyscan_t yyscanner) +{ + return palloc(size); +} + +void * +yyrealloc(void *ptr, yy_size_t size, yyscan_t yyscanner) +{ + if (ptr) + return repalloc(ptr, size); + else + return palloc(size); +} + +void +yyfree(void *ptr, yyscan_t yyscanner) +{ + if (ptr) + pfree(ptr); +} diff --git a/contrib/pg_plan_advice/pgpa_trove.c b/contrib/pg_plan_advice/pgpa_trove.c new file mode 100644 index 0000000000..9db1077d48 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_trove.c @@ -0,0 +1,492 @@ +/*------------------------------------------------------------------------- + * + * pgpa_trove.c + * All of the advice given for a particular query, appropriately + * organized for convenient access. + * + * This name comes from the English expression "trove of advice", which + * means a collection of wisdom. This slightly unusual term is chosen to + * avoid naming confusion; for example, "collection of advice" would + * invite confusion with pgpa_collector.c. Note that, while we don't know + * whether the provided advice is actually wise, it's not our job to + * question the user's choices. + * + * The goal of this module is to make it easy to locate the specific + * bits of advice that pertain to any given part of a query, or to + * determine that there are none. + * + * Copyright (c) 2016-2024, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_trove.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pgpa_trove.h" + +#include "common/hashfn_unstable.h" + +/* + * An advice trove is organized into a series of "slices", each of which + * contains information about one topic e.g. scan methods. Each slice consists + * of an array of trove entries plus a hash table that we can use to determine + * which ones are relevant to a particular part of the query. + */ +typedef struct pgpa_trove_slice +{ + unsigned nallocated; + unsigned nused; + pgpa_trove_entry *entries; + struct pgpa_trove_entry_hash *hash; +} pgpa_trove_slice; + +/* + * Scan advice is stored into 'scan'; join advice is stored into 'join'; and + * advice that can apply to both cases is stored into 'rel'. This lets callers + * ask just for what's relevant. These slices correspond to the possible values + * of pgpa_trove_lookup_type. + */ +struct pgpa_trove +{ + pgpa_trove_slice join; + pgpa_trove_slice rel; + pgpa_trove_slice scan; +}; + +/* + * We're going to build a hash table to allow clients of this module to find + * relevant advice for a given part of the query quickly. However, we're going + * to use only three of the five key fields as hash keys. There are two reasons + * for this. + * + * First, it's allowable to set partition_schema to NULL to match a partition + * with the correct name in any schema. + * + * Second, we expect the "occurrence" and "partition_schema" portions of the + * relation identifiers to be mostly uninteresting. Most of the time, the + * occurrence field will be 1 and the partition_schema values will all be the + * same. Even when there is some variation, the absolute number of entries + * that have the same values for all three of these key fields should be + * quite small. + */ +typedef struct +{ + const char *alias_name; + const char *partition_name; + const char *plan_name; +} pgpa_trove_entry_key; + +typedef struct +{ + pgpa_trove_entry_key key; + int status; + Bitmapset *indexes; +} pgpa_trove_entry_element; + +static uint32 pgpa_trove_entry_hash_key(pgpa_trove_entry_key key); + +static inline bool +pgpa_trove_entry_compare_key(pgpa_trove_entry_key a, pgpa_trove_entry_key b) +{ + if (strcmp(a.alias_name, b.alias_name) != 0) + return false; + + if (!strings_equal_or_both_null(a.partition_name, b.partition_name)) + return false; + + if (!strings_equal_or_both_null(a.plan_name, b.plan_name)) + return false; + + return true; +} + +#define SH_PREFIX pgpa_trove_entry +#define SH_ELEMENT_TYPE pgpa_trove_entry_element +#define SH_KEY_TYPE pgpa_trove_entry_key +#define SH_KEY key +#define SH_HASH_KEY(tb, key) pgpa_trove_entry_hash_key(key) +#define SH_EQUAL(tb, a, b) pgpa_trove_entry_compare_key(a, b) +#define SH_SCOPE static inline +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" + +static void pgpa_init_trove_slice(pgpa_trove_slice *tslice); +static void pgpa_trove_add_to_slice(pgpa_trove_slice *tslice, + pgpa_advice_tag_type tag, + pgpa_advice_target *target); +static void pgpa_trove_add_to_hash(pgpa_trove_entry_hash *hash, + pgpa_advice_target *target, + int index); +static Bitmapset *pgpa_trove_slice_lookup(pgpa_trove_slice *tslice, + pgpa_identifier *rid); + +/* + * Build a trove of advice from a list of advice items. + * + * Caller can obtain a list of advice items to pass to this function by + * calling pgpa_parse(). + */ +pgpa_trove * +pgpa_build_trove(List *advice_items) +{ + pgpa_trove *trove = palloc_object(pgpa_trove); + + pgpa_init_trove_slice(&trove->join); + pgpa_init_trove_slice(&trove->rel); + pgpa_init_trove_slice(&trove->scan); + + foreach_ptr(pgpa_advice_item, item, advice_items) + { + switch (item->tag) + { + case PGPA_TAG_JOIN_ORDER: + { + pgpa_advice_target *target; + + /* + * For most advice types, each element in the top-level + * list is a separate target, but it's most convenient to + * regard the entirety of a JOIN_ORDER specification as a + * single target. Since it wasn't represented that way + * during parsing, build a surrogate object now. + */ + target = palloc0_object(pgpa_advice_target); + target->ttype = PGPA_TARGET_ORDERED_LIST; + target->children = item->targets; + + pgpa_trove_add_to_slice(&trove->join, + item->tag, target); + } + break; + + case PGPA_TAG_BITMAP_HEAP_SCAN: + case PGPA_TAG_INDEX_ONLY_SCAN: + case PGPA_TAG_INDEX_SCAN: + case PGPA_TAG_SEQ_SCAN: + case PGPA_TAG_TID_SCAN: + + /* + * Scan advice. + */ + foreach_ptr(pgpa_advice_target, target, item->targets) + { + /* + * For now, all of our scan types target single relations, + * but in the future this might not be true, e.g. a custom + * scan could replace a join. + */ + Assert(target->ttype == PGPA_TARGET_IDENTIFIER); + pgpa_trove_add_to_slice(&trove->scan, + item->tag, target); + } + break; + + case PGPA_TAG_FOREIGN_JOIN: + case PGPA_TAG_HASH_JOIN: + case PGPA_TAG_MERGE_JOIN_MATERIALIZE: + case PGPA_TAG_MERGE_JOIN_PLAIN: + case PGPA_TAG_NESTED_LOOP_MATERIALIZE: + case PGPA_TAG_NESTED_LOOP_MEMOIZE: + case PGPA_TAG_NESTED_LOOP_PLAIN: + case PGPA_TAG_SEMIJOIN_NON_UNIQUE: + case PGPA_TAG_SEMIJOIN_UNIQUE: + + /* + * Join strategy advice. + */ + foreach_ptr(pgpa_advice_target, target, item->targets) + { + pgpa_trove_add_to_slice(&trove->join, + item->tag, target); + } + break; + + case PGPA_TAG_PARTITIONWISE: + case PGPA_TAG_GATHER: + case PGPA_TAG_GATHER_MERGE: + case PGPA_TAG_NO_GATHER: + + /* + * Advice about a RelOptInfo relevant to both scans and joins. + */ + foreach_ptr(pgpa_advice_target, target, item->targets) + { + pgpa_trove_add_to_slice(&trove->rel, + item->tag, target); + } + break; + } + } + + return trove; +} + +/* + * Search a trove of advice for relevant entries. + * + * All parameters are input parameters except for *result, which is an output + * parameter used to return results to the caller. + */ +void +pgpa_trove_lookup(pgpa_trove *trove, pgpa_trove_lookup_type type, + int nrids, pgpa_identifier *rids, pgpa_trove_result *result) +{ + pgpa_trove_slice *tslice; + Bitmapset *indexes; + + Assert(nrids > 0); + + if (type == PGPA_TROVE_LOOKUP_SCAN) + tslice = &trove->scan; + else if (type == PGPA_TROVE_LOOKUP_JOIN) + tslice = &trove->join; + else + tslice = &trove->rel; + + indexes = pgpa_trove_slice_lookup(tslice, &rids[0]); + for (int i = 1; i < nrids; ++i) + { + Bitmapset *other_indexes; + + /* + * If the caller is asking about two relations that aren't part of the + * same subquery, they've messed up. + */ + Assert(strings_equal_or_both_null(rids[0].plan_name, + rids[i].plan_name)); + + other_indexes = pgpa_trove_slice_lookup(tslice, &rids[i]); + indexes = bms_union(indexes, other_indexes); + } + + result->entries = tslice->entries; + result->indexes = indexes; +} + +/* + * Return all entries in a trove slice to the caller. + * + * The first two arguments are input arguments, and the remainder are output + * arguments. + */ +void +pgpa_trove_lookup_all(pgpa_trove *trove, pgpa_trove_lookup_type type, + pgpa_trove_entry **entries, int *nentries) +{ + pgpa_trove_slice *tslice; + + if (type == PGPA_TROVE_LOOKUP_SCAN) + tslice = &trove->scan; + else if (type == PGPA_TROVE_LOOKUP_JOIN) + tslice = &trove->join; + else + tslice = &trove->rel; + + *entries = tslice->entries; + *nentries = tslice->nused; +} + +/* + * Convert a trove entry to an item of plan advice that would produce it. + */ +char * +pgpa_cstring_trove_entry(pgpa_trove_entry *entry) +{ + StringInfoData buf; + + initStringInfo(&buf); + appendStringInfo(&buf, "%s", pgpa_cstring_advice_tag(entry->tag)); + + /* JOIN_ORDER tags are transformed by pgpa_build_trove; undo that here */ + if (entry->tag != PGPA_TAG_JOIN_ORDER) + appendStringInfoChar(&buf, '('); + else + Assert(entry->target->ttype == PGPA_TARGET_ORDERED_LIST); + + pgpa_format_advice_target(&buf, entry->target); + + if (entry->target->itarget != NULL) + { + appendStringInfoChar(&buf, ' '); + pgpa_format_index_target(&buf, entry->target->itarget); + } + + if (entry->tag != PGPA_TAG_JOIN_ORDER) + appendStringInfoChar(&buf, ')'); + + return buf.data; +} + +/* + * Set PGPA_TE_* flags on a set of trove entries. + */ +void +pgpa_trove_set_flags(pgpa_trove_entry *entries, Bitmapset *indexes, int flags) +{ + int i = -1; + + while ((i = bms_next_member(indexes, i)) >= 0) + { + pgpa_trove_entry *entry = &entries[i]; + + entry->flags |= flags; + } +} + +/* + * Add a new advice target to an existing pgpa_trove_slice object. + */ +static void +pgpa_trove_add_to_slice(pgpa_trove_slice *tslice, + pgpa_advice_tag_type tag, + pgpa_advice_target *target) +{ + pgpa_trove_entry *entry; + + if (tslice->nused >= tslice->nallocated) + { + int new_allocated; + + new_allocated = tslice->nallocated * 2; + tslice->entries = repalloc_array(tslice->entries, pgpa_trove_entry, + new_allocated); + tslice->nallocated = new_allocated; + } + + entry = &tslice->entries[tslice->nused]; + entry->tag = tag; + entry->target = target; + entry->flags = 0; + + pgpa_trove_add_to_hash(tslice->hash, target, tslice->nused); + + tslice->nused++; +} + +/* + * Update the hash table for a newly-added advice target. + */ +static void +pgpa_trove_add_to_hash(pgpa_trove_entry_hash *hash, pgpa_advice_target *target, + int index) +{ + pgpa_trove_entry_key key; + pgpa_trove_entry_element *element; + bool found; + + /* For non-identifiers, add entries for all descendents. */ + if (target->ttype != PGPA_TARGET_IDENTIFIER) + { + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + pgpa_trove_add_to_hash(hash, child_target, index); + } + return; + } + + /* Sanity checks. */ + Assert(target->rid.occurrence > 0); + Assert(target->rid.alias_name != NULL); + + /* Add an entry for this relation identifier. */ + key.alias_name = target->rid.alias_name; + key.partition_name = target->rid.partrel; + key.plan_name = target->rid.plan_name; + element = pgpa_trove_entry_insert(hash, key, &found); + if (!found) + element->indexes = NULL; + element->indexes = bms_add_member(element->indexes, index); +} + +/* + * Create and initialize a new pgpa_trove_slice object. + */ +static void +pgpa_init_trove_slice(pgpa_trove_slice *tslice) +{ + /* + * In an ideal world, we'll make tslice->nallocated big enough that the + * array and hash table will be large enough to contain the number of + * advice items in this trove slice, but a generous default value is not + * good for performance, because pgpa_init_trove_slice() has to zero an + * amount of memory proportional to tslice->nallocated. Hence, we keep the + * starting value quite small, on the theory that advice strings will + * often be relatively short. + */ + tslice->nallocated = 16; + tslice->nused = 0; + tslice->entries = palloc_array(pgpa_trove_entry, tslice->nallocated); + tslice->hash = pgpa_trove_entry_create(CurrentMemoryContext, + tslice->nallocated, NULL); +} + +/* + * Fast hash function for a key consisting of alias_name, partition_name, + * and plan_name. + */ +static uint32 +pgpa_trove_entry_hash_key(pgpa_trove_entry_key key) +{ + fasthash_state hs; + int sp_len; + + fasthash_init(&hs, 0); + + /* alias_name may not be NULL */ + sp_len = fasthash_accum_cstring(&hs, key.alias_name); + + /* partition_name and plan_name, however, can be NULL */ + if (key.partition_name != NULL) + sp_len += fasthash_accum_cstring(&hs, key.partition_name); + if (key.plan_name != NULL) + sp_len += fasthash_accum_cstring(&hs, key.plan_name); + + /* + * hashfn_unstable.h recommends using string length as tweak. It's not + * clear to me what to do if there are multiple strings, so for now I'm + * just using the total of all of the lengths. + */ + return fasthash_final32(&hs, sp_len); +} + +/* + * Look for matching entries. + */ +static Bitmapset * +pgpa_trove_slice_lookup(pgpa_trove_slice *tslice, pgpa_identifier *rid) +{ + pgpa_trove_entry_key key; + pgpa_trove_entry_element *element; + Bitmapset *result = NULL; + + Assert(rid->occurrence >= 1); + + key.alias_name = rid->alias_name; + key.partition_name = rid->partrel; + key.plan_name = rid->plan_name; + + element = pgpa_trove_entry_lookup(tslice->hash, key); + + if (element != NULL) + { + int i = -1; + + while ((i = bms_next_member(element->indexes, i)) >= 0) + { + pgpa_trove_entry *entry = &tslice->entries[i]; + + /* + * We know that this target or one of its descendents matches the + * identifier on the three key fields above, but we don't know + * which descendent or whether the occurence and schema also + * match. + */ + if (pgpa_identifier_matches_target(rid, entry->target)) + result = bms_add_member(result, i); + } + } + + return result; +} diff --git a/contrib/pg_plan_advice/pgpa_trove.h b/contrib/pg_plan_advice/pgpa_trove.h new file mode 100644 index 0000000000..479c3f7577 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_trove.h @@ -0,0 +1,113 @@ +/*------------------------------------------------------------------------- + * + * pgpa_trove.h + * All of the advice given for a particular query, appropriately + * organized for convenient access. + * + * Copyright (c) 2016-2024, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_trove.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_TROVE_H +#define PGPA_TROVE_H + +#include "pgpa_ast.h" + +#include "nodes/bitmapset.h" + +typedef struct pgpa_trove pgpa_trove; + +/* + * Flags that can be set on a pgpa_trove_entry to indicate what happened when + * trying to plan using advice. + * + * PGPA_TE_MATCH_PARTIAL means that we found some part of the query that at + * least partially matched the target; e.g. given JOIN_ORDER(a b), this would + * be set if we ever saw any joinrel including either "a" or "b". + * + * PGPA_TE_MATCH_FULL means that we found an exact match for the target; e.g. + * given JOIN_ORDER(a b), this would be set if we saw a joinrel containing + * exactly "a" and "b" and nothing else. + * + * PGPA_TE_INAPPLICABLE means that the advice doesn't properly apply to the + * target; e.g. INDEX_SCAN(foo bar_idx) would be so marked if bar_idx does not + * exist on foo. The fact that this bit has been set does not mean that the + * advice had no effect. + * + * PGPA_TE_CONFLICTING means that a conflict was detected between what this + * advice wants and what some other plan advice wants; e.g. JOIN_ORDER(a b) + * would conflict with HASH_JOIN(a), because the former requires "a" to be the + * outer table while the latter requires it to be the inner table. + * + * PGPA_TE_FAILED means that the resulting plan did not conform to the advice. + */ +#define PGPA_TE_MATCH_PARTIAL 0x0001 +#define PGPA_TE_MATCH_FULL 0x0002 +#define PGPA_TE_INAPPLICABLE 0x0004 +#define PGPA_TE_CONFLICTING 0x0008 +#define PGPA_TE_FAILED 0x0010 + +/* + * Each entry in a trove of advice represents the application of a tag to + * a single target. + */ +typedef struct pgpa_trove_entry +{ + pgpa_advice_tag_type tag; + pgpa_advice_target *target; + int flags; +} pgpa_trove_entry; + +/* + * What kind of information does the caller want to find in a trove? + * + * PGPA_TROVE_LOOKUP_SCAN means we're looking for scan advice. + * + * PGPA_TROVE_LOOKUP_JOIN means we're looking for join-related advice. + * This includes join order advice, join method advice, and semijoin-uniqueness + * advice. + * + * PGPA_TROVE_LOOKUP_REL means we're looking for general advice about this + * a RelOptInfo that may correspond to either a scan or a join. This includes + * gather-related advice and partitionwise advice. Note that partitionwise + * advice might seem like join advice, but that's not a helpful way of viewing + * the matter because (1) partitionwise advice is also relevant at the scan + * level and (2) other types of join advice affect only what to do from + * join_path_setup_hook, but partitionwise advice affects what to do in + * joinrel_setup_hook. + */ +typedef enum pgpa_trove_lookup_type +{ + PGPA_TROVE_LOOKUP_JOIN, + PGPA_TROVE_LOOKUP_REL, + PGPA_TROVE_LOOKUP_SCAN +} pgpa_trove_lookup_type; + +/* + * This struct is used to store the result of a trove lookup. For each member + * of "indexes", the entry at the corresponding offset within "entries" is one + * of the results. + */ +typedef struct pgpa_trove_result +{ + pgpa_trove_entry *entries; + Bitmapset *indexes; +} pgpa_trove_result; + +extern pgpa_trove *pgpa_build_trove(List *advice_items); +extern void pgpa_trove_lookup(pgpa_trove *trove, + pgpa_trove_lookup_type type, + int nrids, + pgpa_identifier *rids, + pgpa_trove_result *result); +extern void pgpa_trove_lookup_all(pgpa_trove *trove, + pgpa_trove_lookup_type type, + pgpa_trove_entry **entries, + int *nentries); +extern char *pgpa_cstring_trove_entry(pgpa_trove_entry *entry); +extern void pgpa_trove_set_flags(pgpa_trove_entry *entries, + Bitmapset *indexes, int flags); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_walker.c b/contrib/pg_plan_advice/pgpa_walker.c new file mode 100644 index 0000000000..4dffc60114 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_walker.c @@ -0,0 +1,965 @@ +/*------------------------------------------------------------------------- + * + * pgpa_walker.c + * Plan tree iteration + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_walker.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pgpa_join.h" +#include "pgpa_scan.h" +#include "pgpa_walker.h" + +#include "nodes/plannodes.h" +#include "parser/parsetree.h" + +static void pgpa_walk_recursively(pgpa_plan_walker_context *walker, Plan *plan, + bool within_join_problem, + pgpa_join_unroller *join_unroller, + List *active_query_features, + bool beneath_any_gather); +static Bitmapset *pgpa_process_unrolled_join(pgpa_plan_walker_context *walker, + pgpa_unrolled_join *ujoin); + +static pgpa_query_feature *pgpa_add_feature(pgpa_plan_walker_context *walker, + pgpa_qf_type type, + Plan *plan); + +static void pgpa_qf_add_rti(List *active_query_features, Index rti); +static void pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids); +static void pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan, + List *rtable); + +static bool pgpa_walker_join_order_matches(pgpa_unrolled_join *ujoin, + Index rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_advice_target *target, + bool toplevel); +static bool pgpa_walker_join_order_matches_member(pgpa_join_member *member, + Index rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_advice_target *target); +static bool pgpa_walker_contains_scan(pgpa_plan_walker_context *walker, + pgpa_scan_strategy strategy, + Bitmapset *relids); +static bool pgpa_walker_contains_feature(pgpa_plan_walker_context *walker, + pgpa_qf_type type, + Bitmapset *relids); +static bool pgpa_walker_contains_join(pgpa_plan_walker_context *walker, + pgpa_join_strategy strategy, + Bitmapset *relids); +static bool pgpa_walker_contains_no_gather(pgpa_plan_walker_context *walker, + Bitmapset *relids); +static Index pgpa_walker_get_rti(Index rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_identifier *rid); + +/* + * Top-level entrypoint for the plan tree walk. + * + * Populates walker based on a traversal of the Plan trees in pstmt. + * + * sj_unique_rels is a list of pgpa_sj_unique_rel objects, one for each + * relation we considered making unique as part of semijoin planning. + */ +void +pgpa_plan_walker(pgpa_plan_walker_context *walker, PlannedStmt *pstmt, + List *sj_unique_rels) +{ + ListCell *lc; + List *sj_unique_rtis = NULL; + List *sj_nonunique_qfs = NULL; + + /* Initialization. */ + memset(walker, 0, sizeof(pgpa_plan_walker_context)); + walker->pstmt = pstmt; + + /* Walk the main plan tree. */ + pgpa_walk_recursively(walker, pstmt->planTree, 0, NULL, NIL, false); + + /* Main plan tree walk won't reach subplans, so walk those. */ + foreach(lc, pstmt->subplans) + { + Plan *plan = lfirst(lc); + + if (plan != NULL) + pgpa_walk_recursively(walker, plan, 0, NULL, NIL, false); + } + + /* Adjust RTIs from sj_unique_rels for the flattened range table. */ + foreach_ptr(pgpa_sj_unique_rel, ur, sj_unique_rels) + { + int rtindex = -1; + int rtoffset = 0; + bool dummy = false; + Bitmapset *relids = NULL; + + /* If this is a subplan, find the range table offset. */ + if (ur->plan_name != NULL) + { + foreach_node(SubPlanRTInfo, rtinfo, pstmt->subrtinfos) + { + if (strcmp(ur->plan_name, rtinfo->plan_name) == 0) + { + rtoffset = rtinfo->rtoffset; + dummy = rtinfo->dummy; + break; + } + } + + if (rtoffset == 0) + elog(ERROR, "no rtoffset for plan %s", ur->plan_name); + } + + /* If this entry pertains to a dummy subquery, ignore it. */ + if (dummy) + continue; + + /* Offset each entry from the original set. */ + while ((rtindex = bms_next_member(ur->relids, rtindex)) >= 0) + relids = bms_add_member(relids, rtindex + rtoffset); + + /* Store the resulting set. */ + sj_unique_rtis = lappend(sj_unique_rtis, relids); + } + + /* + * Remove any non-unique semjoin query features for which making the rel + * unique wasn't considered. + */ + foreach_ptr(pgpa_query_feature, qf, + walker->query_features[PGPAQF_SEMIJOIN_NON_UNIQUE]) + { + if (list_member(sj_unique_rtis, qf->relids)) + sj_nonunique_qfs = lappend(sj_nonunique_qfs, qf); + } + walker->query_features[PGPAQF_SEMIJOIN_NON_UNIQUE] = sj_nonunique_qfs; + + /* + * If we find any cases where analysis of the Plan tree shows that the + * semijoin was made unique but this possibility was never observed to be + * considered during planning, then we have a bug somewhere. + */ + foreach_ptr(pgpa_query_feature, qf, + walker->query_features[PGPAQF_SEMIJOIN_UNIQUE]) + { + if (!list_member(sj_unique_rtis, qf->relids)) + { + StringInfoData buf; + + initStringInfo(&buf); + outBitmapset(&buf, qf->relids); + elog(ERROR, + "unique semijoin found for relids %s but not observed during planning", + buf.data); + } + } +} + +/* + * Main workhorse for the plan tree walk. + * + * If within_join_problem is true, we encountered a join at some higher level + * of the tree walk and haven't yet descended out of the portion of the plan + * tree that is part of that same join problem. We're no longer in the same + * join problem if (1) we cross into a different subquery or (2) we descend + * through an Append or MergeAppend node, below which any further joins would + * be partitionwise joins planned separately from the outer join problem. + * + * If join_unroller != NULL, the join unroller code expects us to find a join + * that should be unrolled into that object. This implies that we're within a + * join problem, but the reverse is not true: when we've traversed all the + * joins but are still looking for the scan that is the leaf of the join tree, + * join_unroller will be NULL but within_join_problem will be true. + * + * Each element of active_query_features corresponds to some item of advice + * that needs to enumerate all the relations it affects. We add RTIs we find + * during tree traversal to each of these query features. + * + * If beneath_any_gather == true, some higher level of the tree traversal found + * a Gather or Gather Merge node. + */ +static void +pgpa_walk_recursively(pgpa_plan_walker_context *walker, Plan *plan, + bool within_join_problem, + pgpa_join_unroller *join_unroller, + List *active_query_features, + bool beneath_any_gather) +{ + pgpa_join_unroller *outer_join_unroller = NULL; + pgpa_join_unroller *inner_join_unroller = NULL; + bool join_unroller_toplevel = false; + List *pushdown_query_features = NIL; + ListCell *lc; + List *extraplans = NIL; + List *elided_nodes = NIL; + + Assert(within_join_problem || join_unroller == NULL); + + /* + * If this is a Gather or Gather Merge node, directly add it to the list + * of currently-active query features. + * + * Otherwise, check the future_query_features list to see whether this was + * previously identified as a plan node that needs to be treated as a + * query feature. + * + * Note that the caller also has a copy to active_query_features, so we + * can't destructively modify it without making a copy. + */ + if (IsA(plan, Gather)) + { + active_query_features = + lappend(list_copy(active_query_features), + pgpa_add_feature(walker, PGPAQF_GATHER, plan)); + beneath_any_gather = true; + } + else if (IsA(plan, GatherMerge)) + { + active_query_features = + lappend(list_copy(active_query_features), + pgpa_add_feature(walker, PGPAQF_GATHER_MERGE, plan)); + beneath_any_gather = true; + } + else + { + foreach_ptr(pgpa_query_feature, qf, walker->future_query_features) + { + if (qf->plan == plan) + { + active_query_features = list_copy(active_query_features); + active_query_features = lappend(active_query_features, qf); + walker->future_query_features = + list_delete_ptr(walker->future_query_features, plan); + break; + } + } + } + + /* + * Find all elided nodes for this Plan node. + */ + foreach_node(ElidedNode, n, walker->pstmt->elidedNodes) + { + if (n->plan_node_id == plan->plan_node_id) + elided_nodes = lappend(elided_nodes, n); + } + + /* If we found any elided_nodes, handle them. */ + if (elided_nodes != NIL) + { + int num_elided_nodes = list_length(elided_nodes); + ElidedNode *last_elided_node; + + /* + * RTIs for the final -- and thus logically uppermost -- elided node + * should be collected for query features passed down by the caller. + * However, elided nodes act as barriers to query features, which + * means that (1) the remaining elided nodes, if any, should be + * ignored for purposes of query features and (2) the list of active + * query features should be reset to empty so that we do not add RTIs + * from the plan node that is logically beneath the elided node to the + * query features passed down from the caller. + */ + last_elided_node = list_nth(elided_nodes, num_elided_nodes - 1); + pgpa_qf_add_rtis(active_query_features, + pgpa_filter_out_join_relids(last_elided_node->relids, + walker->pstmt->rtable)); + active_query_features = NIL; + + /* + * If we're within a join problem, the join_unroller is responsible + * for building the scan for the final elided node, so throw it out. + */ + if (within_join_problem) + elided_nodes = list_truncate(elided_nodes, num_elided_nodes - 1); + + /* Build scans for all (or the remaining) elided nodes. */ + foreach_node(ElidedNode, elided_node, elided_nodes) + { + (void) pgpa_build_scan(walker, plan, elided_node, + beneath_any_gather, within_join_problem); + } + + /* + * If there were any elided nodes, then everything beneath those nodes + * is not part of the same join problem. + * + * In more detail, if an Append or MergeAppend was elided, then a + * partitionwise join was chosen and only a single child survived; if + * a SubqueryScan was elided, the subquery was planned without + * flattening it into the parent. + */ + within_join_problem = false; + join_unroller = NULL; + } + + /* + * If we're within a join problem, the join unroller is responsible for + * building any required scan for this node. If not, we do it here. + */ + if (!within_join_problem) + (void) pgpa_build_scan(walker, plan, NULL, beneath_any_gather, false); + + /* + * If this join needs to unrolled but there's no join unroller already + * available, create one. + */ + if (join_unroller == NULL && pgpa_is_join(plan)) + { + join_unroller = pgpa_create_join_unroller(); + join_unroller_toplevel = true; + within_join_problem = true; + } + + /* + * If this join is to be unrolled, pgpa_unroll_join() will return the join + * unroller object that should be passed down when we recurse into the + * outer and inner sides of the plan. + */ + if (join_unroller != NULL) + pgpa_unroll_join(walker, plan, beneath_any_gather, join_unroller, + &outer_join_unroller, &inner_join_unroller); + + /* Add RTIs from the plan node to all active query features. */ + pgpa_qf_add_plan_rtis(active_query_features, plan, walker->pstmt->rtable); + + /* + * Recurse into the outer and inner subtrees. + * + * As an exception, if this is a ForeignScan, don't recurse. postgres_fdw + * sometimes stores an EPQ recheck plan in plan->leftree, but that's going + * to mention the same set of relations as the ForeignScan itself, and we + * have no way to emit advice targeting the EPQ case vs. the non-EPQ case. + * Moreover, it's not entirely clear what other FDWs might do with the + * left and right subtrees. Maybe some better handling is needed here, but + * for now, we just punt. + */ + if (!IsA(plan, ForeignScan)) + { + if (plan->lefttree != NULL) + pgpa_walk_recursively(walker, plan->lefttree, within_join_problem, + outer_join_unroller, active_query_features, + beneath_any_gather); + if (plan->righttree != NULL) + pgpa_walk_recursively(walker, plan->righttree, within_join_problem, + inner_join_unroller, active_query_features, + beneath_any_gather); + } + + /* + * If we created a join unroller up above, then it's also our join to use + * it to build the final pgpa_unrolled_join, and to destroy the object. + */ + if (join_unroller_toplevel) + { + pgpa_unrolled_join *ujoin; + + ujoin = pgpa_build_unrolled_join(walker, join_unroller); + walker->toplevel_unrolled_joins = + lappend(walker->toplevel_unrolled_joins, ujoin); + pgpa_destroy_join_unroller(join_unroller); + (void) pgpa_process_unrolled_join(walker, ujoin); + } + + /* + * Some plan types can have additional children. Nodes like Append that + * can have any number of children store them in a List; a SubqueryScan + * just has a field for a single additional Plan. + */ + switch (nodeTag(plan)) + { + case T_Append: + { + Append *aplan = (Append *) plan; + + extraplans = aplan->appendplans; + if (bms_is_empty(aplan->apprelids)) + pushdown_query_features = active_query_features; + } + break; + case T_MergeAppend: + { + MergeAppend *maplan = (MergeAppend *) plan; + + extraplans = maplan->mergeplans; + if (bms_is_empty(maplan->apprelids)) + pushdown_query_features = active_query_features; + } + break; + case T_BitmapAnd: + extraplans = ((BitmapAnd *) plan)->bitmapplans; + break; + case T_BitmapOr: + extraplans = ((BitmapOr *) plan)->bitmapplans; + break; + case T_SubqueryScan: + + /* + * We don't pass down active_query_features across here, because + * those are specific to a subquery level. + */ + pgpa_walk_recursively(walker, ((SubqueryScan *) plan)->subplan, + 0, NULL, NIL, beneath_any_gather); + break; + case T_CustomScan: + extraplans = ((CustomScan *) plan)->custom_plans; + break; + default: + break; + } + + /* If we found a list of extra children, iterate over it. */ + foreach(lc, extraplans) + { + Plan *subplan = lfirst(lc); + + pgpa_walk_recursively(walker, subplan, 0, NULL, pushdown_query_features, + beneath_any_gather); + } +} + +/* + * Perform final processing of a newly-constructed pgpa_unrolled_join. This + * only needs to be called for toplevel pgpa_unrolled_join objects, since it + * recurses to sub-joins as needed. + * + * Our goal is to add the set of inner relids to the relevant join_strategies + * list, and to do the same for any sub-joins. To that end, the return value + * is the set of relids found beneath the inner side of the join, but it is + * expected that the toplevel caller will ignore this. + */ +static Bitmapset * +pgpa_process_unrolled_join(pgpa_plan_walker_context *walker, + pgpa_unrolled_join *ujoin) +{ + Bitmapset *all_relids = NULL; + + for (int k = 0; k < ujoin->ninner; ++k) + { + pgpa_join_member *member = &ujoin->inner[k]; + Bitmapset *relids; + + if (member->unrolled_join != NULL) + relids = pgpa_process_unrolled_join(walker, + member->unrolled_join); + else + { + Assert(member->scan != NULL); + relids = member->scan->relids; + } + walker->join_strategies[ujoin->strategy[k]] = + lappend(walker->join_strategies[ujoin->strategy[k]], relids); + all_relids = bms_add_members(all_relids, relids); + } + + return all_relids; +} + +/* + * Arrange for the given plan node to be treated as a query feature when the + * tree walk reaches it. + * + * Make sure to only use this for nodes that the tree walk can't have reached + * yet! + */ +void +pgpa_add_future_feature(pgpa_plan_walker_context *walker, + pgpa_qf_type type, Plan *plan) +{ + pgpa_query_feature *qf = pgpa_add_feature(walker, type, plan); + + walker->future_query_features = + lappend(walker->future_query_features, qf); +} + +/* + * Return the last of any elided nodes associated with this plan node ID. + * + * The last elided node is the one that would have been uppermost in the plan + * tree had it not been removed during setrefs processig. + */ +ElidedNode * +pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan) +{ + ElidedNode *elided_node = NULL; + + foreach_node(ElidedNode, n, pstmt->elidedNodes) + { + if (n->plan_node_id == plan->plan_node_id) + elided_node = n; + } + + return elided_node; +} + +/* + * Certain plan nodes can refer to a set of RTIs. Extract and return the set. + */ +Bitmapset * +pgpa_relids(Plan *plan) +{ + if (IsA(plan, Result)) + return ((Result *) plan)->relids; + else if (IsA(plan, ForeignScan)) + return ((ForeignScan *) plan)->fs_relids; + else if (IsA(plan, Append)) + return ((Append *) plan)->apprelids; + else if (IsA(plan, MergeAppend)) + return ((MergeAppend *) plan)->apprelids; + + return NULL; +} + +/* + * Extract the scanned RTI from a plan node. + * + * Returns 0 if there isn't one. + */ +Index +pgpa_scanrelid(Plan *plan) +{ + switch (nodeTag(plan)) + { + case T_SeqScan: + case T_SampleScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_TidRangeScan: + case T_SubqueryScan: + case T_FunctionScan: + case T_TableFuncScan: + case T_ValuesScan: + case T_CteScan: + case T_NamedTuplestoreScan: + case T_WorkTableScan: + case T_ForeignScan: + case T_CustomScan: + case T_IndexScan: + case T_IndexOnlyScan: + return ((Scan *) plan)->scanrelid; + default: + return 0; + } +} + +/* + * Construct a new Bitmapset containing non-RTE_JOIN members of 'relids'. + */ +Bitmapset * +pgpa_filter_out_join_relids(Bitmapset *relids, List *rtable) +{ + int rti = -1; + Bitmapset *result = NULL; + + while ((rti = bms_next_member(relids, rti)) >= 0) + { + RangeTblEntry *rte = rt_fetch(rti, rtable); + + if (rte->rtekind != RTE_JOIN) + result = bms_add_member(result, rti); + } + + return result; +} + +/* + * Create a pgpa_query_feature and add it to the list of all query features + * for this plan. + */ +static pgpa_query_feature * +pgpa_add_feature(pgpa_plan_walker_context *walker, + pgpa_qf_type type, Plan *plan) +{ + pgpa_query_feature *qf = palloc0_object(pgpa_query_feature); + + qf->type = type; + qf->plan = plan; + + walker->query_features[qf->type] = + lappend(walker->query_features[qf->type], qf); + + return qf; +} + +/* + * Add a single RTI to each active query feature. + */ +static void +pgpa_qf_add_rti(List *active_query_features, Index rti) +{ + foreach_ptr(pgpa_query_feature, qf, active_query_features) + { + qf->relids = bms_add_member(qf->relids, rti); + } +} + +/* + * Add a set of RTIs to each active query feature. + */ +static void +pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids) +{ + foreach_ptr(pgpa_query_feature, qf, active_query_features) + { + qf->relids = bms_add_members(qf->relids, relids); + } +} + +/* + * Add RTIs directly contained in a plan node to each active query feature, + * but filter out any join RTIs, since advice doesn't mention those. + */ +static void +pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan, List *rtable) +{ + Bitmapset *relids; + Index rti; + + if ((relids = pgpa_relids(plan)) != NULL) + { + relids = pgpa_filter_out_join_relids(relids, rtable); + pgpa_qf_add_rtis(active_query_features, relids); + } + else if ((rti = pgpa_scanrelid(plan)) != 0) + pgpa_qf_add_rti(active_query_features, rti); +} + +/* + * If we generated plan advice using the provided walker object and array + * of identifiers, would we generate the specified tag/target combination? + * + * If yes, the plan conforms to the advice; if no, it does not. Note that + * we have know way of knowing whether the planner was forced to emit a plan + * that conformed to the advice or just happened to do so. + */ +bool +pgpa_walker_would_advise(pgpa_plan_walker_context *walker, + pgpa_identifier *rt_identifiers, + pgpa_advice_tag_type tag, + pgpa_advice_target *target) +{ + Index rtable_length = list_length(walker->pstmt->rtable); + Bitmapset *relids = NULL; + + if (tag == PGPA_TAG_JOIN_ORDER) + { + foreach_ptr(pgpa_unrolled_join, ujoin, walker->toplevel_unrolled_joins) + { + if (pgpa_walker_join_order_matches(ujoin, rtable_length, + rt_identifiers, target, true)) + return true; + } + + return false; + } + + if (target->ttype == PGPA_TARGET_IDENTIFIER) + { + Index rti; + + rti = pgpa_walker_get_rti(rtable_length, rt_identifiers, &target->rid); + relids = bms_make_singleton(rti); + } + else + { + Assert(target->ttype == PGPA_TARGET_ORDERED_LIST); + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + Index rti; + + Assert(child_target->ttype == PGPA_TARGET_IDENTIFIER); + rti = pgpa_compute_rti_from_identifier(rtable_length, + rt_identifiers, + &child_target->rid); + if (rti == 0) + elog(ERROR, "cannot determine RTI for advice target"); + relids = bms_add_member(relids, rti); + } + } + + switch (tag) + { + case PGPA_TAG_JOIN_ORDER: + /* should have been handled above */ + pg_unreachable(); + break; + case PGPA_TAG_BITMAP_HEAP_SCAN: + return pgpa_walker_contains_scan(walker, + PGPA_SCAN_BITMAP_HEAP, + relids); + case PGPA_TAG_FOREIGN_JOIN: + return pgpa_walker_contains_scan(walker, + PGPA_SCAN_FOREIGN, + relids); + case PGPA_TAG_INDEX_ONLY_SCAN: + return pgpa_walker_contains_scan(walker, + PGPA_SCAN_INDEX_ONLY, + relids); + case PGPA_TAG_INDEX_SCAN: + return pgpa_walker_contains_scan(walker, + PGPA_SCAN_INDEX, + relids); + case PGPA_TAG_PARTITIONWISE: + return pgpa_walker_contains_scan(walker, + PGPA_SCAN_PARTITIONWISE, + relids); + case PGPA_TAG_SEQ_SCAN: + return pgpa_walker_contains_scan(walker, + PGPA_SCAN_SEQ, + relids); + case PGPA_TAG_TID_SCAN: + return pgpa_walker_contains_scan(walker, + PGPA_SCAN_TID, + relids); + case PGPA_TAG_GATHER: + return pgpa_walker_contains_feature(walker, + PGPAQF_GATHER, + relids); + case PGPA_TAG_GATHER_MERGE: + return pgpa_walker_contains_feature(walker, + PGPAQF_GATHER_MERGE, + relids); + case PGPA_TAG_SEMIJOIN_NON_UNIQUE: + return pgpa_walker_contains_feature(walker, + PGPAQF_SEMIJOIN_NON_UNIQUE, + relids); + case PGPA_TAG_SEMIJOIN_UNIQUE: + return pgpa_walker_contains_feature(walker, + PGPAQF_SEMIJOIN_UNIQUE, + relids); + case PGPA_TAG_HASH_JOIN: + return pgpa_walker_contains_join(walker, + JSTRAT_HASH_JOIN, + relids); + case PGPA_TAG_MERGE_JOIN_MATERIALIZE: + return pgpa_walker_contains_join(walker, + JSTRAT_MERGE_JOIN_MATERIALIZE, + relids); + case PGPA_TAG_MERGE_JOIN_PLAIN: + return pgpa_walker_contains_join(walker, + JSTRAT_MERGE_JOIN_PLAIN, + relids); + case PGPA_TAG_NESTED_LOOP_MATERIALIZE: + return pgpa_walker_contains_join(walker, + JSTRAT_NESTED_LOOP_MATERIALIZE, + relids); + case PGPA_TAG_NESTED_LOOP_MEMOIZE: + return pgpa_walker_contains_join(walker, + JSTRAT_NESTED_LOOP_MEMOIZE, + relids); + case PGPA_TAG_NESTED_LOOP_PLAIN: + return pgpa_walker_contains_join(walker, + JSTRAT_NESTED_LOOP_PLAIN, + relids); + case PGPA_TAG_NO_GATHER: + return pgpa_walker_contains_no_gather(walker, relids); + } + + /* should not get here */ + return false; +} + +/* + * Does an unrolled join match the join order specified by an advice target? + */ +static bool +pgpa_walker_join_order_matches(pgpa_unrolled_join *ujoin, + Index rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_advice_target *target, + bool toplevel) +{ + int nchildren = list_length(target->children); + + Assert(target->ttype == PGPA_TARGET_ORDERED_LIST); + + /* At toplevel, we allow a prefix match. */ + if (toplevel) + { + if (nchildren > ujoin->ninner + 1) + return false; + } + else + { + if (nchildren != ujoin->ninner + 1) + return false; + } + + /* Outermost rel must match. */ + if (!pgpa_walker_join_order_matches_member(&ujoin->outer, + rtable_length, + rt_identifiers, + linitial(target->children))) + return false; + + /* Each inner rel must match. */ + for (int n = 0; n < nchildren - 1; ++n) + { + pgpa_advice_target *child_target = list_nth(target->children, n + 1); + + if (!pgpa_walker_join_order_matches_member(&ujoin->inner[n], + rtable_length, + rt_identifiers, + child_target)) + return false; + } + + return true; +} + +/* + * Does one member of an unrolled join match an advice target? + */ +static bool +pgpa_walker_join_order_matches_member(pgpa_join_member *member, + Index rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_advice_target *target) +{ + Bitmapset *relids = NULL; + + if (member->unrolled_join != NULL) + { + if (target->ttype != PGPA_TARGET_ORDERED_LIST) + return false; + return pgpa_walker_join_order_matches(member->unrolled_join, + rtable_length, + rt_identifiers, + target, + false); + } + + Assert(member->scan != NULL); + switch (target->ttype) + { + case PGPA_TARGET_ORDERED_LIST: + /* Could only match an unrolled join */ + return false; + + case PGPA_TARGET_UNORDERED_LIST: + { + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + Index rti; + + rti = pgpa_walker_get_rti(rtable_length, rt_identifiers, + &child_target->rid); + relids = bms_add_member(relids, rti); + } + break; + } + + case PGPA_TARGET_IDENTIFIER: + { + Index rti; + + rti = pgpa_walker_get_rti(rtable_length, rt_identifiers, + &target->rid); + relids = bms_make_singleton(rti); + break; + } + } + + return bms_equal(member->scan->relids, relids); +} + +/* + * Does this walker say that the given scan strategy should be used for the + * given relid set? + */ +static bool +pgpa_walker_contains_scan(pgpa_plan_walker_context *walker, + pgpa_scan_strategy strategy, + Bitmapset *relids) +{ + List *scans = walker->scans[strategy]; + + foreach_ptr(pgpa_scan, scan, scans) + { + /* + * XXX. If this is index-related advice, we should also validate that + * the advice target's index target matches the Plan tree. + */ + if (bms_equal(scan->relids, relids)) + return true; + } + + return false; +} + +/* + * Does this walker say that the given query feature applies to the given + * relid set? + */ +static bool +pgpa_walker_contains_feature(pgpa_plan_walker_context *walker, + pgpa_qf_type type, + Bitmapset *relids) +{ + List *query_features = walker->query_features[type]; + + foreach_ptr(pgpa_query_feature, qf, query_features) + { + if (bms_equal(qf->relids, relids)) + return true; + } + + return false; +} + +/* + * Does the walker say that the given join strategy should be used for the + * given relid set? + */ +static bool +pgpa_walker_contains_join(pgpa_plan_walker_context *walker, + pgpa_join_strategy strategy, + Bitmapset *relids) +{ + List *join_strategies = walker->join_strategies[strategy]; + + foreach_ptr(Bitmapset, jsrelids, join_strategies) + { + if (bms_equal(jsrelids, relids)) + return true; + } + + return false; +} + +/* + * Does the walker say that the given relids should be marked as NO_GATHER? + */ +static bool +pgpa_walker_contains_no_gather(pgpa_plan_walker_context *walker, + Bitmapset *relids) +{ + return bms_is_subset(relids, walker->no_gather_scans); +} + +/* + * Convenience function to convert a relation identifier to an RTI. + * + * We throw an error here because we expect this to be used on system-generated + * advice. Hence, failure here indicates an advice generation bug. + */ +static Index +pgpa_walker_get_rti(Index rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_identifier *rid) +{ + Index rti; + + rti = pgpa_compute_rti_from_identifier(rtable_length, + rt_identifiers, + rid); + if (rti == 0) + elog(ERROR, "cannot determine RTI for advice target"); + return rti; +} diff --git a/contrib/pg_plan_advice/pgpa_walker.h b/contrib/pg_plan_advice/pgpa_walker.h new file mode 100644 index 0000000000..b91a36ca3d --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_walker.h @@ -0,0 +1,141 @@ +/*------------------------------------------------------------------------- + * + * pgpa_walker.h + * Plan tree iteration + * + * Copyright (c) 2016-2025, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_walker.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_WALKER_H +#define PGPA_WALKER_H + +#include "pgpa_ast.h" +#include "pgpa_join.h" +#include "pgpa_scan.h" + +/* + * When generating advice, we should emit either SEMIJOIN_UNIQUE advice or + * SEMIJOIN_NON_UNIQUE advice for each semijoin depending on whether we chose + * to implement it as a semijoin or whether we instead chose to make the + * nullable side unique and then perform an inner join. When the make-unique + * strategy is not chosen, it's not easy to tell from the final plan tree + * whether it was considered. That's awkward, because we don't want to emit + * useless SEMIJOIN_NON_UNIQUE advice when there was no decision to be made. + * + * To avoid that, during planning, we create a pgpa_sj_unique_rel for each + * relation that we considered making unique for purposes of semijoin planning. + */ +typedef struct pgpa_sj_unique_rel +{ + char *plan_name; + Bitmapset *relids; +} pgpa_sj_unique_rel; + +/* + * We use the term "query feature" to refer to plan nodes that are interesting + * in the following way: to generate advice, we'll need to know the set of + * same-subquery, non-join RTIs occuring at or below that plan node, without + * admixture of parent and child RTIs. + * + * For example, Gather nodes, desiginated by PGPAQF_GATHER, and Gather Merge + * nodes, designated by PGPAQF_GATHER_MERGE, are query features, because we'll + * want to admit some kind of advice that describes the portion of the plan + * tree that appears beneath those nodes. + * + * Each semijoin can be implemented either by directly performing a semijoin, + * or by making one side unique and then performing a normal join. Either way, + * we use a query feature to notice what decision was made, so that we can + * describe it by enumerating the RTIs on that side of the join. + * + * To elaborate on the "no admixture of parent and child RTIs" rule, in all of + * these cases, if the entirety of an inheritance hierarchy appears beneath + * the query feature, we only want to name the parent table. But it's also + * possible to have cases where we must name child tables. This is particularly + * likely to happen when partitionwise join is in use, but could happen for + * Gather or Gather Merge even without that, if one of those appears below + * an Append or MergeAppend node for a single table. + */ +typedef enum pgpa_qf_type +{ + PGPAQF_GATHER, + PGPAQF_GATHER_MERGE, + PGPAQF_SEMIJOIN_NON_UNIQUE, + PGPAQF_SEMIJOIN_UNIQUE + /* update NUM_PGPA_QF_TYPES if you add anything here */ +} pgpa_qf_type; + +#define NUM_PGPA_QF_TYPES ((int) PGPAQF_SEMIJOIN_UNIQUE + 1) + +/* + * For each query feature, we keep track of the feature type and the set of + * relids that we found underneath the relevant plan node. See the comments + * on pgpa_qf_type, above, for additional details. + */ +typedef struct pgpa_query_feature +{ + pgpa_qf_type type; + Plan *plan; + Bitmapset *relids; +} pgpa_query_feature; + +/* + * Context object for plan tree walk. + * + * pstmt is the PlannedStmt we're studying. + * + * scans is an array of lists of pgpa_scan objects. The array is indexed by + * the scan's pgpa_scan_strategy. + * + * no_gather_scans is the set of scan RTIs that do not appear beneath any + * Gather or Gather Merge node. + * + * toplevel_unrolled_joins is a list of all pgpa_unrolled_join objects that + * are not a child of some other pgpa_unrolled_join. + * + * join_strategy is an array of lists of Bitmapset objects. Each Bitmapset + * is the set of relids that appears on the inner side of some join (excluding + * RTIs from partition children and subqueries). The array is indexed by + * pgpa_join_strategy. + * + * query_features is an array lists of pgpa_query_feature objects, indexed + * by pgpa_qf_type. + * + * future_query_features is only used during the plan tree walk and should + * be empty when the tree walk concludes. It is a list of pgpa_query_feature + * objects for Plan nodes that the plan tree walk has not yet encountered; + * when encountered, they will be moved to the list of active query features + * that is propagated via the call stack. + */ +typedef struct pgpa_plan_walker_context +{ + PlannedStmt *pstmt; + List *scans[NUM_PGPA_SCAN_STRATEGY]; + Bitmapset *no_gather_scans; + List *toplevel_unrolled_joins; + List *join_strategies[NUM_PGPA_JOIN_STRATEGY]; + List *query_features[NUM_PGPA_QF_TYPES]; + List *future_query_features; +} pgpa_plan_walker_context; + +extern void pgpa_plan_walker(pgpa_plan_walker_context *walker, + PlannedStmt *pstmt, + List *sj_unique_rels); + +extern void pgpa_add_future_feature(pgpa_plan_walker_context *walker, + pgpa_qf_type type, + Plan *plan); + +extern ElidedNode *pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan); +extern Bitmapset *pgpa_relids(Plan *plan); +extern Index pgpa_scanrelid(Plan *plan); +extern Bitmapset *pgpa_filter_out_join_relids(Bitmapset *relids, List *rtable); + +extern bool pgpa_walker_would_advise(pgpa_plan_walker_context *walker, + pgpa_identifier *rt_identifiers, + pgpa_advice_tag_type tag, + pgpa_advice_target *target); + +#endif diff --git a/contrib/pg_plan_advice/sql/gather.sql b/contrib/pg_plan_advice/sql/gather.sql new file mode 100644 index 0000000000..cb04ed5cf3 --- /dev/null +++ b/contrib/pg_plan_advice/sql/gather.sql @@ -0,0 +1,79 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 1; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +SET min_parallel_table_scan_size = 0; +SET debug_parallel_query = off; + +CREATE TABLE gt_dim (id serial primary key, dim text) + WITH (autovacuum_enabled = false); +INSERT INTO gt_dim (dim) SELECT random()::text FROM generate_series(1,100) g; +VACUUM ANALYZE gt_dim; + +CREATE TABLE gt_fact ( + id int not null, + dim_id integer not null references gt_dim (id) +) WITH (autovacuum_enabled = false); +INSERT INTO gt_fact + SELECT g, (g%3)+1 FROM generate_series(1,100000) g; +VACUUM ANALYZE gt_fact; + +-- By default, we expect Gather Merge with a parallel hash join. +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + +-- Force Gather or Gather Merge of both relations together. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +SET LOCAL pg_plan_advice.advice = 'gather((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +COMMIT; + +-- Force a separate Gather or Gather Merge operation for each relation. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +SET LOCAL pg_plan_advice.advice = 'gather(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +SET LOCAL pg_plan_advice.advice = 'gather((d d/d.d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +COMMIT; + +-- Force a Gather or Gather Merge on one relation but no parallelism on other. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge(f) no_gather(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +SET LOCAL pg_plan_advice.advice = 'gather_merge(d) no_gather(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +SET LOCAL pg_plan_advice.advice = 'gather(f) no_gather(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +SET LOCAL pg_plan_advice.advice = 'gather(d) no_gather(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +COMMIT; + +-- Force no Gather or Gather Merge use at all. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'no_gather(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +COMMIT; + +-- Can't force Gather Merge without the ORDER BY clause, but just Gather is OK. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'gather((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id; +COMMIT; diff --git a/contrib/pg_plan_advice/sql/join_order.sql b/contrib/pg_plan_advice/sql/join_order.sql new file mode 100644 index 0000000000..5aa2fc62d3 --- /dev/null +++ b/contrib/pg_plan_advice/sql/join_order.sql @@ -0,0 +1,96 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; + +CREATE TABLE jo_dim1 (id integer primary key, dim1 text, val1 int) + WITH (autovacuum_enabled = false); +INSERT INTO jo_dim1 (id, dim1, val1) + SELECT g, 'some filler text ' || g, (g % 3) + 1 + FROM generate_series(1,100) g; +VACUUM ANALYZE jo_dim1; +CREATE TABLE jo_dim2 (id integer primary key, dim2 text, val2 int) + WITH (autovacuum_enabled = false); +INSERT INTO jo_dim2 (id, dim2, val2) + SELECT g, 'some filler text ' || g, (g % 7) + 1 + FROM generate_series(1,1000) g; +VACUUM ANALYZE jo_dim2; + +CREATE TABLE jo_fact ( + id int primary key, + dim1_id integer not null references jo_dim1 (id), + dim2_id integer not null references jo_dim2 (id) +) WITH (autovacuum_enabled = false); +INSERT INTO jo_fact + SELECT g, (g%100)+1, (g%100)+1 FROM generate_series(1,100000) g; +VACUUM ANALYZE jo_fact; + +-- We expect to join to d2 first and then d1, since the condition on d2 +-- is more selective. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + +-- Force a few different join orders. Some of these are very inefficient, +-- but the planner considers them all viable. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +SET LOCAL pg_plan_advice.advice = 'join_order(d1 f d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +SET LOCAL pg_plan_advice.advice = 'join_order(f (d1 d2))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +COMMIT; + +-- The unusual formulation of this query is intended to prevent the query +-- planner from reducing the FULL JOIN to some other join type, so that we +-- can test what happens with a join type that cannot be reordered. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; + +-- We should not be able to force the planner to join f to d1 first, because +-- that is not a valid join order, but we should be able to force the planner +-- to make either d2 or f the driving table. +BEGIN; +-- XXX: The advice feedback says 'partially matched' here which isn't exactly +-- wrong given the way that flag is handled in the code, but it's at the very +-- least confusing. Something should probably be improved here. +SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; +SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; +SET LOCAL pg_plan_advice.advice = 'join_order(d2 f d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; +COMMIT; + +-- XXX: add tests for join order prefix matching +-- XXX: join_order(justonerel) shouldn't report partially matched diff --git a/contrib/pg_plan_advice/sql/join_strategy.sql b/contrib/pg_plan_advice/sql/join_strategy.sql new file mode 100644 index 0000000000..8eb823f1c0 --- /dev/null +++ b/contrib/pg_plan_advice/sql/join_strategy.sql @@ -0,0 +1,76 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; + +CREATE TABLE join_dim (id serial primary key, dim text) + WITH (autovacuum_enabled = false); +INSERT INTO join_dim (dim) SELECT random()::text FROM generate_series(1,100) g; +VACUUM ANALYZE join_dim; + +CREATE TABLE join_fact ( + id int primary key, + dim_id integer not null references join_dim (id) +) WITH (autovacuum_enabled = false); +INSERT INTO join_fact + SELECT g, (g%3)+1 FROM generate_series(1,100000) g; +CREATE INDEX join_fact_dim_id ON join_fact (dim_id); +VACUUM ANALYZE join_fact; + +-- We expect a hash join by default. +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + +-- Try forcing each join method in turn with join_dim as the inner table. +-- All of these should work except for MERGE_JOIN_MATERIALIZE; that will +-- fail, because the planner knows that join_dim (id) is unique, and will +-- refuse to add mark/restore overhead. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +COMMIT; + +-- Now try forcing each join method in turn with join_fact as the inner +-- table. All of these should work. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +COMMIT; + +-- We can't force a foreign join between these tables, because they +-- aren't foreign tables. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'FOREIGN_JOIN((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +COMMIT; diff --git a/contrib/pg_plan_advice/sql/local_collector.sql b/contrib/pg_plan_advice/sql/local_collector.sql new file mode 100644 index 0000000000..fc838b2204 --- /dev/null +++ b/contrib/pg_plan_advice/sql/local_collector.sql @@ -0,0 +1,41 @@ +CREATE EXTENSION pg_plan_advice; +SET debug_parallel_query = off; + +-- Try clearing advice before we've collected any. +SELECT pg_clear_collected_local_advice(); + +-- Set a small advice collection limit so that we'll exceed it. +SET pg_plan_advice.local_collection_limit = 2; + +-- Set up a dummy table. +CREATE TABLE dummy_table (a int primary key, b text) + WITH (autovacuum_enabled = false, parallel_workers = 0); + +-- Test queries. +SELECT * FROM dummy_table a, dummy_table b; +SELECT * FROM dummy_table; + +-- Should return the advice from the second test query. +SELECT advice FROM pg_get_collected_local_advice() ORDER BY id DESC LIMIT 1; + +-- Now try clearing advice again. +SELECT pg_clear_collected_local_advice(); + +-- Raise the collection limit so that the collector uses multiple chunks. +SET pg_plan_advice.local_collection_limit = 2000; + +-- Push a bunch of queries through the collector. +DO $$ +BEGIN + FOR x IN 1..2000 LOOP + EXECUTE 'SELECT * FROM dummy_table'; + END LOOP; +END +$$; + +-- Check that the collector worked. +SELECT COUNT(*) FROM pg_get_collected_local_advice(); + +-- And clear one more time, to verify that this doesn't cause a problem +-- even with a larger number of entries. +SELECT pg_clear_collected_local_advice(); diff --git a/contrib/pg_plan_advice/sql/partitionwise.sql b/contrib/pg_plan_advice/sql/partitionwise.sql new file mode 100644 index 0000000000..e42c061176 --- /dev/null +++ b/contrib/pg_plan_advice/sql/partitionwise.sql @@ -0,0 +1,78 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +SET enable_partitionwise_join = true; + +CREATE TABLE pt1 (id integer primary key, dim1 text, val1 int) + PARTITION BY RANGE (id); +CREATE TABLE pt1a PARTITION OF pt1 FOR VALUES FROM (1) to (1001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt1b PARTITION OF pt1 FOR VALUES FROM (1001) to (2001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt1c PARTITION OF pt1 FOR VALUES FROM (2001) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO pt1 (id, dim1, val1) + SELECT g, 'some filler text ' || g, (g % 3) + 1 + FROM generate_series(1,3000) g; +VACUUM ANALYZE pt1; + +CREATE TABLE pt2 (id integer primary key, dim2 text, val2 int) + PARTITION BY RANGE (id); +CREATE TABLE pt2a PARTITION OF pt2 FOR VALUES FROM (1) to (1001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt2b PARTITION OF pt2 FOR VALUES FROM (1001) to (2001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt2c PARTITION OF pt2 FOR VALUES FROM (2001) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO pt2 (id, dim2, val2) + SELECT g, 'some other text ' || g, (g % 5) + 1 + FROM generate_series(1,3000) g; +VACUUM ANALYZE pt2; + +CREATE TABLE pt3 (id integer primary key, dim3 text, val3 int) + PARTITION BY RANGE (id); +CREATE TABLE pt3a PARTITION OF pt3 FOR VALUES FROM (1) to (1001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt3b PARTITION OF pt3 FOR VALUES FROM (1001) to (2001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt3c PARTITION OF pt3 FOR VALUES FROM (2001) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO pt3 (id, dim3, val3) + SELECT g, 'a third random text ' || g, (g % 7) + 1 + FROM generate_series(1,3000) g; +VACUUM ANALYZE pt3; + +CREATE TABLE ptmismatch (id integer primary key, dimm text, valm int) + PARTITION BY RANGE (id); +CREATE TABLE ptmismatcha PARTITION OF ptmismatch + FOR VALUES FROM (1) to (1501) + WITH (autovacuum_enabled = false); +CREATE TABLE ptmismatchb PARTITION OF ptmismatch + FOR VALUES FROM (1501) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO ptmismatch (id, dimm, valm) + SELECT g, 'yet another text ' || g, (g % 2) + 1 + FROM generate_series(1,3000) g; +VACUUM ANALYZE ptmismatch; + +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; + +-- Suppress partitionwise join, or do it just partially. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE(pt1 pt2 pt3)'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) pt3)'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; +COMMIT; + +-- Can't force a partitionwise join with a mismatched table. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 ptmismatch))'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, ptmismatch WHERE pt1.id = ptmismatch.id; +COMMIT; diff --git a/contrib/pg_plan_advice/sql/scan.sql b/contrib/pg_plan_advice/sql/scan.sql new file mode 100644 index 0000000000..25416a75f4 --- /dev/null +++ b/contrib/pg_plan_advice/sql/scan.sql @@ -0,0 +1,195 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +SET seq_page_cost = 0.1; +SET random_page_cost = 0.1; +SET cpu_tuple_cost = 0; +SET cpu_index_tuple_cost = 0; + +CREATE TABLE scan_table (a int primary key, b text) + WITH (autovacuum_enabled = false); +INSERT INTO scan_table + SELECT g, 'some text ' || g FROM generate_series(1, 100000) g; +CREATE INDEX scan_table_b ON scan_table USING brin (b); +VACUUM ANALYZE scan_table; + +-- Sequential scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + +-- Index scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + +-- Index-only scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + +-- Bitmap heap scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE b > 'some text 8'; + +-- TID scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)'; + +-- TID range scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE ctid > '(1,1)' AND ctid < '(2,1)'; + +-- Try forcing each of our test queries to use the scan type they +-- wanted to use anyway. This should succeed. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_b)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE b > 'some text 8'; +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE ctid > '(1,1)' AND ctid < '(2,1)'; +COMMIT; + +-- Try to force a full scan of the table to use some other scan type. All +-- of these will fail. An index scan or bitmap heap scan could potentially +-- generate the correct answer, but the planner does not even consider these +-- possibilities due to the lack of a WHERE clause. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; +SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; +COMMIT; + +-- Try again to force index use. This should now succeed for the INDEX_SCAN +-- and BITMAP_HEAP_SCAN, but the INDEX_ONLY_SCAN can't be forced because the +-- query fetches columns not included in the index. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0; +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0; +SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0; +COMMIT; + +-- We can force a primary key lookup to use a sequential scan, but we +-- can't force it to use an index-only scan (due to the column list) +-- or a TID scan (due to the absence of a TID qual). +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +COMMIT; + +-- We can forcibly downgrade an index-only scan to an index scan, but we can't +-- force the use of an index that the planner thinks is inapplicable. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +COMMIT; + +-- We can force the use of a sequential scan in place of a bitmap heap scan, +-- but a plain index scan on a BRIN index is not possible. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE b > 'some text 8'; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +COMMIT; + +-- We can force the use of a sequential scan rather than a TID scan or +-- TID range scan. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE ctid > '(1,1)' AND ctid < '(2,1)'; +COMMIT; + +-- Test more complex scenarios with index scans. +BEGIN; +-- Should still work if we mention the schema. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +-- But not if we mention the wrong schema. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table cilbup.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +-- It's OK to repeat the same advice. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +-- But it doesn't work if the index target is even notionally different. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table public.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +COMMIT; + +-- Test assorted incorrect advice. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(nothing)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(nothing whatsoever)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table bogus)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(nothing whatsoever)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table bogus)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; +COMMIT; + +-- Test our ability to refer to multiple instances of the same alias. +BEGIN; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s#2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s) SEQ_SCAN(s#2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; +COMMIT; + +-- Test our ability to refer to scans within a subquery. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); +BEGIN; +-- Should not match. +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); +-- Should match first query only. +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@x)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); +-- Should match second query only. +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@unnamed_subquery)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); +COMMIT; diff --git a/contrib/pg_plan_advice/sql/syntax.sql b/contrib/pg_plan_advice/sql/syntax.sql new file mode 100644 index 0000000000..0692dc895c --- /dev/null +++ b/contrib/pg_plan_advice/sql/syntax.sql @@ -0,0 +1,57 @@ +LOAD 'pg_plan_advice'; + +-- An empty string is allowed, and so is an empty target list. +SET pg_plan_advice.advice = ''; +EXPLAIN SELECT 1; +SET pg_plan_advice.advice = 'SEQ_SCAN()'; +EXPLAIN SELECT 1; + +-- Test assorted variations in capitalization, whitespace, and which parts of +-- the relation identifier are included. These should all work. +SET pg_plan_advice.advice = 'SEQ_SCAN(x)'; +EXPLAIN SELECT 1; +SET pg_plan_advice.advice = 'seq_scan(x@y)'; +EXPLAIN SELECT 1; +SET pg_plan_advice.advice = 'SEQ_scan(x#2)'; +EXPLAIN SELECT 1; +SET pg_plan_advice.advice = 'SEQ_SCAN (x/y)'; +EXPLAIN SELECT 1; +SET pg_plan_advice.advice = ' SEQ_SCAN ( x / y . z ) '; +EXPLAIN SELECT 1; +SET pg_plan_advice.advice = 'SEQ_SCAN("x"#2/"y"."z"@"t")'; +EXPLAIN SELECT 1; + +-- Syntax errors. +SET pg_plan_advice.advice = 'SEQUENTIAL_SCAN(x)'; +SET pg_plan_advice.advice = 'SEQ_SCAN'; +SET pg_plan_advice.advice = 'SEQ_SCAN('; +SET pg_plan_advice.advice = 'SEQ_SCAN("'; +SET pg_plan_advice.advice = 'SEQ_SCAN("")'; +SET pg_plan_advice.advice = 'SEQ_SCAN("a"'; +SET pg_plan_advice.advice = 'SEQ_SCAN(#'; +SET pg_plan_advice.advice = '()'; +SET pg_plan_advice.advice = '123'; + +-- Legal comments. +SET pg_plan_advice.advice = '/**/'; +EXPLAIN SELECT 1; +SET pg_plan_advice.advice = 'HASH_JOIN(_)/***/'; +EXPLAIN SELECT 1; +SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(/*x*/y)'; +EXPLAIN SELECT 1; +SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(y//*x*/z)'; +EXPLAIN SELECT 1; + +-- Unterminated comments. +SET pg_plan_advice.advice = '/*'; +SET pg_plan_advice.advice = 'JOIN_ORDER("fOO") /* oops'; + +-- Nested comments are not supported, so the first of these is legal and +-- the second is not. +SET pg_plan_advice.advice = '/*/*/'; +EXPLAIN SELECT 1; +SET pg_plan_advice.advice = '/*/* stuff */*/'; + +-- Foreign join requires multiple relation identifiers. +SET pg_plan_advice.advice = 'FOREIGN_JOIN(a)'; +SET pg_plan_advice.advice = 'FOREIGN_JOIN((a))'; diff --git a/contrib/pg_plan_advice/t/001_regress.pl b/contrib/pg_plan_advice/t/001_regress.pl new file mode 100644 index 0000000000..5f7584c3a0 --- /dev/null +++ b/contrib/pg_plan_advice/t/001_regress.pl @@ -0,0 +1,147 @@ +# Copyright (c) 2021-2025, PostgreSQL Global Development Group + +# Run the core regression tests under pg_plan_advice to check for problems. +use strict; +use warnings FATAL => 'all'; + +use Cwd qw(abs_path); +use File::Basename qw(dirname); + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Initialize the primary node +my $node = PostgreSQL::Test::Cluster->new('main'); +$node->init(); + +# Set up our desired configuration. +# +# We run with pg_plan_advice.shared_collection_limit set to ensure that the +# plan tree walker code runs against every query in the regression tests. If +# we're unable to properly analyze any of those plan trees, this test should fail. +# +# We set pg_plan_advice.advice to an advice string that will cause the advice +# trove to be populated with a few entries of various sorts, but which we do +# not expect to match anything in the regression test queries. This way, the +# planner hooks will be called, improving code coverage, but no plans should +# actually change. +# +# pg_plan_advice.always_explain_supplied_advice=false is needed to avoid breaking +# regression test queries that use EXPLAIN. In the real world, it seems like +# users will want EXPLAIN output to show supplied advice so that it's clear +# whether normal planner behavior has been altered, but here that's undesirable. +$node->append_conf('postgresql.conf', <<EOM); +pg_plan_advice.shared_collection_limit=1000000 +shared_preload_libraries=pg_plan_advice +pg_plan_advice.advice='SEQ_SCAN(entirely_fictitious) HASH_JOIN(total_fabrication) GATHER(completely_imaginary)' +pg_plan_advice.always_explain_supplied_advice=false +EOM +$node->start; + +my $srcdir = abs_path("../.."); + +# --dlpath is needed to be able to find the location of regress.so +# and any libraries the regression tests require. +my $dlpath = dirname($ENV{REGRESS_SHLIB}); + +# --outputdir points to the path where to place the output files. +my $outputdir = $PostgreSQL::Test::Utils::tmp_check; + +# --inputdir points to the path of the input files. +my $inputdir = "$srcdir/src/test/regress"; + +# Run the tests. +my $rc = + system($ENV{PG_REGRESS} . " " + . "--bindir= " + . "--dlpath=\"$dlpath\" " + . "--host=" . $node->host . " " + . "--port=" . $node->port . " " + . "--schedule=$srcdir/src/test/regress/parallel_schedule " + . "--max-concurrent-tests=20 " + . "--inputdir=\"$inputdir\" " + . "--outputdir=\"$outputdir\""); + +# Dump out the regression diffs file, if there is one +if ($rc != 0) +{ + my $diffs = "$outputdir/regression.diffs"; + if (-e $diffs) + { + print "=== dumping $diffs ===\n"; + print slurp_file($diffs); + print "=== EOF ===\n"; + } +} + +# Report results +is($rc, 0, 'regression tests pass'); + +# Create the extension so we can access the collector +$node->safe_psql('postgres', 'CREATE EXTENSION pg_plan_advice'); + +# Verify that a large amount of advice was collected +my $all_query_count = $node->safe_psql('postgres', <<EOM); +SELECT COUNT(*) FROM pg_get_collected_shared_advice(); +EOM +cmp_ok($all_query_count, '>', 20000, "copious advice collected"); + +# Verify that lots of different advice strings were collected +my $distinct_query_count = $node->safe_psql('postgres', <<EOM); +SELECT COUNT(*) FROM + (SELECT DISTINCT advice FROM pg_get_collected_shared_advice()); +EOM +cmp_ok($distinct_query_count, '>', 3000, "diverse advice collected"); + +# We want to test for the presence of our known tags in the collected advice. +# Put all tags into the hash that follows; map any tags that aren't tested +# by the core regression tests to 0, and others to 1. +my %tag_map = ( + BITMAP_HEAP_SCAN => 1, + FOREIGN_JOIN => 0, + GATHER => 1, + GATHER_MERGE => 1, + HASH_JOIN => 1, + INDEX_ONLY_SCAN => 1, + INDEX_SCAN => 1, + JOIN_ORDER => 1, + MERGE_JOIN_MATERIALIZE => 1, + MERGE_JOIN_PLAIN => 1, + NESTED_LOOP_MATERIALIZE => 1, + NESTED_LOOP_MEMOIZE => 1, + NESTED_LOOP_PLAIN => 1, + NO_GATHER => 1, + PARTITIONWISE => 1, + SEMIJOIN_NON_UNIQUE => 1, + SEMIJOIN_UNIQUE => 1, + SEQ_SCAN => 1, + TID_SCAN => 1, +); +for my $tag (sort keys %tag_map) +{ + my $checkit = $tag_map{$tag}; + + # Search for the given tag. This is not entirely robust: it could get thrown + # off by a table alias such as "FOREIGN_JOIN(", but that probably won't + # happen in the core regression tests. + my $tag_count = $node->safe_psql('postgres', <<EOM); +SELECT COUNT(*) FROM pg_get_collected_shared_advice() + WHERE advice LIKE '%$tag(%' +EOM + + # Check that the tag got a non-trivial amount of use, unless told otherwise. + cmp_ok($tag_count, '>', 10, "multiple uses of $tag") if $checkit; + + # Regardless, note the exact count in the log, for human consumption. + note("found $tag_count advice strings containing $tag"); +} + +# Trigger a partial cleanup of the shared advice collector, and then a full +# cleanup. +$node->safe_psql('postgres', <<EOM); +SET pg_plan_advice.shared_collection_limit=500; +SELECT * FROM pg_clear_collected_shared_advice(); +EOM + +done_testing(); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 58c9a3f1e0..47cdb4a7ad 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3964,6 +3964,44 @@ pg_uuid_t pg_wchar pg_wchar_tbl pgp_armor_headers_state +pgpa_collected_advice +pgpa_advice_item +pgpa_advice_tag_type +pgpa_advice_target +pgpa_identifier +pgpa_index_target +pgpa_index_type +pgpa_itm_type +pgpa_join_class +pgpa_join_member +pgpa_join_state +pgpa_join_strategy +pgpa_join_unroller +pgpa_local_advice +pgpa_local_advice_chunk +pgpa_output_context +pgpa_plan_walker_context +pgpa_planner_state +pgpa_qf_type +pgpa_query_feature +pgpa_ri_checker +pgpa_ri_checker_key +pgpa_scan +pgpa_scan_strategy +pgpa_shared_advice +pgpa_shared_advice_chunk +pgpa_shared_state +pgpa_sj_unique_rel +pgpa_target_type +pgpa_trove +pgpa_trove_entry +pgpa_trove_entry_element +pgpa_trove_entry_hash +pgpa_trove_entry_key +pgpa_trove_lookup_type +pgpa_trove_result +pgpa_trove_slice +pgpa_unrolled_join pgsocket pgsql_thing_t pgssEntry |
