From 069bd67931b55b6806c6ef1bf3ff4c19b241448b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Erik=20Nordstr=C3=B6m?=
Date: Thu, 19 Sep 2024 16:53:34 +0200
Subject: [PATCH] Add ability to plan based on chunk table AM

A chunk can use different table access methods, so to support this more
easily, cache the AM OID in the chunk struct. This allows identifying
the access method quickly at, e.g., planning time.
---
 src/chunk.c                                   | 10 ++-
 src/chunk.h                                   |  1 +
 src/chunk_scan.c                              |  5 +-
 src/import/allpaths.c                         |  7 +-
 src/planner/planner.c                         |  9 +--
 src/planner/planner.h                         | 42 ++++++++++
 src/utils.c                                   | 74 ++++++++++++++++++
 src/utils.h                                   |  5 ++
 .../nodes/decompress_chunk/decompress_chunk.c | 24 +++---
 .../nodes/decompress_chunk/decompress_chunk.h |  4 +-
 tsl/src/planner.c                             | 77 ++++++++++++-------
 11 files changed, 201 insertions(+), 57 deletions(-)

diff --git a/src/chunk.c b/src/chunk.c
index 1863a24a91a..00a8dc0ef65 100644
--- a/src/chunk.c
+++ b/src/chunk.c
@@ -1614,12 +1614,14 @@ chunk_tuple_found(TupleInfo *ti, void *arg)
 	 * ts_chunk_build_from_tuple_and_stub() since chunk_resurrect() also uses
 	 * that function and, in that case, the chunk object is needed to create
 	 * the data table and related objects. */
-	chunk->table_id =
-		ts_get_relation_relid(NameStr(chunk->fd.schema_name), NameStr(chunk->fd.table_name), false);
-	chunk->hypertable_relid = ts_hypertable_id_to_relid(chunk->fd.hypertable_id, false);
+	ts_get_rel_info_by_name(NameStr(chunk->fd.schema_name),
+							NameStr(chunk->fd.table_name),
+							&chunk->table_id,
+							&chunk->amoid,
+							&chunk->relkind);

-	chunk->relkind = get_rel_relkind(chunk->table_id);
+	Assert(OidIsValid(chunk->amoid) || chunk->fd.osm_chunk);

 	Ensure(chunk->relkind > 0,
 		   "relkind for chunk \"%s\".\"%s\" is invalid",
diff --git a/src/chunk.h b/src/chunk.h
index 57fa9bcc48f..efeb7abeb21 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -66,6 +66,7 @@ typedef struct Chunk
 	char relkind;
 	Oid table_id;
 	Oid hypertable_relid;
+	Oid amoid; /* Table access method used by chunk */

 	/*
 	 * The hypercube defines the chunks position in the N-dimensional space.
diff --git a/src/chunk_scan.c b/src/chunk_scan.c
index 9dabb3cfaf0..817918f6df2 100644
--- a/src/chunk_scan.c
+++ b/src/chunk_scan.c
@@ -130,7 +130,10 @@ ts_chunk_scan_by_chunk_ids(const Hyperspace *hs, const List *chunk_ids, unsigned
 	for (int i = 0; i < locked_chunk_count; i++)
 	{
 		Chunk *chunk = locked_chunks[i];
-		chunk->relkind = get_rel_relkind(chunk->table_id);
+
+		ts_get_rel_info(chunk->table_id, &chunk->amoid, &chunk->relkind);
+
+		Assert(OidIsValid(chunk->amoid) || chunk->fd.osm_chunk);
 	}

 	/*
diff --git a/src/import/allpaths.c b/src/import/allpaths.c
index 1dbc5e98c3e..e2358254226 100644
--- a/src/import/allpaths.c
+++ b/src/import/allpaths.c
@@ -187,16 +187,15 @@ ts_set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *parent_rel, Index pare
 		TsRelType reltype = ts_classify_relation(root, child_rel, &ht);
 		if (reltype == TS_REL_CHUNK_CHILD && !TS_HYPERTABLE_IS_INTERNAL_COMPRESSION_TABLE(ht))
 		{
-			TimescaleDBPrivate *fdw_private = (TimescaleDBPrivate *) child_rel->fdw_private;
+			const Chunk *chunk = ts_planner_chunk_fetch(root, child_rel);

 			/*
 			 * This function is called only in tandem with our own hypertable
 			 * expansion, so the Chunk struct must be initialized already.
			 */
-			Assert(fdw_private->cached_chunk_struct != NULL);
+			Assert(chunk != NULL);

-			if (!ts_chunk_is_partial(fdw_private->cached_chunk_struct) &&
-				ts_chunk_is_compressed(fdw_private->cached_chunk_struct))
+			if (!ts_chunk_is_partial(chunk) && ts_chunk_is_compressed(chunk))
 			{
 				child_rel->indexlist = NIL;
 			}
diff --git a/src/planner/planner.c b/src/planner/planner.c
index db1a1eb2c56..29f1a7ebabc 100644
--- a/src/planner/planner.c
+++ b/src/planner/planner.c
@@ -1419,12 +1419,9 @@ timescaledb_get_relation_info_hook(PlannerInfo *root, Oid relation_objectid, boo
 		(type == TS_REL_CHUNK_CHILD) && IS_UPDL_CMD(query);
 	if (use_transparent_decompression && (is_standalone_chunk || is_child_chunk_in_update))
 	{
-		TimescaleDBPrivate *fdw_private = (TimescaleDBPrivate *) rel->fdw_private;
-		Assert(fdw_private->cached_chunk_struct == NULL);
-		fdw_private->cached_chunk_struct =
-			ts_chunk_get_by_relid(rte->relid, /* fail_if_not_found = */ true);
-		if (!ts_chunk_is_partial(fdw_private->cached_chunk_struct) &&
-			ts_chunk_is_compressed(fdw_private->cached_chunk_struct))
+		const Chunk *chunk = ts_planner_chunk_fetch(root, rel);
+
+		if (!ts_chunk_is_partial(chunk) && ts_chunk_is_compressed(chunk))
 		{
 			rel->indexlist = NIL;
 		}
diff --git a/src/planner/planner.h b/src/planner/planner.h
index 9a00ff31536..e82ff36ac46 100644
--- a/src/planner/planner.h
+++ b/src/planner/planner.h
@@ -9,7 +9,9 @@
 #include
 #include
 #include
+#include

+#include "chunk.h"
 #include "export.h"
 #include "guc.h"
 #include "hypertable.h"
@@ -108,3 +110,43 @@ extern TSDLLEXPORT void ts_add_baserel_cache_entry_for_chunk(Oid chunk_reloid,
 															 Hypertable *hypertable);
 TsRelType TSDLLEXPORT ts_classify_relation(const PlannerInfo *root, const RelOptInfo *rel,
 										   Hypertable **ht);
+
+/*
+ * Chunk-equivalent of planner_rt_fetch(), but returns the corresponding chunk
+ * instead of the range table entry.
+ *
+ * Returns NULL if this rel is not a chunk.
+ *
+ * The cache should be pre-warmed by hypertable expansion, but expansion
+ * doesn't run in the following cases:
+ *
+ * 1. if it was a direct query on the chunk;
+ *
+ * 2. if it is not a SELECT query.
+ */
+static inline const Chunk *
+ts_planner_chunk_fetch(const PlannerInfo *root, RelOptInfo *rel)
+{
+	TimescaleDBPrivate *rel_private;
+
+	/* The rel can only be a chunk if it is part of a hypertable expansion
+	 * (RELOPT_OTHER_MEMBER_REL) or a direct query on the chunk
+	 * (RELOPT_BASEREL) */
+	if (rel->reloptkind != RELOPT_OTHER_MEMBER_REL && rel->reloptkind != RELOPT_BASEREL)
+		return NULL;
+
+	/* The rel_private entry should have been created as part of classifying
+	 * the relation in timescaledb_get_relation_info_hook(). Therefore,
+	 * ts_get_private_reloptinfo() asserts that it is already set but falls
+	 * back to creating rel_private in release builds for safety. */
+	rel_private = ts_get_private_reloptinfo(rel);
+
+	if (NULL == rel_private->cached_chunk_struct)
+	{
+		RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
+		rel_private->cached_chunk_struct =
+			ts_chunk_get_by_relid(rte->relid, /* fail_if_not_found = */ true);
+	}
+
+	return rel_private->cached_chunk_struct;
+}
diff --git a/src/utils.c b/src/utils.c
index 384858a770d..a9d803c1258 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -1748,3 +1748,77 @@ ts_update_placeholder(PG_FUNCTION_ARGS)
 	elog(ERROR, "this stub function is used only as placeholder during extension updates");
 	PG_RETURN_NULL();
 }
+
+/*
+ * Get relation information from the syscache in one call.
+ *
+ * Returns the relkind and the access method used. Both output parameters
+ * must be non-NULL.
+ */
+void
+ts_get_rel_info(Oid relid, Oid *amoid, char *relkind)
+{
+	HeapTuple tuple;
+	Form_pg_class cform;
+
+	tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for relation %u", relid);
+
+	cform = (Form_pg_class) GETSTRUCT(tuple);
+	*amoid = cform->relam;
+	*relkind = cform->relkind;
+	ReleaseSysCache(tuple);
+}
+
+/*
+ * Get relation information from the syscache in one call.
+ *
+ * Returns the relid, relkind, and access method used. All output parameters
+ * must be non-NULL.
+ */
+void
+ts_get_rel_info_by_name(const char *relnamespace, const char *relname, Oid *relid, Oid *amoid,
+						char *relkind)
+{
+	HeapTuple tuple;
+	Form_pg_class cform;
+	Oid namespaceoid = get_namespace_oid(relnamespace, false);
+
+	tuple = SearchSysCache2(RELNAMENSP, PointerGetDatum(relname), ObjectIdGetDatum(namespaceoid));
+
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for relation %s.%s", relnamespace, relname);
+
+	cform = (Form_pg_class) GETSTRUCT(tuple);
+	*relid = cform->oid;
+	*amoid = cform->relam;
+	*relkind = cform->relkind;
+	ReleaseSysCache(tuple);
+}
+
+static Oid hypercore_amoid = InvalidOid;
+
+bool
+ts_is_hypercore_am(Oid amoid)
+{
+	/* Can't use InvalidOid as an indication of a non-cached value since
+	 * get_am_oid() will return InvalidOid when the access method does not
+	 * exist, and we would then redo the lookup every time this function is
+	 * called. This boolean can be removed once an access method with the
+	 * given name is guaranteed to exist. */
+	static bool iscached = false;

+	if (!iscached && !OidIsValid(hypercore_amoid))
+	{
+		hypercore_amoid = get_am_oid("hypercore", true);
+		iscached = true;
+	}
+
+	if (!OidIsValid(hypercore_amoid))
+		return false;
+
+	/* Shouldn't get here for now, since the "hypercore" AM doesn't exist yet */
+	Assert(false);
+
+	return amoid == hypercore_amoid;
+}
diff --git a/src/utils.h b/src/utils.h
index ff2442383ba..5e20eaa3c9e 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -371,3 +371,8 @@ ts_datum_set_objectid(const AttrNumber attno, NullableDatum *datums, const Oid v
 	else
 		datums[AttrNumberGetAttrOffset(attno)].isnull = true;
 }
+
+extern TSDLLEXPORT void ts_get_rel_info_by_name(const char *relnamespace, const char *relname,
+												Oid *relid, Oid *amoid, char *relkind);
+extern TSDLLEXPORT void ts_get_rel_info(Oid relid, Oid *amoid, char *relkind);
+extern TSDLLEXPORT bool ts_is_hypercore_am(Oid amoid);
diff --git a/tsl/src/nodes/decompress_chunk/decompress_chunk.c b/tsl/src/nodes/decompress_chunk/decompress_chunk.c
index c67721908e3..ec8dc268048 100644
--- a/tsl/src/nodes/decompress_chunk/decompress_chunk.c
+++ b/tsl/src/nodes/decompress_chunk/decompress_chunk.c
@@ -67,10 +67,11 @@ static DecompressChunkPath *decompress_chunk_path_create(PlannerInfo *root, Comp
 														 int parallel_workers,
 														 Path *compressed_path);

-static void decompress_chunk_add_plannerinfo(PlannerInfo *root, CompressionInfo *info, Chunk *chunk,
-											 RelOptInfo *chunk_rel, bool needs_sequence_num);
+static void decompress_chunk_add_plannerinfo(PlannerInfo *root, CompressionInfo *info,
+											 const Chunk *chunk, RelOptInfo *chunk_rel,
+											 bool needs_sequence_num);

-static SortInfo build_sortinfo(Chunk *chunk, RelOptInfo *chunk_rel, CompressionInfo *info,
+static SortInfo build_sortinfo(const Chunk *chunk, RelOptInfo *chunk_rel, CompressionInfo *info,
 							   List *pathkeys);

 static bool
@@ -254,7 +255,8 @@ copy_decompress_chunk_path(DecompressChunkPath *src)
 }

 static CompressionInfo *
-build_compressioninfo(PlannerInfo *root, Hypertable *ht, Chunk *chunk, RelOptInfo *chunk_rel)
+build_compressioninfo(PlannerInfo *root, const Hypertable *ht, const Chunk *chunk,
+					  RelOptInfo *chunk_rel)
 {
 	AppendRelInfo *appinfo;
 	CompressionInfo *info = palloc0(sizeof(CompressionInfo));
@@ -510,7 +512,7 @@ cost_batch_sorted_merge(PlannerInfo *root, CompressionInfo *compression_info,
  * compatible and the optimization can be used.
  */
 static MergeBatchResult
-can_batch_sorted_merge(PlannerInfo *root, CompressionInfo *info, Chunk *chunk)
+can_batch_sorted_merge(PlannerInfo *root, CompressionInfo *info, const Chunk *chunk)
 {
 	PathKey *pk;
 	Var *var;
@@ -620,8 +622,8 @@ can_batch_sorted_merge(PlannerInfo *root, CompressionInfo *info, const Chunk *chunk)
  * To save planning time, we therefore refrain from adding them.
 */
 static void
-add_chunk_sorted_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hypertable *ht, Index ht_relid,
-					   Path *path, Path *compressed_path)
+add_chunk_sorted_paths(PlannerInfo *root, RelOptInfo *chunk_rel, const Hypertable *ht,
+					   Index ht_relid, Path *path, Path *compressed_path)
 {
 	if (root->query_pathkeys == NIL)
 		return;
@@ -681,8 +683,8 @@ add_chunk_sorted_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hypertable *ht,
 #define IS_UPDL_CMD(parse)                                                                         \
 	((parse)->commandType == CMD_UPDATE || (parse)->commandType == CMD_DELETE)
 void
-ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hypertable *ht,
-								   Chunk *chunk)
+ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, const Hypertable *ht,
+								   const Chunk *chunk)
 {
 	RelOptInfo *compressed_rel;
 	ListCell *lc;
@@ -1650,7 +1652,7 @@ compressed_rel_setup_equivalence_classes(PlannerInfo *root, CompressionInfo *inf
  * and add it to PlannerInfo
 */
 static void
-decompress_chunk_add_plannerinfo(PlannerInfo *root, CompressionInfo *info, Chunk *chunk,
+decompress_chunk_add_plannerinfo(PlannerInfo *root, CompressionInfo *info, const Chunk *chunk,
 								 RelOptInfo *chunk_rel, bool needs_sequence_num)
 {
 	Index compressed_index = root->simple_rel_array_size;
@@ -2012,7 +2014,7 @@ find_const_segmentby(RelOptInfo *chunk_rel, CompressionInfo *info)
 * If query pathkeys is shorter than segmentby + compress_orderby pushdown can still be done
 */
 static SortInfo
-build_sortinfo(Chunk *chunk, RelOptInfo *chunk_rel, CompressionInfo *info, List *pathkeys)
+build_sortinfo(const Chunk *chunk, RelOptInfo *chunk_rel, CompressionInfo *info, List *pathkeys)
 {
 	int pk_index;
 	PathKey *pk;
diff --git a/tsl/src/nodes/decompress_chunk/decompress_chunk.h b/tsl/src/nodes/decompress_chunk/decompress_chunk.h
index 2d2ad1816b7..e77f8c2b0c5 100644
--- a/tsl/src/nodes/decompress_chunk/decompress_chunk.h
+++ b/tsl/src/nodes/decompress_chunk/decompress_chunk.h
@@ -57,8 +57,8 @@ typedef struct DecompressChunkPath
 	bool batch_sorted_merge;
 } DecompressChunkPath;

-void ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *rel, Hypertable *ht,
-										Chunk *chunk);
+void ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *rel, const Hypertable *ht,
+										const Chunk *chunk);

 extern bool ts_is_decompress_chunk_path(Path *path);
diff --git a/tsl/src/planner.c b/tsl/src/planner.c
index 1b83848a82f..578f876d75d 100644
--- a/tsl/src/planner.c
+++ b/tsl/src/planner.c
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

@@ -97,41 +98,59 @@ tsl_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, RelOptIn
 	}
 }

+/*
+ * Check if a chunk should be decompressed via a DecompressChunk plan.
+ *
+ * Check first that it is a compressed chunk. Then, decompress unless it is
+ * SELECT * FROM ONLY <chunk>.
+ * We check if it is the ONLY case by calling ts_rte_is_marked_for_expansion().
+ * Respecting ONLY here is important to not break postgres tools like pg_dump.
+ */
+static inline bool
+use_decompress_chunk_node(const RelOptInfo *rel, const RangeTblEntry *rte, const Chunk *chunk)
+{
+	return ts_guc_enable_transparent_decompression &&
+		   /* Check that the chunk is actually compressed */
+		   chunk->fd.compressed_chunk_id != INVALID_CHUNK_ID &&
+		   /* Check that it is _not_ SELECT FROM ONLY <chunk> */
+		   (rel->reloptkind != RELOPT_BASEREL || ts_rte_is_marked_for_expansion(rte));
+}
+
 void
 tsl_set_rel_pathlist_query(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte,
 						   Hypertable *ht)
 {
-	/* We can get here via query on hypertable in that case reloptkind
-	 * will be RELOPT_OTHER_MEMBER_REL or via direct query on chunk
-	 * in that case reloptkind will be RELOPT_BASEREL.
-	 * If we get here via SELECT * FROM <chunk>, we decompress the chunk,
-	 * unless the query was SELECT * FROM ONLY <chunk>.
-	 * We check if it is the ONLY case by calling ts_rte_is_marked_for_expansion.
-	 * Respecting ONLY here is important to not break postgres tools like pg_dump.
+	/* Only interested in queries on relations that are part of hypertables
+	 * with compression enabled, so exit quickly if that is not the case.
+	 */
+	if (ht == NULL || !TS_HYPERTABLE_HAS_COMPRESSION_TABLE(ht))
+		return;
+
+	/*
+	 * For a chunk, we can get here via a query on the hypertable that expands
+	 * to the chunk or via a direct query on the chunk. In the former case,
+	 * reloptkind will be RELOPT_OTHER_MEMBER_REL (member of hypertable) and
+	 * in the latter case it will be RELOPT_BASEREL (standalone rel).
+	 *
+	 * These two cases are checked in ts_planner_chunk_fetch().
 	 */
-	TimescaleDBPrivate *fdw_private = (TimescaleDBPrivate *) rel->fdw_private;
-	if (ts_guc_enable_transparent_decompression && ht &&
-		(rel->reloptkind == RELOPT_OTHER_MEMBER_REL ||
-		 (rel->reloptkind == RELOPT_BASEREL && ts_rte_is_marked_for_expansion(rte))) &&
-		TS_HYPERTABLE_HAS_COMPRESSION_TABLE(ht))
-	{
-		if (fdw_private->cached_chunk_struct == NULL)
-		{
-			/*
-			 * We can not have the cached Chunk struct,
-			 * 1) if it was a direct query on the chunk;
-			 * 2) if it is not a SELECT QUERY.
-			 * Caching is done by our hypertable expansion, which doesn't run in
-			 * these cases.
-			 */
-			fdw_private->cached_chunk_struct =
-				ts_chunk_get_by_relid(rte->relid, /* fail_if_not_found = */ true);
-		}
+	const Chunk *chunk = ts_planner_chunk_fetch(root, rel);

-		if (fdw_private->cached_chunk_struct->fd.compressed_chunk_id != INVALID_CHUNK_ID)
-		{
-			ts_decompress_chunk_generate_paths(root, rel, ht, fdw_private->cached_chunk_struct);
-		}
+	if (chunk == NULL)
+		return;
+
+	if (use_decompress_chunk_node(rel, rte, chunk))
+	{
+		ts_decompress_chunk_generate_paths(root, rel, ht, chunk);
+	}
+	/*
+	 * If using our own access method on the chunk, we might want to add
+	 * alternative paths. These are not compatible with transparent
+	 * decompression, so only add them if we didn't add decompression paths
+	 * above.
+	 */
+	else if (ts_is_hypercore_am(chunk->amoid))
+	{
+		/* To be implemented */
+		Assert(false);
 	}
 }
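--
For illustration, a minimal sketch of how the cached AM OID could drive path
generation once hypercore planning is implemented. This is not part of the
patch: hypercore_create_paths() is a hypothetical helper, while all other
symbols are added or modified by the hunks above.

	static void
	chunk_create_paths_by_am(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte, Hypertable *ht)
	{
		/* Uses the planner-level chunk cache added in src/planner/planner.h;
		 * returns NULL if the rel is not a chunk */
		const Chunk *chunk = ts_planner_chunk_fetch(root, rel);

		if (chunk == NULL)
			return;

		if (use_decompress_chunk_node(rel, rte, chunk))
		{
			/* Compressed chunk: add transparent decompression paths */
			ts_decompress_chunk_generate_paths(root, rel, ht, chunk);
		}
		else if (ts_is_hypercore_am(chunk->amoid))
		{
			/* Hypothetical: add paths specific to the hypercore table AM,
			 * replacing the Assert(false) placeholder above */
			hypercore_create_paths(root, rel, ht, chunk);
		}
	}

The branch order mirrors tsl_set_rel_pathlist_query(): transparent
decompression wins when both could apply, since the two path types are not
compatible on the same rel.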