From 6dc97e125b56ca5ff459c900a9bb34807b8f613e Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:58:58 +0300 Subject: [PATCH 1/7] Added more metrics and Average query runtime and Sessions dashboards --- config/grafana/dashboards/dash1.json | 455 +- .../provisioning/datasources/datasources.yml | 11 +- config/pgwatch-prometheus/metrics.yml | 4541 +++++++++++++++++ config/pgwatch-prometheus/sources.yml | 19 +- config/prometheus/prometheus.yml | 8 +- old-metrics.yml | 0 6 files changed, 5020 insertions(+), 14 deletions(-) create mode 100644 old-metrics.yml diff --git a/config/grafana/dashboards/dash1.json b/config/grafana/dashboards/dash1.json index ac64f5d..ba66e0a 100644 --- a/config/grafana/dashboards/dash1.json +++ b/config/grafana/dashboards/dash1.json @@ -17,10 +17,114 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, + "graphTooltip": 2, "id": 1, "links": [], "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 8, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, 
+ "y": 0 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "sum(rate(pgwatch_stat_statements_total_time[$agg_interval])) / sum(rate(pgwatch_stat_statements_calls[$agg_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Average query runtime", + "type": "timeseries" + }, { "datasource": { "type": "datasource", @@ -96,7 +200,7 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, + "x": 12, "y": 0 }, "id": 1, @@ -172,10 +276,6 @@ "options": {} }, { - "filter": { - "id": "byRefId", - "options": "/^(?:seriesToRows-B-B-B-B-B-B-B-B-B)$/" - }, "id": "extractFields", "options": { "delimiter": ",", @@ -212,6 +312,309 @@ } ], "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 16, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { 
+ "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "E" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "#4e7299", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Active" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idle" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#afafaf", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idle-in-transaction" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Waiting" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + 
"disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_idleintransaction", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Idle-in-transaction", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_idle", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Idle", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_active", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Active", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_settings_max_connections", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Max connections", + "range": true, + "refId": "D", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_total", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "E", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_waiting ", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": 
"Waiting", + "range": true, + "refId": "F", + "useBackend": false + } + ], + "title": "Sessions", + "type": "timeseries" } ], "preload": false, @@ -219,7 +622,43 @@ "schemaVersion": 41, "tags": [], "templating": { - "list": [] + "list": [ + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "10s", + "value": "10s" + }, + "name": "agg_interval", + "options": [ + { + "selected": true, + "text": "10s", + "value": "10s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "10s,1m,5m,1h", + "refresh": 2, + "type": "interval" + } + ] }, "time": { "from": "now-5m", @@ -229,5 +668,5 @@ "timezone": "browser", "title": "PoC", "uid": "00eb62a7-4b80-43cd-a890-45336979aa18", - "version": 2 + "version": 16 } \ No newline at end of file diff --git a/config/grafana/provisioning/datasources/datasources.yml b/config/grafana/provisioning/datasources/datasources.yml index 699bbbc..91ca878 100644 --- a/config/grafana/provisioning/datasources/datasources.yml +++ b/config/grafana/provisioning/datasources/datasources.yml @@ -4,6 +4,7 @@ datasources: - name: PGWatch-PostgreSQL type: postgres access: proxy + uid: P031DD592934B2F1F url: sink-postgres:5432 database: measurements user: pgwatch @@ -12,10 +13,16 @@ datasources: jsonData: sslmode: disable postgresVersion: 1500 - isDefault: true + isDefault: false - name: PGWatch-Prometheus type: prometheus access: proxy + uid: P7A0D6631BB10B34F url: http://sink-prometheus:9090 - isDefault: false \ No newline at end of file + isDefault: true + jsonData: + scrapeInterval: '5s' + queryTimeout: '5s' + timeInterval: '5s' + httpMethod: 'POST' \ No newline at end of file diff --git a/config/pgwatch-prometheus/metrics.yml b/config/pgwatch-prometheus/metrics.yml index 492724b..b8fbd61 100644 --- a/config/pgwatch-prometheus/metrics.yml +++ 
b/config/pgwatch-prometheus/metrics.yml @@ -1,4 +1,27 @@ # Simple PGWatch Metrics for Prometheus - just queryid and calls + +# The following structure is expected for metrics and preset definitions: +# metrics: +# metric_name: +# init_sql: |- +# CREATE EXTENSION IF NOT EXISTS some_extension; +# CREATE OR REPLACE FUNCTION get_some_stat(OUT some_stat int) +# ... +# sqls: +# 11: | +# select /* pgwatch_generated */ +# (extract(epoch from now()) * 1e9)::int8 as epoch_ns, +# ... +# 14: | +# select /* pgwatch_generated */ +# (extract(epoch from now()) * 1e9)::int8 as epoch_ns, +# ... +# gauges: +# - '*' +# is_instance_level: true +# node_status: primary +# statement_timeout_seconds: 300 +# metric_storage_name: db_stats metrics: pg_stat_statements_calls: description: "Simple queryid and calls metric" @@ -19,3 +42,4521 @@ metrics: metric_storage_name: pgss_calls node_status: primary statement_timeout_seconds: 5 + + archiver: + description: > + This metric retrieves key statistics from the PostgreSQL `pg_stat_archiver` view providing insights into the status of WAL file archiving. + It returns the total number of successfully archived files and failed archiving attempts. Additionally, it identifies if the most recent + attempt resulted in a failure and calculates how many seconds have passed since the last failure. The metric only considers data if WAL + archiving is enabled in the system, helping administrators monitor and diagnose issues related to the archiving process. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + archived_count, + failed_count, + case when coalesce(last_failed_time, '1970-01-01'::timestamptz) > coalesce(last_archived_time, '1970-01-01'::timestamptz) then 1 else 0 end as is_failing_int, + extract(epoch from now() - last_failed_time)::int8 as seconds_since_last_failure + from + pg_stat_archiver + where + current_setting('archive_mode') in ('on', 'always') + gauges: + - is_failing_int + - seconds_since_last_failure + is_instance_level: true + archiver_pending_count: + description: > + This metric retrieves the count of WAL files waiting to be archived by checking the pg_wal/archive_status directory + for files with .ready extension. It helps monitor the archiving backlog and potential issues with WAL archiving. + sqls: + 10: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + count(*) as archiver_pending_count + from + (select pg_ls_dir('pg_wal/archive_status')) a + where + pg_ls_dir ~ '[0-9A-F]{24}.ready' + 9.4: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + count(*) as archiver_pending_count + from + (select pg_ls_dir('pg_xlog/archive_status')) a + where + pg_ls_dir ~ '[0-9A-F]{24}.ready' + gauges: + - archiver_pending_count + is_instance_level: true + backends: + description: > + This metric gathers detailed information from the PostgreSQL pg_stat_activity view, providing an overview of the database's current session + and activity state. It tracks the total number of client backends, active sessions, idle sessions, sessions waiting on locks, and background workers. + The metric also calculates statistics on blocked sessions, most extended waiting times, average and longest session durations, transaction times, + and query durations. Additionally, it monitors autovacuum worker activity and provides the age of the oldest transaction (measured by xmin). 
+ This metric helps administrators monitor session states, detect bottlenecks, and ensure the system is within its connection limits, + providing visibility into database performance and contention. + sqls: + 11: | + with sa_snapshot as ( + select * from pg_stat_activity + where pid != pg_backend_pid() + and datname = current_database() + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + (select count(*) from sa_snapshot where backend_type = 'client backend') as total, + (select count(*) from pg_stat_activity where pid != pg_backend_pid()) as instance_total, + current_setting('max_connections')::int as max_connections, + (select count(*) from sa_snapshot where backend_type = 'background worker') as background_workers, + (select count(*) from sa_snapshot where state = 'active' and backend_type = 'client backend') as active, + (select count(*) from sa_snapshot where state = 'idle' and backend_type = 'client backend') as idle, + (select count(*) from sa_snapshot where state = 'idle in transaction' and backend_type = 'client backend') as idleintransaction, + (select count(*) from sa_snapshot where wait_event_type in ('LWLock', 'Lock', 'BufferPin') and backend_type = 'client backend') as waiting, + (select coalesce(sum(case when coalesce(array_length(pg_blocking_pids(pid), 1), 0) >= 1 then 1 else 0 end), 0) from sa_snapshot where backend_type = 'client backend' and state = 'active') as blocked, + (select ceil(extract(epoch from max(now() - query_start)))::int from sa_snapshot where wait_event_type in ('LWLock', 'Lock', 'BufferPin') and backend_type = 'client backend') as longest_waiting_seconds, + (select round(avg(abs(extract(epoch from now() - query_start)))::numeric, 3)::float from sa_snapshot where wait_event_type in ('LWLock', 'Lock', 'BufferPin') and backend_type = 'client backend') as avg_waiting_seconds, + (select ceil(extract(epoch from (now() - backend_start)))::int from sa_snapshot where backend_type = 'client 
backend' order by backend_start limit 1) as longest_session_seconds, + (select round(avg(abs(extract(epoch from now() - backend_start)))::numeric, 3)::float from sa_snapshot where backend_type = 'client backend') as avg_session_seconds, + (select ceil(extract(epoch from (now() - xact_start)))::int from sa_snapshot where xact_start is not null and backend_type = 'client backend' order by xact_start limit 1) as longest_tx_seconds, + (select round(avg(abs(extract(epoch from now() - xact_start)))::numeric, 3)::float from sa_snapshot where xact_start is not null and backend_type = 'client backend') as avg_tx_seconds, + (select ceil(extract(epoch from (now() - xact_start)))::int from sa_snapshot where backend_type = 'autovacuum worker' order by xact_start limit 1) as longest_autovacuum_seconds, + (select ceil(extract(epoch from max(now() - query_start)))::int from sa_snapshot where state = 'active' and backend_type = 'client backend') as longest_query_seconds, + (select round(avg(abs(extract(epoch from now() - query_start)))::numeric, 3)::float from sa_snapshot where state = 'active' and backend_type = 'client backend') as avg_query_seconds, + (select max(age(backend_xmin))::int8 from sa_snapshot) as max_xmin_age_tx, + (select count(*) from sa_snapshot where state = 'active' and backend_type = 'autovacuum worker') as av_workers + gauges: + - '*' + backup_age_pgbackrest: + description: > + This metric retrieves the age of the last successful pgBackRest backup in seconds. It uses the `pgbackrest --output=json info` command to fetch + the backup information and calculates the age based on the current time and the timestamp of the last backup. The metric returns a retcode of 0 + on success, along with the age in seconds and a message indicating the status. + Expects pgBackRest is correctly configured on monitored DB and "jq" tool is installed on the DB server. 
+ sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + retcode, + backup_age_seconds, + message + from + get_backup_age_pgbackrest() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_backup_age_pgbackrest(OUT retcode int, OUT backup_age_seconds int, OUT message text) AS + $$ + import time + import json + import subprocess + + PGBACKREST_TIMEOUT = 30 + + def error(message, returncode=1): + return returncode, 1000000, 'Not OK. '+message + + pgbackrest_cmd=["pgbackrest", "--output=json", "info"] + + try: + p = subprocess.Popen(pgbackrest_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8') + stdout, stderr = p.communicate(timeout=PGBACKREST_TIMEOUT) + except OSError as e: + return error('Failed to execute pgbackrest: {}'.format(e)) + except subprocess.TimeoutExpired: + p.terminate() + try: + p.wait(0.5) + except subprocess.TimeoutExpired: + p.kill() + return error('pgbackrest failed to respond in {} seconds'.format(PGBACKREST_TIMEOUT)) + + if p.returncode != 0: + return error('Failed on "pgbackrest info" call', returncode=p.returncode) + + try: + data = json.loads(stdout) + backup_age_seconds = int(time.time()) - data[0]['backup'][-1]['timestamp']['stop'] + return 0, backup_age_seconds, 'OK. Last backup age in seconds: {}'.format(backup_age_seconds) + except (json.JSONDecodeError, KeyError, IndexError, TypeError): # IndexError: no stanza or no backups listed yet; TypeError: unexpected JSON shape + return error('Failed to parse pgbackrest output') + $$ LANGUAGE plpython3u VOLATILE; + + ALTER FUNCTION get_backup_age_pgbackrest() SET statement_timeout TO '30s'; + + GRANT EXECUTE ON FUNCTION get_backup_age_pgbackrest() TO pgwatch; + + COMMENT ON FUNCTION get_backup_age_pgbackrest() is 'created for pgwatch'; + is_instance_level: true + backup_age_walg: + description: > + Retrieves the age of the last successful WAL-G backup in seconds.
It uses the `wal-g backup-list --json` command to fetch + the backup information and calculates the age based on the current time and the timestamp of the last backup. + The metric returns a retcode of 0 on success, along with the age in seconds and a message indicating the status. + Expects .wal-g.json is correctly configured with all necessary credentials and "jq" tool is installed on the DB server. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + retcode, + backup_age_seconds, + message + from + get_backup_age_walg() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_backup_age_walg(OUT retcode int, OUT backup_age_seconds int, OUT message text) AS + $$ + import subprocess + retcode=1 + backup_age_seconds=1000000 + message='' + + # get latest wal-g backup timestamp + walg_last_backup_cmd="""wal-g backup-list --json | jq -r '.[0].time'""" + p = subprocess.run(walg_last_backup_cmd, stdout=subprocess.PIPE, encoding='utf-8', shell=True) + if p.returncode != 0: + # plpy.notice("p.stdout: " + str(p.stderr) + str(p.stderr)) + return p.returncode, backup_age_seconds, 'Not OK. Failed on wal-g backup-list call' + + # plpy.notice("last_tz: " + last_tz) + last_tz=p.stdout.rstrip('\n\r') + + # get seconds since last backup from WAL-G timestamp in format '2020-01-22T17:50:51Z' + try: + plan = plpy.prepare("SELECT extract(epoch from now() - $1::timestamptz)::int AS backup_age_seconds;", ["text"]) + rv = plpy.execute(plan, [last_tz]) + except Exception as e: + return retcode, backup_age_seconds, 'Not OK. Failed to convert WAL-G backup timestamp to seconds' + else: + backup_age_seconds = rv[0]["backup_age_seconds"] + return 0, backup_age_seconds, 'OK. 
Last backup age in seconds: %s' % backup_age_seconds + + $$ LANGUAGE plpython3u VOLATILE; + + /* contacting S3 could be laggy depending on location */ + ALTER FUNCTION get_backup_age_walg() SET statement_timeout TO '30s'; + + GRANT EXECUTE ON FUNCTION get_backup_age_walg() TO pgwatch; + + COMMENT ON FUNCTION get_backup_age_walg() is 'created for pgwatch'; + is_instance_level: true + bgwriter: + description: > + Retrieves key statistics from the PostgreSQL `pg_stat_bgwriter` view, providing insights into the background writer's performance. + It returns the number of timed and requested checkpoints, checkpoint write and sync times, buffer statistics, and the last reset time. + This metric helps administrators monitor the background writer's activity and its impact on database performance. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + checkpoints_timed, + checkpoints_req, + checkpoint_write_time, + checkpoint_sync_time, + buffers_checkpoint, + buffers_clean, + maxwritten_clean, + buffers_backend, + buffers_backend_fsync, + buffers_alloc + from + pg_stat_bgwriter + 17: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + buffers_clean, + maxwritten_clean, + buffers_alloc, + (extract(epoch from now() - stats_reset))::int as last_reset_s + from + pg_stat_bgwriter + node_status: primary + is_instance_level: true + buffercache_by_db: + description: > + Retrieves buffer cache statistics grouped by database, providing insights into the size of buffers used by each database. + It calculates the total size of buffers in bytes for each database. + This metric helps administrators monitor buffer usage across different databases in the PostgreSQL instance. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + datname as tag_database, + count(*) * (current_setting('block_size')::int8) as size_b + FROM + pg_buffercache AS b, + pg_database AS d + WHERE + d.oid = b.reldatabase + GROUP BY + datname + gauges: + - '*' + is_instance_level: true + buffercache_by_type: + description: > + Retrieves buffer cache statistics grouped by relation type, providing insights into the size of buffers used + by different relation kinds. It calculates the total size of buffers in bytes for each relation kind + (e.g., Table, Index, Toast, Materialized view). This metric helps administrators monitor buffer usage across + different relation types in the PostgreSQL instance. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + CASE + WHEN relkind = 'r' THEN 'Table' -- TODO all relkinds covered? + WHEN relkind = 'i' THEN 'Index' + WHEN relkind = 't' THEN 'Toast' + WHEN relkind = 'm' THEN 'Materialized view' + ELSE 'Other' + END as tag_relkind, + count(*) * (current_setting('block_size')::int8) size_b + FROM + pg_buffercache AS b, + pg_class AS d + WHERE + pg_relation_filenode(d.oid) = b.relfilenode AND b.reldatabase IN (0, (SELECT oid FROM pg_database WHERE datname = current_database())) /* match on filenode (pg_class.oid differs after a rewrite), limited to this database and shared catalogs */ + GROUP BY + tag_relkind /* group on the mapped label so all 'Other' relkinds collapse into one row */ + gauges: + - '*' + is_instance_level: true + change_events: + description: > + The "change_events" built-in metric tracks DDL & config changes. Internally, it uses some other + *_hashes metrics that are not meant to be used independently. Such metrics should not be removed. + sqls: + 11: "" + checkpointer: + description: > + Retrieves key statistics from the PostgreSQL `pg_stat_checkpointer` view, providing insights into the checkpointer's performance. + It returns the number of timed and requested checkpoints, restart points, write and sync times, and buffer statistics. + This metric helps administrators monitor the checkpointer's activity and its impact on database performance.
+ sqls: + 11: "; -- covered by bgwriter" + 17: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + num_timed, + num_requested, + restartpoints_timed, + restartpoints_req, + restartpoints_done, + write_time, + sync_time, + buffers_written, + (extract(epoch from now() - stats_reset))::int as last_reset_s + from + pg_stat_checkpointer + configuration_hashes: + description: > + Retrieves configuration settings from the PostgreSQL `pg_settings` view, providing insights into the current configuration of the database. + This metric helps administrators monitor changes applied to the database configuration. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + name as tag_setting, + coalesce(reset_val, '') as value + from + pg_settings + where + name <> 'connection_ID' + cpu_load: + description: > + Retrieves the system load average for the last 1, 5, and 15 minutes using a custom PL/Python function. + This metric provides insights into the CPU load on the PostgreSQL server, helping administrators monitor system performance. + The function uses the `os.getloadavg()` method to fetch the load averages. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + round(load_1min::numeric, 2)::float as load_1min, + round(load_5min::numeric, 2)::float as load_5min, + round(load_15min::numeric, 2)::float as load_15min + from + get_load_average(); + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + CREATE OR REPLACE FUNCTION get_load_average(OUT load_1min float, OUT load_5min float, OUT load_15min float) AS + $$ + from os import getloadavg + la = getloadavg() + return [la[0], la[1], la[2]] + $$ LANGUAGE plpython3u VOLATILE; + GRANT EXECUTE ON FUNCTION get_load_average() TO pgwatch; + COMMENT ON FUNCTION get_load_average() is 'created for pgwatch'; + gauges: + - '*' + is_instance_level: true + database_conflicts: + description: > + Retrieves conflict statistics from the PostgreSQL `pg_stat_database_conflicts` view, providing insights into conflicts that have occurred + in the current database. It returns the number of conflicts related to tablespace, lock, snapshot, buffer pin, and deadlock. + This metric helps administrators monitor and diagnose issues related to database conflicts. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + confl_tablespace, + confl_lock, + confl_snapshot, + confl_bufferpin, + confl_deadlock + FROM + pg_stat_database_conflicts + WHERE + datname = current_database() + node_status: standby + datfrozenxid: + description: > + This metric tracks transaction ID and multixact ID ages to monitor wraparound risk. It retrieves the age + of the oldest datfrozenxid and datminmxid from pg_database for the current database, helping administrators + monitor and prevent transaction ID wraparound which can cause database shutdowns. 
+ sqls: + 9.3: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + age(datfrozenxid) as datfrozenxid_age, + mxid_age(datminmxid) as datminmxid_age + from + pg_database + where + datname = current_database() + gauges: + - datfrozenxid_age + - datminmxid_age + db_size: + description: > + Retrieves the size of the current database and the size of the `pg_catalog` schema, providing insights into the storage usage of the database. + It returns the size in bytes for both the current database and the catalog schema. + This metric helps administrators monitor database size and storage consumption. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + pg_database_size(current_database()) as size_b, + (select sum(pg_total_relation_size(c.oid))::int8 + from pg_class c join pg_namespace n on n.oid = c.relnamespace + where nspname = 'pg_catalog' and relkind = 'r' + ) as catalog_size_b + gauges: + - '*' + statement_timeout_seconds: 300 + db_size_approx: + description: > + Retrieves an approximate size of the current database and the size of the `pg_catalog` schema, providing insights into the storage usage of the database. + It returns the size in bytes for both the current database and the catalog schema. + This metric helps administrators monitor database size and storage consumption. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + current_setting('block_size')::int8 * ( + select sum(relpages) from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where c.relpersistence != 't' + ) as size_b, + current_setting('block_size')::int8 * ( + select sum(c.relpages + coalesce(ct.relpages, 0) + coalesce(cti.relpages, 0)) + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + left join pg_class ct on ct.oid = c.reltoastrelid + left join pg_index ti on ti.indrelid = ct.oid + left join pg_class cti on cti.oid = ti.indexrelid + where nspname = 'pg_catalog' + and (c.relkind = 'r' + or c.relkind = 'i' and not c.relname ~ '^pg_toast') + ) as catalog_size_b + gauges: + - '*' + metric_storage_name: db_size + db_stats: + description: > + Retrieves key statistics from the PostgreSQL `pg_stat_database` view, providing insights into the current database's performance. + It returns the number of backends, transaction commits and rollbacks, buffer reads and hits, tuple statistics, conflicts, temporary files and bytes, + deadlocks, block read and write times, postmaster uptime, backup duration, recovery status, system identifier, and invalid indexes. + This metric helps administrators monitor database activity and performance. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + system_identifier::text as tag_sys_id, + (select count(*) from pg_index i + where not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 'active' + and a.query ~* 'concurrently' + )) as invalid_indexes + from + pg_stat_database, pg_control_system() + where + datname = current_database() + 12: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, + checksum_failures, + extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + system_identifier::text as tag_sys_id, + (select count(*) from pg_index i + where not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 
'active' + and a.query ~* 'concurrently' + )) as invalid_indexes + from + pg_stat_database, pg_control_system() + where + datname = current_database() + 14: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, + checksum_failures, + extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + system_identifier::text as tag_sys_id, + session_time::int8, + active_time::int8, + idle_in_transaction_time::int8, + sessions, + sessions_abandoned, + sessions_fatal, + sessions_killed, + (select count(*) from pg_index i + where not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 'active' + and a.query ~* 'concurrently' + )) as invalid_indexes + from + pg_stat_database, pg_control_system() + where + datname = current_database() + 15: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + checksum_failures, + extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s, + case when pg_is_in_recovery() then 1 else 
0 end as in_recovery_int, + system_identifier::text as tag_sys_id, + session_time::int8, + active_time::int8, + idle_in_transaction_time::int8, + sessions, + sessions_abandoned, + sessions_fatal, + sessions_killed, + (select count(*) from pg_index i + where not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 'active' + and a.query ~* 'concurrently' + )) as invalid_indexes + from + pg_stat_database, pg_control_system() + where + datname = current_database() + gauges: + - numbackends + - postmaster_uptime_s + - backup_duration_s + - checksum_last_failure_s + db_stats_aurora: + description: > + Retrieves key statistics from the PostgreSQL `pg_stat_database` view for Amazon Aurora PostgreSQL, providing insights into the current database's performance. + It returns the number of backends, transaction commits and rollbacks, buffer reads and hits, tuple statistics, conflicts, temporary files and bytes, + deadlocks, block read and write times, postmaster uptime, recovery status, and system identifier. + This metric helps administrators monitor database activity and performance in an Aurora PostgreSQL environment. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + system_identifier::text as tag_sys_id + from + pg_stat_database, pg_control_system() + where + datname = current_database() + gauges: + - numbackends + - postmaster_uptime_s + - backup_duration_s + - checksum_last_failure_s + metric_storage_name: db_stats + index_hashes: + description: > + Retrieves the hash of index definitions in the PostgreSQL database, providing a way to track changes in index definitions over time. + This metric helps administrators monitor index changes and ensure consistency in index definitions. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + quote_ident(nspname)||'.'||quote_ident(c.relname) as tag_index, + quote_ident(nspname)||'.'||quote_ident(r.relname) as "table", + i.indisvalid::text as is_valid, + coalesce(md5(pg_get_indexdef(i.indexrelid)), random()::text) as md5 + from + pg_index i + join + pg_class c on c.oid = i.indexrelid + join + pg_class r on r.oid = i.indrelid + join + pg_namespace n on n.oid = c.relnamespace + where + c.relnamespace not in (select oid from pg_namespace where nspname like any(array[E'pg\\_%', 'information_schema'])) + index_stats: + description: > + Retrieves detailed statistics about indexes in the PostgreSQL database, including index size, scan counts, tuple read and fetch counts, + block read and hit counts, and index validity. It also identifies the largest, most scanned, and unused indexes. 
+ This metric helps administrators monitor index performance and identify potential issues with unused or invalid indexes. + sqls: + 11: |- + /* does not return all index stats but biggest, top scanned and biggest unused ones */ + WITH q_locked_rels AS ( + select relation from pg_locks where mode = 'AccessExclusiveLock' + ), + q_index_details AS ( + select + sui.schemaname, + sui.indexrelname, + sui.relname, + sui.indexrelid, + coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b, + sui.idx_scan, + sui.idx_tup_read, + sui.idx_tup_fetch, + io.idx_blks_read, + io.idx_blks_hit, + i.indisvalid, + i.indisprimary, + i.indisunique, + i.indisexclusion + from + pg_stat_user_indexes sui + join pg_statio_user_indexes io on io.indexrelid = sui.indexrelid + join pg_index i on i.indexrelid = sui.indexrelid + where not sui.schemaname like any (array [E'pg\\_temp%', E'\\_timescaledb%']) + and not exists (select * from q_locked_rels where relation = sui.relid or relation = sui.indexrelid) + ), + q_top_indexes AS ( + /* biggest */ + select * + from ( + select indexrelid + from q_index_details + where idx_scan > 1 + order by index_size_b desc + limit 200 + ) x + union + /* most block traffic */ + select * + from ( + select indexrelid + from q_index_details + order by coalesce(idx_blks_read, 0) + coalesce(idx_blks_hit, 0) desc + limit 200 + ) y + union + /* most scans */ + select * + from ( + select indexrelid + from q_index_details + order by idx_scan desc nulls last + limit 200 + ) z + union + /* biggest unused non-constraint */ + select * + from ( + select q.indexrelid + from q_index_details q + where idx_scan = 0 + and not (indisprimary or indisunique or indisexclusion) + order by index_size_b desc + limit 200 + ) z + union + /* all invalid */ + select * + from ( + select q.indexrelid + from q_index_details q + where not indisvalid + ) zz + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + schemaname::text as tag_schema, + 
indexrelname::text as tag_index_name, + quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_index_full_name, + relname::text as tag_table_name, + quote_ident(schemaname)||'.'||quote_ident(relname) as tag_table_full_name, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_read, 0) as idx_tup_read, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + coalesce(index_size_b, 0) as index_size_b, + quote_ident(schemaname)||'.'||quote_ident(indexrelname) as index_full_name_val, + md5(regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE')) as tag_index_def_hash, + regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE') as index_def, + case when not indisvalid then 1 else 0 end as is_invalid_int, + case when indisprimary then 1 else 0 end as is_pk_int, + case when indisunique or indisexclusion then 1 else 0 end as is_uq_or_exc, + system_identifier::text as tag_sys_id + FROM + q_index_details id + JOIN + pg_control_system() ON true + WHERE + indexrelid IN (select indexrelid from q_top_indexes) + ORDER BY + id.schemaname, id.relname, id.indexrelname + 16: |- + /* NB! 
does not return all index stats but biggest, top scanned and biggest unused ones */ + WITH q_locked_rels AS ( /* pgwatch_generated */ + select relation from pg_locks where mode = 'AccessExclusiveLock' + ), + q_index_details AS ( + select + sui.schemaname, + sui.indexrelname, + sui.relname, + sui.indexrelid, + coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b, + sui.idx_scan, + sui.idx_tup_read, + sui.idx_tup_fetch, + io.idx_blks_read, + io.idx_blks_hit, + i.indisvalid, + i.indisprimary, + i.indisunique, + i.indisexclusion, + extract(epoch from now() - last_idx_scan)::int as last_idx_scan_s + from + pg_stat_user_indexes sui + join pg_statio_user_indexes io on io.indexrelid = sui.indexrelid + join pg_index i on i.indexrelid = sui.indexrelid + where not sui.schemaname like any (array [E'pg\\_temp%', E'\\_timescaledb%']) + and not exists (select * from q_locked_rels where relation = sui.relid or relation = sui.indexrelid) + ), + q_top_indexes AS ( + /* biggest */ + select * + from ( + select indexrelid + from q_index_details + where idx_scan > 1 + order by index_size_b desc + limit 200 + ) x + union + /* most block traffic */ + select * + from ( + select indexrelid + from q_index_details + order by coalesce(idx_blks_read, 0) + coalesce(idx_blks_hit, 0) desc + limit 200 + ) y + union + /* most scans */ + select * + from ( + select indexrelid + from q_index_details + order by idx_scan desc nulls last + limit 200 + ) z + union + /* biggest unused non-constraint */ + select * + from ( + select q.indexrelid + from q_index_details q + where idx_scan = 0 + and not (indisprimary or indisunique or indisexclusion) + order by index_size_b desc + limit 200 + ) z + union + /* all invalid */ + select * + from ( + select q.indexrelid + from q_index_details q + where not indisvalid + ) zz + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + schemaname::text as tag_schema, + indexrelname::text as tag_index_name, + 
quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_index_full_name, + relname::text as tag_table_name, + quote_ident(schemaname)||'.'||quote_ident(relname) as tag_table_full_name, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_read, 0) as idx_tup_read, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + coalesce(index_size_b, 0) as index_size_b, + quote_ident(schemaname)||'.'||quote_ident(indexrelname) as index_full_name_val, + md5(regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE')) as tag_index_def_hash, + regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE') as index_def, + case when not indisvalid then 1 else 0 end as is_invalid_int, + case when indisprimary then 1 else 0 end as is_pk_int, + case when indisunique or indisexclusion then 1 else 0 end as is_uq_or_exc, + system_identifier::text as tag_sys_id, + last_idx_scan_s + FROM + q_index_details id + JOIN + pg_control_system() ON true + WHERE + indexrelid IN (select indexrelid from q_top_indexes) + ORDER BY + id.schemaname, id.relname, id.indexrelname + instance_up: + description: > + This metric has some special handling attached to it - it will store a 0 value if the database is not accessible. + Thus it can be used to for example calculate some percentual "uptime" indicator. + For standard metrics there will be no data rows stored when the DB is not reachable, but for this one, + there will be a zero stored for the "is_up" column that, under normal operations, would always be 1. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 1::int as is_up + invalid_indexes: + description: > + Retrieves a list of invalid indexes in the PostgreSQL database, providing insights into indexes that are not valid. + It returns the index name, schema, and whether the index is valid or not. 
This metric helps administrators identify and address issues with invalid indexes. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + format('%I.%I', n.nspname , ci.relname) as tag_index_full_name, + coalesce(pg_relation_size(indexrelid), 0) as index_size_b + from + pg_index i + join pg_class ci on ci.oid = i.indexrelid + join pg_class cr on cr.oid = i.indrelid + join pg_namespace n on n.oid = ci.relnamespace + where not n.nspname like E'pg\\_temp%' + and not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 'active' + and a.query ~* 'concurrently' + ) + and not exists (select * from pg_locks where relation = indexrelid and mode = 'AccessExclusiveLock') /* can't get size then */ + order by index_size_b desc + limit 100 + kpi: + description: > + Retrieves key performance indicators (KPIs) from the PostgreSQL `pg_stat_database` view, providing insights into the current database's performance. + It returns the number of backends, active and blocked backends, oldest transaction age, transactions per second (TPS), commit and rollback counts, + buffer read and hit counts, temporary bytes, sequence scans on tables larger than 10MB, tuple statistics, stored procedure calls, + block read and write times, deadlocks, recovery status, and postmaster uptime. + This metric helps administrators monitor database activity and performance. 
+ sqls: + 11: | + WITH q_stat_tables AS ( + SELECT * FROM pg_stat_user_tables t + JOIN pg_class c ON c.oid = t.relid + WHERE NOT schemaname LIKE E'pg\\_temp%' + AND c.relpages > (1e7 / current_setting('block_size')::int8) -- >10MB (relpages is counted in blocks, not in 8-byte units) + ), + q_stat_activity AS ( + SELECT * FROM pg_stat_activity + WHERE datname = current_database() AND pid != pg_backend_pid() + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + case + when pg_is_in_recovery() = false then + pg_wal_lsn_diff(pg_current_wal_lsn(), '0/0')::int8 + else + pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '0/0')::int8 + end as wal_location_b, + numbackends - 1 as numbackends, + (select count(*) from q_stat_activity where state in ('active', 'idle in transaction')) AS active_backends, + (select count(*) from q_stat_activity where wait_event_type in ('LWLock', 'Lock', 'BufferPin')) AS blocked_backends, + (select round(extract(epoch from now()) - extract(epoch from (select xact_start from q_stat_activity + where datid = d.datid and not query like 'autovacuum:%' order by xact_start limit 1))))::int AS kpi_oldest_tx_s, + xact_commit + xact_rollback AS tps, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + temp_bytes, + (select sum(seq_scan) from q_stat_tables)::int8 AS seq_scans_on_tbls_gt_10mb, + tup_inserted, + tup_updated, + tup_deleted, + (select sum(calls) from pg_stat_user_functions where not schemaname like any(array[E'pg\\_%', 'information_schema']))::int8 AS sproc_calls, + blk_read_time, + blk_write_time, + deadlocks, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s + FROM + pg_stat_database d + WHERE + datname = current_database() + gauges: + - numbackends + - active_backends + - blocked_backends + - kpi_oldest_tx_s + locks: + description: > + Retrieves lock statistics from the PostgreSQL `pg_locks` view, providing insights into the types and modes of locks currently held in the database. 
+ It returns the lock type and the count of locks for each type. This metric helps administrators monitor lock contention and performance. + sqls: + 11: |- + WITH q_locks AS ( + select + * + from + pg_locks + where + pid != pg_backend_pid() + and database = (select oid from pg_database where datname = current_database()) + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + locktypes AS tag_locktype, + coalesce((select count(*) FROM q_locks WHERE locktype = locktypes), 0) AS count + FROM + unnest('{relation, extend, page, tuple, transactionid, virtualxid, object, userlock, advisory}'::text[]) locktypes + gauges: + - '*' + locks_mode: + description: > + Retrieves lock mode statistics from the PostgreSQL `pg_locks` view, providing insights into the different lock modes currently held in the database. + It returns the lock mode and the count of locks for each mode. This metric helps administrators monitor lock contention and performance. + sqls: + 11: |- + WITH q_locks AS ( + select + * + from + pg_locks + where + pid != pg_backend_pid() + and database = (select oid from pg_database where datname = current_database()) + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + lockmodes AS tag_lockmode, + coalesce((select count(*) FROM q_locks WHERE mode = lockmodes), 0) AS count + FROM + unnest('{AccessShareLock, ExclusiveLock, RowShareLock, RowExclusiveLock, ShareLock, ShareRowExclusiveLock, AccessExclusiveLock, ShareUpdateExclusiveLock}'::text[]) lockmodes + gauges: + - '*' + logical_subscriptions: + description: > + Retrieves information about logical subscriptions in the PostgreSQL database, including their names, enabled status, and the number of relations in each subscription. + It also provides counts of relations per `srsubstate` code (i = initializing, d = data being copied, s = synchronized, r = ready). 
+ This metric helps administrators monitor logical replication subscriptions and their statuses. + sqls: + 11: | + with q_sr as ( + select * from pg_subscription_rel + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + subname::text as tag_subname, + subenabled, + (select count(*) from q_sr where srsubid = oid) as relcount, + (select count(*) from q_sr where srsubid = oid and srsubstate = 'i') as state_i, + (select count(*) from q_sr where srsubid = oid and srsubstate = 'd') as state_d, + (select count(*) from q_sr where srsubid = oid and srsubstate = 's') as state_s, + (select count(*) from q_sr where srsubid = oid and srsubstate = 'r') as state_r + from + pg_subscription + where + subdbid = (select oid from pg_database where datname = current_database()) + gauges: + - '*' + pgbouncer_stats: + description: > + Retrieves statistics from the PgBouncer connection pooler. + This metric helps administrators monitor PgBouncer performance and connection pooling efficiency. + sqls: + 0: show stats + pgbouncer_clients: + description: > + Retrieves client connection statistics from the PgBouncer connection pooler, providing insights into the current state of client connections. + It returns the number of active, idle, and total client connections, as well as transaction counts and memory usage statistics. + This metric helps administrators monitor PgBouncer client connections and performance. + sqls: + 0: show clients + pgpool_processes: + description: > + Retrieves process statistics from the PgPool connection pooler, providing insights into the current state of PgPool processes. + It returns the number of active, idle, and total processes, as well as memory usage statistics. + This metric helps administrators monitor PgPool process performance and resource utilization. 
+ sqls: + 3: show pool_processes + pgpool_stats: + description: > + Retrieves statistics from the PgPool connection pooler, providing insights into the current state of PgPool connections and transactions. + It returns the number of active, idle, and total connections, as well as transaction counts and memory usage statistics. + This metric helps administrators monitor PgPool performance and connection pooling efficiency. + sqls: + 3: show pool_nodes + postgres_role: + description: > + This metric determines the PostgreSQL server role (primary, standby, or standalone) by checking + if the server is in recovery mode and if it has any active replication connections. It returns + an integer value: 0 = standalone, 1 = primary with replicas, 2 = standby/replica. + sqls: + 9.0: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + case pg_is_in_recovery() + when 't' then 2 + else (select case (select count(*) from pg_stat_replication where application_name != 'pg_basebackup') when '0' then 0 else 1 end) + end as in_recovery_int + gauges: + - in_recovery_int + is_instance_level: true + privilege_changes: + description: > + Retrieves information about privileges granted to roles on various database objects, including tables, functions, schemas, and databases. + It returns the object type, role name, object name, and privilege type for each privilege granted. + This metric helps administrators monitor and manage database access control and privileges. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + * + FROM ( + SELECT + 'table'::text AS object_type, + grantee::text AS tag_role, + quote_ident(table_schema) || '.' 
|| quote_ident(table_name) AS tag_object, + privilege_type + FROM + information_schema.table_privileges + /* includes also VIEW-s actually */ + WHERE + NOT grantee = ANY ( + SELECT + rolname + FROM + pg_roles + WHERE + rolsuper + OR oid < 16384) + AND NOT table_schema IN ('information_schema', 'pg_catalog') + /* + union all + + select + -- quite a heavy query currently, maybe faster directly via pg_attribute + has_column_privilege? + 'column' AS object_type, + grantee::text AS tag_role, + quote_ident(table_schema) || '.' || quote_ident(table_name) AS tag_object, + privilege_type + FROM + information_schema.column_privileges cp + WHERE + NOT table_schema IN ('pg_catalog', 'information_schema') + AND NOT grantee = ANY ( + SELECT + rolname + FROM + pg_roles + WHERE + rolsuper + OR oid < 16384) + AND NOT EXISTS ( + SELECT + * + FROM + information_schema.table_privileges + WHERE + table_schema = cp.table_schema + AND table_name = cp.table_name + AND grantee = cp.grantee + AND privilege_type = cp.privilege_type) */ + UNION ALL + SELECT + 'function' AS object_type, + grantee::text AS tag_role, + quote_ident(routine_schema) || '.' 
|| quote_ident(routine_name) AS tag_object, + privilege_type + FROM + information_schema.routine_privileges + WHERE + NOT routine_schema IN ('information_schema', 'pg_catalog') + AND NOT grantee = ANY ( + SELECT + rolname + FROM + pg_roles + WHERE + rolsuper + OR oid < 16384) + UNION ALL + SELECT + 'schema' AS object_type, + r.rolname::text AS tag_role, + quote_ident(n.nspname) AS tag_object, + p.perm AS privilege_type + FROM + pg_catalog.pg_namespace AS n + CROSS JOIN pg_catalog.pg_roles AS r + CROSS JOIN ( + VALUES ('USAGE'), + ('CREATE')) AS p (perm) + WHERE + NOT n.nspname IN ('information_schema', 'pg_catalog') + AND n.nspname NOT LIKE 'pg_%' + AND NOT r.rolsuper + AND r.oid >= 16384 + AND has_schema_privilege(r.oid, n.oid, p.perm) + UNION ALL + SELECT + 'database' AS object_type, + r.rolname::text AS role_name, + quote_ident(datname) AS tag_object, + p.perm AS permission + FROM + pg_catalog.pg_database AS d + CROSS JOIN pg_catalog.pg_roles AS r + CROSS JOIN ( + VALUES ('CREATE'), + ('CONNECT'), + ('TEMPORARY')) AS p (perm) + WHERE + d.datname = current_database() + AND NOT r.rolsuper + AND r.oid >= 16384 + AND has_database_privilege(r.oid, d.oid, p.perm) + UNION ALL + SELECT + 'superusers' AS object_type, + rolname::text AS role_name, + rolname::text AS tag_object, + 'SUPERUSER' AS permission + FROM + pg_catalog.pg_roles + WHERE + rolsuper + UNION ALL + SELECT + 'login_users' AS object_type, + rolname::text AS role_name, + rolname::text AS tag_object, + 'LOGIN' AS permission + FROM + pg_catalog.pg_roles + WHERE + rolcanlogin) y + psutil_cpu: + description: > + This metric requires the "psutil" Python package to be installed on the PostgreSQL server. + It provides CPU utilization and load averages using the "psutil" library. + "psutil" is known to behave differently depending on the used version and operating system, so if getting + errors please adjust to your needs. 
"psutil" documentation here: https://psutil.readthedocs.io/en/latest/ + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + round(cpu_utilization::numeric, 2)::float as cpu_utilization, + round(load_1m_norm::numeric, 2)::float as load_1m_norm, + round(load_1m::numeric, 2)::float as load_1m, + round(load_5m_norm::numeric, 2)::float as load_5m_norm, + round(load_5m::numeric, 2)::float as load_5m, + round("user"::numeric, 2)::float as "user", + round(system::numeric, 2)::float as system, + round(idle::numeric, 2)::float as idle, + round(iowait::numeric, 2)::float as iowait, + round(irqs::numeric, 2)::float as irqs, + round(other::numeric, 2)::float as other + from + get_psutil_cpu() + init_sql: | + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_psutil_cpu( + OUT cpu_utilization float8, OUT load_1m_norm float8, OUT load_1m float8, OUT load_5m_norm float8, OUT load_5m float8, + OUT "user" float8, OUT system float8, OUT idle float8, OUT iowait float8, OUT irqs float8, OUT other float8 + ) + LANGUAGE plpython3u + AS $FUNCTION$ + + from os import getloadavg + from psutil import cpu_times_percent, cpu_percent, cpu_count + from threading import Thread + + class GetCpuPercentThread(Thread): + def __init__(self, interval_seconds): + self.interval_seconds = interval_seconds + self.cpu_utilization_info = None + super(GetCpuPercentThread, self).__init__() + + def run(self): + self.cpu_utilization_info = cpu_percent(self.interval_seconds) + + t = GetCpuPercentThread(0.5) + t.start() + + ct = cpu_times_percent(0.5) + la = getloadavg() + + t.join() + + return t.cpu_utilization_info, la[0] / cpu_count(), la[0], la[1] / cpu_count(), la[1], ct.user, ct.system, ct.idle, ct.iowait, ct.irq + ct.softirq, ct.steal + ct.guest + ct.guest_nice + + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_psutil_cpu() TO pgwatch; + COMMENT ON FUNCTION get_psutil_cpu() IS 'created for pgwatch'; + gauges: + - '*' + 
is_instance_level: true + psutil_disk: + description: > + This metric requires the "psutil" Python package to be installed on the PostgreSQL server. + It provides disk usage statistics using the "psutil" library. + "psutil" is known to behave differently depending on the used version and operating system, so if getting + errors please adjust to your needs. "psutil" documentation here: https://psutil.readthedocs.io/en/latest/ + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + dir_or_tablespace as tag_dir_or_tablespace, + path as tag_path, + total, used, free, percent + from + get_psutil_disk() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_psutil_disk( + OUT dir_or_tablespace text, OUT path text, OUT total float8, OUT used float8, OUT free float8, OUT percent float8 + ) + RETURNS SETOF record + LANGUAGE plpython3u + SECURITY DEFINER + AS $FUNCTION$ + + from os import stat + from os.path import join, exists + from psutil import disk_usage + ret_list = [] + + # data_directory + r = plpy.execute("select current_setting('data_directory') as dd, current_setting('log_directory') as ld, current_setting('server_version_num')::int as pgver") + dd = r[0]['dd'] + ld = r[0]['ld'] + du_dd = disk_usage(dd) + ret_list.append(['data_directory', dd, du_dd.total, du_dd.used, du_dd.free, du_dd.percent]) + + dd_stat = stat(dd) + # log_directory + if ld: + if not ld.startswith('/'): + ld_path = join(dd, ld) + else: + ld_path = ld + if exists(ld_path): + log_stat = stat(ld_path) + if log_stat.st_dev == dd_stat.st_dev: + pass # no new info, same device + else: + du = disk_usage(ld_path) + ret_list.append(['log_directory', ld_path, du.total, du.used, du.free, du.percent]) + + # WAL / XLOG directory + # plpy.notice('pg_wal' if r[0]['pgver'] >= 100000 else 'pg_xlog', r[0]['pgver']) + joined_path_wal = join(r[0]['dd'], 'pg_wal' if r[0]['pgver'] >= 100000 else 'pg_xlog') + wal_stat = 
stat(joined_path_wal) + if wal_stat.st_dev == dd_stat.st_dev: + pass # no new info, same device + else: + du = disk_usage(joined_path_wal) + ret_list.append(['pg_wal', joined_path_wal, du.total, du.used, du.free, du.percent]) + + # add user created tablespaces if any + sql_tablespaces = """ + select spcname as name, pg_catalog.pg_tablespace_location(oid) as location + from pg_catalog.pg_tablespace where not spcname like any(array[E'pg\\_%'])""" + for row in plpy.cursor(sql_tablespaces): + du = disk_usage(row['location']) + ret_list.append([row['name'], row['location'], du.total, du.used, du.free, du.percent]) + return ret_list + + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_psutil_disk() TO pgwatch; + COMMENT ON FUNCTION get_psutil_disk() IS 'created for pgwatch'; + gauges: + - '*' + is_instance_level: true + psutil_disk_io_total: + description: > + This metric requires the "psutil" Python package to be installed on the PostgreSQL server. + It provides total disk I/O statistics using the "psutil" library. + "psutil" is known to behave differently depending on the used version and operating system, so if getting + errors please adjust to your needs. 
"psutil" documentation here: https://psutil.readthedocs.io/en/latest/ + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + read_count, + write_count, + read_bytes, + write_bytes + from + get_psutil_disk_io_total() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_psutil_disk_io_total( + OUT read_count float8, OUT write_count float8, OUT read_bytes float8, OUT write_bytes float8 + ) + LANGUAGE plpython3u + AS $FUNCTION$ + from psutil import disk_io_counters + dc = disk_io_counters(perdisk=False) + if dc: + return dc.read_count, dc.write_count, dc.read_bytes, dc.write_bytes + else: + return None, None, None, None + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_psutil_disk_io_total() TO pgwatch; + COMMENT ON FUNCTION get_psutil_disk_io_total() IS 'created for pgwatch'; + is_instance_level: true + psutil_mem: + description: > + This metric requires the "psutil" Python package to be installed on the PostgreSQL server. + It provides memory usage statistics using the "psutil" library. + "psutil" is known to behave differently depending on the used version and operating system, so if getting + errors please adjust to your needs. 
"psutil" documentation here: https://psutil.readthedocs.io/en/latest/ + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + total, used, free, buff_cache, available, percent, + swap_total, swap_used, swap_free, swap_percent + from + get_psutil_mem() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; /* "plpython3u" might need changing to "plpythonu" (Python 2 everywhere for new OS-es */ + + CREATE OR REPLACE FUNCTION get_psutil_mem( + OUT total float8, OUT used float8, OUT free float8, OUT buff_cache float8, OUT available float8, OUT percent float8, + OUT swap_total float8, OUT swap_used float8, OUT swap_free float8, OUT swap_percent float8 + ) + LANGUAGE plpython3u + AS $FUNCTION$ + from psutil import virtual_memory, swap_memory + vm = virtual_memory() + sw = swap_memory() + return vm.total, vm.used, vm.free, vm.buffers + vm.cached, vm.available, vm.percent, sw.total, sw.used, sw.free, sw.percent + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_psutil_mem() TO pgwatch; + COMMENT ON FUNCTION get_psutil_mem() IS 'created for pgwatch'; + gauges: + - '*' + is_instance_level: true + reco_add_index: + description: > + Retrieves recommendations for creating indexes based on the `pg_qualstats_index_advisor()` function. + It provides insights into potential index creation opportunities to improve query performance. + This metric helps administrators optimize database performance by suggesting index creation. + sqls: + 11: |- + select /* pgwatch_generated */ + epoch_ns, + tag_reco_topic, + tag_object_name, + recommendation, + case when exists (select * from pg_inherits + where inhrelid = regclass(tag_object_name) + ) then 'Partitioned table, create the index on parent' else extra_info + end as extra_info + FROM ( + SELECT (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'create_index'::text as tag_reco_topic, + (regexp_matches(v::text, E'ON (.*?) 
'))[1] as tag_object_name, + v::text as recommendation, + '' as extra_info + FROM json_array_elements( + pg_qualstats_index_advisor() -> 'indexes') v + ) x + ORDER BY tag_object_name + node_status: primary + is_private: true + reco_default_public_schema: + description: > + Retrieves recommendations for revoking the CREATE privilege on the public schema from PUBLIC. + This metric helps enhance security by ensuring that only authorized users can create new objects in the public schema. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'default_public_schema_privs'::text as tag_reco_topic, + nspname::text as tag_object_name, + 'REVOKE CREATE ON SCHEMA public FROM PUBLIC;'::text as recommendation, + 'only authorized users should be allowed to create new objects'::text as extra_info + from + pg_namespace + where + nspname = 'public' + and nspacl::text ~ E'[,\\{]+=U?C/' + node_status: primary + reco_disabled_triggers: + description: > + Retrieves recommendations for reviewing and potentially dropping disabled triggers in the PostgreSQL database. + It provides insights into triggers that are currently disabled, helping administrators identify and manage unused or unnecessary triggers. + This metric helps maintain database performance and reduce clutter by suggesting the removal of unused triggers. + sqls: + 11: | + /* "temporarily" disabled triggers might be forgotten about... 
*/ + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'disabled_triggers'::text as tag_reco_topic, + quote_ident(nspname)||'.'||quote_ident(relname) as tag_object_name, + 'review usage of trigger and consider dropping it if not needed anymore'::text as recommendation, + ''::text as extra_info + from + pg_trigger t + join + pg_class c on c.oid = t.tgrelid + join + pg_namespace n on n.oid = c.relnamespace + where + tgenabled = 'D' + node_status: primary + reco_drop_index: + description: > + Retrieves recommendations for dropping unused or invalid indexes in the PostgreSQL database. + It provides insights into indexes that have not been scanned and are consuming a significant portion of the database size. + This metric helps administrators optimize database performance by suggesting the removal of unnecessary indexes. + sqls: + 11: | + /* never-scanned indexes that are either above the size threshold or invalid; uses only pg_stat_user_indexes and pg_index */ + with q_database_size as ( + select pg_database_size(current_database()) as database_size_b + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'drop_index'::text as tag_reco_topic, + quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_object_name, + ('DROP INDEX ' || quote_ident(schemaname)||'.'||quote_ident(indexrelname) || ';')::text as recommendation, + 'Make sure to also check replica pg_stat_user_indexes.idx_scan count if using them for queries'::text as extra_info + from + pg_stat_user_indexes + join + pg_index using (indexrelid) + join + q_database_size on true + where + idx_scan = 0 + and ((pg_relation_size(indexrelid)::numeric / database_size_b) > 0.005 /* 0.5% DB size threshold */ + or not indisvalid) /* invalid indexes are reported regardless of size */ + and not indisprimary + and not indisreplident + and not schemaname like '_timescaledb%' + node_status: primary + reco_nested_views: + description: > + Retrieves recommendations for overly nested views in the PostgreSQL database. 
+ It identifies views that depend on other views and have a nesting depth greater than 3. + This metric helps administrators optimize query performance by suggesting the reduction of view nesting. + sqls: + 11: |- + WITH RECURSIVE views AS ( + -- get the directly depending views + SELECT v.oid::regclass AS view, + format('%s.%s', quote_ident(n.nspname), quote_ident(v.relname)) as full_name, + 1 AS level + FROM pg_depend AS d + JOIN pg_rewrite AS r + ON r.oid = d.objid + JOIN pg_class AS v + ON v.oid = r.ev_class + JOIN pg_namespace AS n + ON n.oid = v.relnamespace + WHERE v.relkind = 'v' + AND NOT n.nspname = ANY(array['information_schema', E'pg\\_%']) + AND NOT v.relname LIKE E'pg\\_%' + AND d.classid = 'pg_rewrite'::regclass + AND d.refclassid = 'pg_class'::regclass + AND d.deptype = 'n' + UNION ALL + -- add the views that depend on these + SELECT v.oid::regclass, + format('%s.%s', quote_ident(n.nspname), quote_ident(v.relname)) as full_name, + views.level + 1 + FROM views + JOIN pg_depend AS d + ON d.refobjid = views.view + JOIN pg_rewrite AS r + ON r.oid = d.objid + JOIN pg_class AS v + ON v.oid = r.ev_class + JOIN pg_namespace AS n + ON n.oid = v.relnamespace + WHERE v.relkind = 'v' + AND NOT n.nspname = ANY(array['information_schema', E'pg\\_%']) + AND d.classid = 'pg_rewrite'::regclass + AND d.refclassid = 'pg_class'::regclass + AND d.deptype = 'n' + AND v.oid <> views.view -- avoid loop + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'overly_nested_views'::text AS tag_reco_topic, + full_name::text as tag_object_name, + 'overly nested views can affect performance'::text recommendation, + 'nesting_depth: ' || coalesce(max(level)::text, '-') AS extra_info + FROM views + GROUP BY 1, 2, 3 + HAVING max(level) > 3 + ORDER BY max(level) DESC, full_name::text + node_status: primary + reco_partial_index_candidates: + description: > + Retrieves recommendations for creating partial indexes on columns with a high fraction of 
NULL values. + It identifies single-column indexes that could potentially be declared as partial indexes, leaving out NULL values. + This metric helps optimize index usage and improve query performance by suggesting the creation of partial indexes. + sqls: + 11: | + select distinct /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'partial_index_candidates'::text as tag_reco_topic, + quote_ident(ni.nspname)||'.'||quote_ident(ci.relname) as tag_object_name, + ('index ' || quote_ident(ni.nspname)||'.'||quote_ident(ci.relname) || ' on ' || quote_ident(s.schemaname) || '.' || quote_ident(s.tablename) || ' column ' || quote_ident(s.attname) || ' could possibly be declared partial leaving out NULL-s')::text as recommendation, + 'NULL fraction: ' || round((null_frac * 100)::numeric, 1) || '%, rowcount estimate: ' || (c.reltuples)::int8 || ', current definition: ' || pg_get_indexdef(i.indexrelid) as extra_info + from + pg_stats s + join pg_attribute a using (attname) + join pg_index i on i.indkey[0] = a.attnum and i.indrelid = a.attrelid + join pg_class c on c.oid = i.indrelid + join pg_class ci on ci.oid = i.indexrelid + join pg_namespace ni on ni.oid = ci.relnamespace + where + not indisprimary + and not indisunique + and indisready + and indisvalid + and i.indnatts = 1 /* simple 1 column indexes */ + and null_frac > 0.5 /* 50% empty */ + and not pg_get_indexdef(i.indexrelid) like '% WHERE %' + and c.reltuples >= 1e5 /* ignore smaller tables */ + and not exists ( /* leave out sub-partitions */ + select * from pg_inherits where inhrelid = c.oid + ) + reco_sprocs_wo_search_path: + description: > + Retrieves recommendations for stored procedures that do not have a fixed `search_path` set. + It identifies stored procedures that could potentially be abused by malicious users if used objects are not fully qualified. + This metric helps enhance security by suggesting the setting of a fixed search_path for stored procedures. 
+ sqls: + 11: |- + with q_sprocs as ( + select /* pgwatch_generated */ + format('%s.%s', quote_ident(nspname), quote_ident(proname)) as sproc_name, + 'alter function ' || quote_ident(nspname) || '.' || quote_ident(proname) || '(' || pg_get_function_identity_arguments(p.oid) || ') set search_path = X;' as fix_sql /* identity arguments omit DEFAULT clauses, which ALTER FUNCTION does not accept */ + from + pg_proc p + join pg_namespace n on n.oid = p.pronamespace + where prosecdef and not exists (select 1 from unnest(coalesce(proconfig, '{}'::text[])) as c(setting) where c.setting like 'search_path=%') /* proconfig entries have the form name=value */ + and coalesce(pg_catalog.obj_description(p.oid, 'pg_proc'), '') !~ 'pgwatch' /* keep functions that have no comment at all */ + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'sprocs_wo_search_path'::text as tag_reco_topic, + sproc_name::text as tag_object_name, + fix_sql::text as recommendation, + 'functions without fixed search_path can be potentially abused by malicious users if used objects are not fully qualified'::text as extra_info + from + q_sprocs + order by + tag_object_name, extra_info + node_status: primary + reco_superusers: + description: > + Retrieves recommendations for reviewing the number of superusers in the PostgreSQL database. + It identifies if there are too many superusers, which can pose a security risk. + This metric helps maintain database security by suggesting a review of superuser accounts. + sqls: + 11: | + with q_su as ( + select count(*) from pg_roles where rolcanlogin and rolsuper + ), + q_total as ( + select count(*) from pg_roles where rolcanlogin + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'superuser_count'::text as tag_reco_topic, + '-'::text as tag_object_name, + 'too many superusers detected - review recommended'::text as recommendation, + format('%s active superusers, %s total active users', q_su.count, q_total.count) as extra_info + from + q_su, q_total + where + q_su.count >= 10 + node_status: primary + recommendations: + description: > + When enabled, this metric will find all other metrics starting with `reco_*` and execute those queries. 
+ The metric targets performance, security, and other "best practices" violations. + Users can add new `reco_*` queries freely. + init_sql: CREATE EXTENSION IF NOT EXISTS pg_qualstats; + sqls: + 11: /* dummy placeholder - special handling in code to collect other metrics named reco_* */ + replication: + description: > + This metric collects replication statistics from the `pg_stat_replication` view. + It provides insights into the status of replication connections, including lag times and states. + This metric is useful for monitoring replication health and performance. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + application_name as tag_application_name, + usename AS tag_usename, + concat(coalesce(client_addr::text, client_hostname), '_', client_port::text) as tag_client_info, + coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, sent_lsn)::int8, 0) as sent_lag_b, + coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, write_lsn)::int8, 0) as write_lag_b, + coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, flush_lsn)::int8, 0) as flush_lag_b, + coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, replay_lsn)::int8, 0) as replay_lag_b, + (extract(epoch from write_lag) * 1000)::int8 as write_lag_ms, + (extract(epoch from flush_lag) * 1000)::int8 as flush_lag_ms, + (extract(epoch from replay_lag) * 1000)::int8 as replay_lag_ms, + state, + sync_state, + case when sync_state in ('sync', 'quorum') then 1 else 0 end as is_sync_int, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int + from + pg_stat_replication + where + coalesce(application_name, '') not in ('pg_basebackup', 'pg_rewind'); + gauges: + - '*' + is_instance_level: true + 
replication_slot_stats: + description: > + This metric collects statistics from the `pg_stat_replication_slots` view. + It provides insights into the status of replication slots, including transaction counts and byte usage. + This metric is useful for monitoring replication slot health and performance. + sqls: + 14: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + slot_name::text as tag_slot_name, + spill_txns, + spill_count, + spill_bytes, + stream_txns, + stream_count, + stream_bytes, + total_txns, + total_bytes + from + pg_stat_replication_slots + replication_slots: + description: > + This metric collects information about replication slots from the `pg_replication_slots` view. + It provides insights into the status of replication slots, including their activity and lag times. + This metric is useful for monitoring replication slot health and performance. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + slot_name::text as tag_slot_name, + coalesce(plugin, 'physical')::text as tag_plugin, + active, + case when active then 0 else 1 end as non_active_int, + pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::int8 as restart_lsn_lag_b, + greatest(age(xmin), age(catalog_xmin))::int8 as xmin_age_tx + from + pg_replication_slots + node_status: primary + gauges: + - '*' + is_instance_level: true + sequence_health: + description: > + This metric collects health statistics for sequences in the PostgreSQL database. + It provides insights into the usage and status of sequences, including maximum usage percentages and counts of sequences that are heavily used. + This metric is useful for monitoring sequence health and performance. 
+ sqls: + 11: |- + with q_seq_data as ( + select * from pg_sequences + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + (select round(100.0 * coalesce(max(last_value::numeric / max_value), 0), 2)::float from q_seq_data where not cycle) as max_used_pct, + (select count(*) from q_seq_data where not cycle and last_value::numeric / max_value > 0.5) as p50_used_seq_count, + (select count(*) from q_seq_data where not cycle and last_value::numeric / max_value > 0.75) as p75_used_seq_count + server_log_event_counts: + description: > + This metric enables the Postgres server log "tailing" for errors. It can't be used for remote setups, though, + unless the DB logs are somehow mounted or copied over, as real file access is needed! + sqls: + 11: |- + /* + Dummy placeholder - special handling in gatherer code for log parsing + */ + settings: + description: > + This metric collects various PostgreSQL server settings and configurations. + It provides insights into the server's configuration, including version, memory settings, and other important parameters. + This metric is useful for monitoring server settings and ensuring optimal performance. 
+ sqls: + 11: | + with qs as ( + select name, setting from pg_settings + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + current_setting('server_version') as server_version, + current_setting('server_version_num')::int8 as server_version_num, + (regexp_matches(regexp_replace(current_setting('server_version'), '(beta|devel).*', '', 'g'), E'\\d+'))[1]::float8 as major_version, + current_setting('block_size')::int as block_size, + current_setting('max_connections')::int as max_connections, + current_setting('hot_standby') as hot_standby, + (select setting from qs where name = 'hot_standby_feedback') as hot_standby_feedback, + current_setting('fsync') as fsync, + current_setting('full_page_writes') as full_page_writes, + current_setting('synchronous_commit') as synchronous_commit, + (select setting from qs where name = 'wal_compression') as wal_compression, + (select setting from qs where name = 'wal_log_hints') as wal_log_hints, + (select setting from qs where name = 'synchronous_standby_names') as synchronous_standby_names, + current_setting('shared_buffers') as shared_buffers, + current_setting('work_mem') as work_mem, + current_setting('maintenance_work_mem') as maintenance_work_mem, + current_setting('effective_cache_size') as effective_cache_size, + (select setting::int8 from qs where name = 'default_statistics_target') as default_statistics_target, + (select setting::float8 from qs where name = 'random_page_cost') as random_page_cost, + pg_size_pretty(((select setting::int8 from qs where name = 'min_wal_size') * 1024^2)::int8) as min_wal_size, + pg_size_pretty(((select setting::int8 from qs where name = 'max_wal_size') * 1024^2)::int8) as max_wal_size, + (select setting from qs where name = 'checkpoint_segments') as checkpoint_segments, + current_setting('checkpoint_timeout') as checkpoint_timeout, + current_setting('checkpoint_completion_target') as checkpoint_completion_target, + (select setting::int8 from qs where 
name = 'max_worker_processes') as max_worker_processes, + (select setting::int8 from qs where name = 'max_parallel_workers') as max_parallel_workers, + (select setting::int8 from qs where name = 'max_parallel_workers_per_gather') as max_parallel_workers_per_gather, + (select case when setting = 'on' then 1 else 0 end from qs where name = 'jit') as jit, + (select case when setting = 'on' then 1 else 0 end from qs where name = 'ssl') as ssl, + current_setting('statement_timeout') as statement_timeout, + current_setting('deadlock_timeout') as deadlock_timeout, + (select setting from qs where name = 'data_checksums') as data_checksums, + (select setting::int8 from qs where name = 'max_connections') as max_connections, + (select setting::int8 from qs where name = 'max_wal_senders') as max_wal_senders, + (select setting::int8 from qs where name = 'max_replication_slots') as max_replication_slots, + (select setting::int8 from qs where name = 'max_prepared_transactions') as max_prepared_transactions, + (select setting::int8 from qs where name = 'lock_timeout') || ' (ms)' as lock_timeout, + (select setting from qs where name = 'archive_mode') as archive_mode, + (select setting from qs where name = 'archive_command') as archive_command, + current_setting('archive_timeout') as archive_timeout, + (select setting from qs where name = 'shared_preload_libraries') as shared_preload_libraries, + (select setting from qs where name = 'listen_addresses') as listen_addresses, + (select setting from qs where name = 'ssl') as ssl, + (select setting from qs where name = 'autovacuum') as autovacuum, + (select setting::int8 from qs where name = 'autovacuum_max_workers') as autovacuum_max_workers, + (select setting::float8 from qs where name = 'autovacuum_vacuum_scale_factor') as autovacuum_vacuum_scale_factor, + (select setting::float8 from qs where name = 'autovacuum_vacuum_threshold') as autovacuum_vacuum_threshold, + (select setting::float8 from qs where name = 
'autovacuum_analyze_scale_factor') as autovacuum_analyze_scale_factor, + (select setting::float8 from qs where name = 'autovacuum_analyze_threshold') as autovacuum_analyze_threshold /* distinct alias: the threshold must not overwrite the scale factor column */ + show_plans_realtime: + description: > + This metric collects real-time query plans from the `pg_show_plans` extension. + It provides insights into the execution plans of currently running queries, helping to identify performance issues and optimize query execution. + This metric is useful for monitoring query performance and understanding how queries are executed in real-time. + sqls: + 11: | + /* assumes pg_show_plans extension */ + select /* pgwatch_generated */ + max((extract(epoch from now()) * 1e9)::int8) as epoch_ns, + max(extract(epoch from now() - query_start))::int as max_s, + avg(extract(epoch from now() - query_start))::int as avg_s, + count(*), + array_to_string(array_agg(distinct usename order by usename), ',') as "users", + max(md5(plan)) as tag_hash, /* needed for influx */ + plan, + max(query) as query + from + pg_show_plans p + join + pg_stat_activity a + using (pid) + where + p.pid != pg_backend_pid() + and datname = current_database() + and now() - query_start > '1s'::interval + and backend_type = 'client backend' + group by + plan + order by + max_s desc + limit + 10 + smart_health_per_disk: + description: > + This metric collects SMART health status for all disk devices using the `smartmontools` utility. + It provides insights into the health of disk devices, including their SMART status and return codes. + This metric is useful for monitoring disk health and identifying potential issues with disk devices. + This helper is always meant to be tested and adjusted to make sure all disk are detected. 
+ Most likely smartctl privileges must be escalated to give postgres access: `sudo chmod u+s /usr/local/sbin/smartctl` + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + device as tag_device, + retcode + from + get_smart_health_per_device() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_smart_health_per_device(OUT device text, OUT retcode int) RETURNS SETOF record AS + $$ + import subprocess + ret_list = [] + + #disk_detect_cmd='smartctl --scan | cut -d " " -f3 | grep mega' # for Lenovo ServerRAID M1210 + disk_detect_cmd='lsblk -io KNAME,TYPE | grep '' disk'' | cut -d " " -f1 | sort' + p = subprocess.run(disk_detect_cmd, stdout=subprocess.PIPE, encoding='utf-8', shell=True) + if p.returncode != 0: + return ret_list + disks = p.stdout.splitlines() + + for disk in disks: + # health_cmd = 'smartctl -d $disk -a -q silent /dev/sda' % disk # for Lenovo ServerRAID M1210 members + health_cmd = 'smartctl -a -q silent /dev/%s' % disk + p = subprocess.run(health_cmd, stdout=subprocess.PIPE, encoding='utf-8', shell=True) + ret_list.append((disk, p.returncode)) + + return ret_list + + $$ LANGUAGE plpython3u VOLATILE; + + GRANT EXECUTE ON FUNCTION get_smart_health_per_device() TO pgwatch; + + COMMENT ON FUNCTION get_smart_health_per_device() is 'created for pgwatch'; + sproc_hashes: + description: > + This metric collects hashes of all stored procedures in the database. + It provides a way to track changes in stored procedures over time by comparing their hashes. + This metric is useful for monitoring stored procedure integrity and detecting changes. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + p.oid::text as tag_oid, + quote_ident(nspname)||'.'||quote_ident(proname) as tag_sproc, + md5(prosrc) + from + pg_proc p + join + pg_namespace n on n.oid = pronamespace + where + not nspname like any(array[E'pg\\_%', 'information_schema']) + sproc_stats: + description: > + This metric collects statistics about user-defined functions (stored procedures) in the database. + It provides insights into function usage, including call counts and execution times. + This metric is useful for monitoring function performance and identifying potential bottlenecks. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + schemaname::text AS tag_schema, + funcname::text AS tag_function_name, + quote_ident(schemaname)||'.'||quote_ident(funcname) as tag_function_full_name, + p.oid::text as tag_oid, -- for overloaded funcs + calls as sp_calls, + self_time, + total_time + FROM + pg_stat_user_functions f + JOIN + pg_proc p ON p.oid = f.funcid + ORDER BY + total_time DESC + LIMIT + 300 + stat_activity: + description: > + This metric collects statistics about currently active queries in the database. + It provides insights into the state of active queries, including their duration and blocking status. + This metric is useful for monitoring query performance and identifying long-running or blocked queries. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + s.query as query, + count(*) as count + from pg_stat_activity s + where s.datname = current_database() + and s.state = 'active' + and s.backend_type = 'client backend' + and s.pid != pg_backend_pid() + and now() - s.query_start > '100ms'::interval + group by s.query + stat_activity_realtime: + description: > + This metric collects real-time statistics about currently active queries in the database. 
+ It provides insights into the state of active queries, including their duration and blocking status. + This metric is useful for monitoring query performance and identifying long-running or blocked queries in real-time. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + pid as tag_pid, + usename::text AS user, + application_name AS appname, + coalesce(client_addr::text, 'local') AS ip, + extract(epoch FROM (now() - query_start))::int AS duration_s, + (coalesce(wait_event_type, '') IN ('LWLockNamed', 'Lock', 'BufferPin'))::int AS waiting, + array_to_string(pg_blocking_pids(pid), ',') as blocking_pids, + ltrim(regexp_replace(query, E'[ \\t\\n\\r]+' , ' ', 'g'))::varchar(300) AS query + FROM + pg_stat_activity + WHERE + state != 'idle' + AND backend_type IN ('client backend', 'autovacuum worker') + AND pid != pg_backend_pid() + AND datname = current_database() + AND now() - query_start > '500ms'::interval + ORDER BY + now() - query_start DESC + LIMIT 25 + stat_io: + description: > + This metric collects I/O statistics from the `pg_stat_io` view. + It provides insights into read and write operations, including the number of reads, writes, and their associated times. + This metric is useful for monitoring I/O performance and identifying potential bottlenecks in disk operations. 
+ sqls: + 16: |- + SELECT /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + coalesce(backend_type, 'total') as tag_backend_type, + sum(coalesce(reads, 0))::int8 as reads, + (sum(coalesce(reads, 0) * op_bytes) / 1e6)::int8 as read_bytes_mb, + sum(coalesce(read_time, 0))::int8 as read_time_ms, + sum(coalesce(writes, 0))::int8 as writes, + (sum(coalesce(writes, 0) * op_bytes) / 1e6)::int8 as write_bytes_mb, + sum(coalesce(write_time, 0))::int8 as write_time_ms, + sum(coalesce(writebacks, 0))::int8 as writebacks, + (sum(coalesce(writebacks, 0) * op_bytes) / 1e6)::int8 as writeback_bytes_mb, + sum(coalesce(writeback_time, 0))::int8 as writeback_time_ms, + sum(coalesce(fsyncs, 0))::int8 fsyncs, + sum(coalesce(fsync_time, 0))::int8 fsync_time_ms, + max(extract(epoch from now() - stats_reset)::int) as stats_reset_s + FROM + pg_stat_io + GROUP BY + ROLLUP (backend_type) + is_instance_level: true + stat_ssl: + description: > + This metric collects SSL connection statistics from the `pg_stat_ssl` view. + It provides insights into the number of SSL connections, including those that are encrypted and those that are not. + This metric is useful for monitoring SSL usage and ensuring secure connections in the PostgreSQL database. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + count(*) as total, + count(*) FILTER (WHERE ssl) as "on", + count(*) FILTER (WHERE NOT ssl) as "off" + FROM + pg_stat_ssl AS s, + pg_stat_activity AS a + WHERE + a.pid = s.pid + AND a.datname = current_database() + AND a.pid <> pg_backend_pid() + AND NOT (a.client_addr = '127.0.0.1' OR client_port = -1) + gauges: + - '*' + stat_statements: + description: > + This metric collects statistics from the `pg_stat_statements` extension. + It provides insights into query performance, including execution times, block reads/writes, and user information. 
+ This metric is useful for monitoring query performance and identifying slow or resource-intensive queries. + init_sql: CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + sqls: + 11: |- + WITH q_data AS ( + SELECT + coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, + /* + if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. + */ + array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, + sum(s.calls)::int8 AS calls, + round(sum(s.total_time)::numeric, 3)::double precision AS total_time, + sum(shared_blks_hit)::int8 AS shared_blks_hit, + sum(shared_blks_read)::int8 AS shared_blks_read, + sum(shared_blks_written)::int8 AS shared_blks_written, + sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, + sum(temp_blks_read)::int8 AS temp_blks_read, + sum(temp_blks_written)::int8 AS temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision AS blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision AS blk_write_time, + max(query::varchar(8000)) AS query + FROM + pg_stat_statements s + WHERE + calls > 5 + AND total_time > 5 + AND dbid = ( + SELECT + oid + FROM + pg_database + WHERE + datname = current_database()) + AND NOT upper(s.query::varchar(50)) + LIKE ANY (ARRAY['DEALLOCATE%', + 'SET %', + 'RESET %', + 'BEGIN%', + 'BEGIN;', + 'COMMIT%', + 'END%', + 'ROLLBACK%', + 'SHOW%']) + GROUP BY + queryid + ) + SELECT (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + b.tag_queryid, + b.users, + b.calls, + b.total_time, + b.shared_blks_hit, + b.shared_blks_read, + b.shared_blks_written, + b.shared_blks_dirtied, + b.temp_blks_read, + b.temp_blks_written, + b.blk_read_time, + b.blk_write_time, + ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) tag_query + FROM ( + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + total_time > 
0 + ORDER BY + total_time DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + ORDER BY + calls DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_read > 0 + ORDER BY + shared_blks_read DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_written > 0 + ORDER BY + shared_blks_written DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_read > 0 + ORDER BY + temp_blks_read DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_written > 0 + ORDER BY + temp_blks_written DESC + LIMIT 100) a) b + 13: |- + WITH q_data AS ( + SELECT + coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, + /* + if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. 
+ */ + array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, + sum(s.calls)::int8 AS calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, + sum(shared_blks_hit)::int8 AS shared_blks_hit, + sum(shared_blks_read)::int8 AS shared_blks_read, + sum(shared_blks_written)::int8 AS shared_blks_written, + sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, + sum(temp_blks_read)::int8 AS temp_blks_read, + sum(temp_blks_written)::int8 AS temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision AS blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision AS blk_write_time, + sum(wal_fpi)::int8 AS wal_fpi, + sum(wal_bytes)::int8 AS wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, + max(query::varchar(8000)) AS query + FROM + pg_stat_statements s + WHERE + calls > 5 + AND total_exec_time > 5 + AND dbid = ( + SELECT + oid + FROM + pg_database + WHERE + datname = current_database()) + AND NOT upper(s.query::varchar(50)) + LIKE ANY (ARRAY['DEALLOCATE%', + 'SET %', + 'RESET %', + 'BEGIN%', + 'BEGIN;', + 'COMMIT%', + 'END%', + 'ROLLBACK%', + 'SHOW%']) + GROUP BY + queryid + ) + select /* pgwatch_generated */ + (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + b.tag_queryid, + b.users, + b.calls, + b.total_time, + b.shared_blks_hit, + b.shared_blks_read, + b.shared_blks_written, + b.shared_blks_dirtied, + b.temp_blks_read, + b.temp_blks_written, + b.blk_read_time, + b.blk_write_time, + b.wal_fpi, + b.wal_bytes, + b.total_plan_time, + ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query + FROM ( + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + total_time > 0 + ORDER BY + total_time DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + ORDER BY + calls DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + 
WHERE + shared_blks_read > 0 + ORDER BY + shared_blks_read DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_written > 0 + ORDER BY + shared_blks_written DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_read > 0 + ORDER BY + temp_blks_read DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_written > 0 + ORDER BY + temp_blks_written DESC + LIMIT 100) a) b + 15: |- + WITH /* pgwatch_generated */ q_data AS ( + SELECT + queryid::text AS tag_queryid, + /* + if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. + */ + array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, + sum(s.calls)::int8 AS calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, + sum(shared_blks_hit)::int8 AS shared_blks_hit, + sum(shared_blks_read)::int8 AS shared_blks_read, + sum(shared_blks_written)::int8 AS shared_blks_written, + sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, + sum(temp_blks_read)::int8 AS temp_blks_read, + sum(temp_blks_written)::int8 AS temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision AS blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision AS blk_write_time, + round(sum(temp_blk_read_time)::numeric, 3)::double precision AS temp_blk_read_time, + round(sum(temp_blk_write_time)::numeric, 3)::double precision AS temp_blk_write_time, + sum(wal_fpi)::int8 AS wal_fpi, + sum(wal_bytes)::int8 AS wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, + max(query::varchar(8000)) AS query + FROM + pg_stat_statements s + WHERE + calls > 5 + AND total_exec_time > 5 + AND dbid = ( 
+ SELECT + oid + FROM + pg_database + WHERE + datname = current_database()) + AND NOT upper(s.query::varchar(50)) + LIKE ANY (ARRAY['DEALLOCATE%', + 'SET %', + 'RESET %', + 'BEGIN%', + 'BEGIN;', + 'COMMIT%', + 'END%', + 'ROLLBACK%', + 'SHOW%']) + GROUP BY + queryid + ) + SELECT + (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + b.tag_queryid, + b.users, + b.calls, + b.total_time, + b.shared_blks_hit, + b.shared_blks_read, + b.shared_blks_written, + b.shared_blks_dirtied, + b.temp_blks_read, + b.temp_blks_written, + b.blk_read_time, + b.blk_write_time, + b.temp_blk_read_time, + b.temp_blk_write_time, + b.wal_fpi, + b.wal_bytes, + b.total_plan_time, + ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query + FROM ( + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + total_time > 0 + ORDER BY + total_time DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + ORDER BY + calls DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_read > 0 + ORDER BY + shared_blks_read DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_written > 0 + ORDER BY + shared_blks_written DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_read > 0 + ORDER BY + temp_blks_read DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_written > 0 + ORDER BY + temp_blks_written DESC + LIMIT 100) a) b + 17: |- + WITH /* pgwatch_generated */ q_data AS ( + SELECT + queryid::text AS tag_queryid, + /* + NB! if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. 
+ */ + array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, + sum(s.calls)::int8 AS calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, + sum(shared_blks_hit)::int8 AS shared_blks_hit, + sum(shared_blks_read)::int8 AS shared_blks_read, + sum(shared_blks_written)::int8 AS shared_blks_written, + sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, + sum(temp_blks_read)::int8 AS temp_blks_read, + sum(temp_blks_written)::int8 AS temp_blks_written, + round((sum(shared_blk_read_time) + sum(local_blk_read_time))::numeric, 3)::double precision AS blk_read_time, + round((sum(shared_blk_write_time) + sum(local_blk_write_time))::numeric, 3)::double precision AS blk_write_time, + round(sum(temp_blk_read_time)::numeric, 3)::double precision AS temp_blk_read_time, + round(sum(temp_blk_write_time)::numeric, 3)::double precision AS temp_blk_write_time, + sum(wal_fpi)::int8 AS wal_fpi, + sum(wal_bytes)::int8 AS wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, + max(query::varchar(8000)) AS query + FROM + pg_stat_statements s + WHERE + calls > 5 + AND total_exec_time > 5 + AND dbid = ( + SELECT + oid + FROM + pg_database + WHERE + datname = current_database()) + AND NOT upper(s.query::varchar(50)) + LIKE ANY (ARRAY['DEALLOCATE%', + 'SET %', + 'RESET %', + 'BEGIN%', + 'BEGIN;', + 'COMMIT%', + 'END%', + 'ROLLBACK%', + 'SHOW%']) + GROUP BY + queryid + ) + SELECT + (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + b.tag_queryid, + b.users, + b.calls, + b.total_time, + b.shared_blks_hit, + b.shared_blks_read, + b.shared_blks_written, + b.shared_blks_dirtied, + b.temp_blks_read, + b.temp_blks_written, + b.blk_read_time, + b.blk_write_time, + b.temp_blk_read_time, + b.temp_blk_write_time, + b.wal_fpi, + b.wal_bytes, + b.total_plan_time, + ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query + FROM ( + SELECT + * + FROM ( + SELECT + * + FROM + q_data + 
WHERE + total_time > 0 + ORDER BY + total_time DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + ORDER BY + calls DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_read > 0 + ORDER BY + shared_blks_read DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_written > 0 + ORDER BY + shared_blks_written DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_read > 0 + ORDER BY + temp_blks_read DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_written > 0 + ORDER BY + temp_blks_written DESC + LIMIT 100) a) b; + stat_statements_calls: + description: > + This metric collects statistics from the `pg_stat_statements` extension, focusing on the number of calls and total execution time. + It provides insights into query performance, including execution times and call counts. + This metric is useful for monitoring query performance and identifying slow or resource-intensive queries. 
+ init_sql: CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + coalesce(sum(calls), 0)::int8 as calls, + coalesce(round(sum(total_time)::numeric, 3), 0)::float8 as total_time + from + pg_stat_statements + where + dbid = (select oid from pg_database where datname = current_database()) + 13: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + coalesce(sum(calls), 0)::int8 as calls, + coalesce(round(sum(total_exec_time)::numeric, 3), 0)::float8 as total_time, + coalesce(round(sum(total_plan_time)::numeric, 3), 0)::float8 as total_plan_time + from + pg_stat_statements + where + dbid = (select oid from pg_database where datname = current_database()) + stat_statements_no_query_text: + description: > + This metric collects statistics from the `pg_stat_statements` extension without including the query text. + It provides insights into query performance, including execution times, block reads/writes, and user information, + while omitting the actual query text for security or privacy reasons. + This metric is useful for monitoring query performance without exposing sensitive query details. 
+ init_sql: CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + sqls: + 11: |- + with q_data as ( + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + '-'::text as tag_query, + coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, + array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, + sum(s.calls)::int8 as calls, + round(sum(s.total_time)::numeric, 3)::double precision as total_time, + sum(shared_blks_hit)::int8 as shared_blks_hit, + sum(shared_blks_read)::int8 as shared_blks_read, + sum(shared_blks_written)::int8 as shared_blks_written, + sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, + sum(temp_blks_read)::int8 as temp_blks_read, + sum(temp_blks_written)::int8 as temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision as blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision as blk_write_time + from + pg_stat_statements s + where + calls > 5 + and total_time > 0 + and dbid = (select oid from pg_database where datname = current_database()) + and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', + 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) + group by + queryid + ) + select * from ( + select + * + from + q_data + where + total_time > 0 + order by + total_time desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + order by + calls desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_read > 0 + order by + shared_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_written > 0 + order by + shared_blks_written desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_read > 0 + order by + temp_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_written > 0 + order by + 
temp_blks_written desc + limit 100 + ) a + 13: |- + with q_data as ( + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + '-' as tag_query, + coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, + array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, + sum(s.calls)::int8 as calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision as total_time, + sum(shared_blks_hit)::int8 as shared_blks_hit, + sum(shared_blks_read)::int8 as shared_blks_read, + sum(shared_blks_written)::int8 as shared_blks_written, + sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, + sum(temp_blks_read)::int8 as temp_blks_read, + sum(temp_blks_written)::int8 as temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision as blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision as blk_write_time, + sum(wal_fpi)::int8 as wal_fpi, + sum(wal_bytes)::int8 as wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision as total_plan_time + from + pg_stat_statements s + where + calls > 5 + and total_exec_time > 0 + and dbid = (select oid from pg_database where datname = current_database()) + and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', + 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) + group by + queryid + ) + select * from ( + select + * + from + q_data + where + total_time > 0 + order by + total_time desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + order by + calls desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_read > 0 + order by + shared_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_written > 0 + order by + shared_blks_written desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_read > 0 + order by + 
temp_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_written > 0 + order by + temp_blks_written desc + limit 100 + ) a + 15: |- + with /* pgwatch_generated */ q_data as ( + select + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + '-' as tag_query, + queryid::text as tag_queryid, + array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, + sum(s.calls)::int8 as calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision as total_time, + sum(shared_blks_hit)::int8 as shared_blks_hit, + sum(shared_blks_read)::int8 as shared_blks_read, + sum(shared_blks_written)::int8 as shared_blks_written, + sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, + sum(temp_blks_read)::int8 as temp_blks_read, + sum(temp_blks_written)::int8 as temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision as blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision as blk_write_time, + round(sum(temp_blk_read_time)::numeric, 3)::double precision as temp_blk_read_time, + round(sum(temp_blk_write_time)::numeric, 3)::double precision as temp_blk_write_time, + sum(wal_fpi)::int8 as wal_fpi, + sum(wal_bytes)::int8 as wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision as total_plan_time + from + pg_stat_statements s + where + calls > 5 + and total_exec_time > 0 + and dbid = (select oid from pg_database where datname = current_database()) + and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', + 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) + group by + queryid + ) + select * from ( + select + * + from + q_data + where + total_time > 0 + order by + total_time desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + order by + calls desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_read > 0 + order by + shared_blks_read desc + limit 100 + ) 
a + union + select * from ( + select + * + from + q_data + where + shared_blks_written > 0 + order by + shared_blks_written desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_read > 0 + order by + temp_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_written > 0 + order by + temp_blks_written desc + limit 100 + ) a + 17: |- + with /* pgwatch_generated */ q_data as ( + select + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + '-' as tag_query, + queryid::text as tag_queryid, + array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, + sum(s.calls)::int8 as calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision as total_time, + sum(shared_blks_hit)::int8 as shared_blks_hit, + sum(shared_blks_read)::int8 as shared_blks_read, + sum(shared_blks_written)::int8 as shared_blks_written, + sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, + sum(temp_blks_read)::int8 as temp_blks_read, + sum(temp_blks_written)::int8 as temp_blks_written, + round((sum(shared_blk_read_time) + sum(local_blk_read_time))::numeric, 3)::double precision AS blk_read_time, + round((sum(shared_blk_write_time) + sum(local_blk_write_time))::numeric, 3)::double precision AS blk_write_time, + round(sum(temp_blk_read_time)::numeric, 3)::double precision as temp_blk_read_time, + round(sum(temp_blk_write_time)::numeric, 3)::double precision as temp_blk_write_time, + sum(wal_fpi)::int8 as wal_fpi, + sum(wal_bytes)::int8 as wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision as total_plan_time + from + pg_stat_statements s + where + calls > 5 + and total_exec_time > 0 + and dbid = (select oid from pg_database where datname = current_database()) + and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', + 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) + group by + queryid + ) + select * from ( + 
select + * + from + q_data + where + total_time > 0 + order by + total_time desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + order by + calls desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_read > 0 + order by + shared_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_written > 0 + order by + shared_blks_written desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_read > 0 + order by + temp_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_written > 0 + order by + temp_blks_written desc + limit 100 + ) a; + metric_storage_name: stat_statements + subscription_stats: + description: > + This metric collects statistics from the `pg_stat_subscription_stats` view, which provides information about the status of logical replication subscriptions. + It includes details such as the number of apply and sync errors, which can help in monitoring the health of logical replication. + sqls: + 15: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + subname::text as tag_subname, + apply_error_count, + sync_error_count + from + pg_stat_subscription_stats + table_bloat_approx_stattuple: + description: > + This metric collects approximate table bloat statistics using the `pgstattuple_approx` function. + It provides insights into the amount of free space and dead tuples in tables, which can help in identifying bloat issues. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + quote_ident(n.nspname)||'.'||quote_ident(c.relname) as tag_full_table_name, + approx_free_percent, + approx_free_space as approx_free_space_b, + approx_tuple_count, + dead_tuple_percent, + dead_tuple_len as dead_tuple_len_b + from + pg_class c + join lateral pgstattuple_approx(c.oid) st on (c.oid not in (select relation from pg_locks where mode = 'AccessExclusiveLock')) -- skip locked tables, + join pg_namespace n on n.oid = c.relnamespace + where + relkind in ('r', 'm') + and c.relpages >= 128 -- tables > 1mb + and not n.nspname like any (array[E'pg\\_%', 'information_schema']) + node_status: primary + gauges: + - '*' + table_bloat_approx_summary: + description: > + This metric provides a summary of approximate table bloat statistics, including the total bloat size and percentage for the current database. + It aggregates data from multiple tables to give an overview of bloat across the database. 
+ sqls: + 11: |- + /* accessing pgstattuple_approx directly requires superuser or pg_stat_scan_tables/pg_monitor builtin roles or + execute grant on pgstattuple_approx(regclass) + */ + with table_bloat_approx as ( + select + avg(approx_free_percent)::double precision as approx_free_percent, + sum(approx_free_space)::double precision as approx_free_space, + avg(dead_tuple_percent)::double precision as dead_tuple_percent, + sum(dead_tuple_len)::double precision as dead_tuple_len + from + pg_class c + join + pg_namespace n on n.oid = c.relnamespace + join lateral pgstattuple_approx(c.oid) on (c.oid not in (select relation from pg_locks where mode = 'AccessExclusiveLock')) -- skip locked tables + where + relkind in ('r', 'm') + and c.relpages >= 128 -- tables >1mb + and n.nspname != 'information_schema' + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + approx_free_percent, + approx_free_space as approx_free_space_b, + dead_tuple_percent, + dead_tuple_len as dead_tuple_len_b + from + table_bloat_approx + where + approx_free_space > 0 + gauges: + - '*' + table_bloat_approx_summary_sql: + description: > + This metric provides a summary of approximate table bloat statistics, including the total bloat size and percentage for the current database. + It aggregates data from multiple tables to give an overview of bloat across the database. + sqls: + 11: | + WITH q_bloat AS ( + SELECT + quote_ident(schemaname)||'.'||quote_ident(tblname) as full_table_name, + bloat_ratio as approx_bloat_percent, + bloat_size as approx_bloat_bytes, + fillfactor + FROM ( + + /* WARNING: executed with a non-superuser role, the query inspect only tables you are granted to read. 
+ * This query is compatible with PostgreSQL 9.0 and more + */ + SELECT current_database(), + schemaname, + tblname, + bs * tblpages AS real_size, + (tblpages - est_tblpages) * bs AS extra_size, + CASE + WHEN tblpages - est_tblpages > 0 + THEN 100 * (tblpages - est_tblpages) / tblpages::float + ELSE 0 + END AS extra_ratio, + fillfactor, + CASE + WHEN tblpages - est_tblpages_ff > 0 + THEN (tblpages - est_tblpages_ff) * bs + ELSE 0 + END AS bloat_size, + CASE + WHEN tblpages - est_tblpages_ff > 0 + THEN 100 * (tblpages - est_tblpages_ff) / tblpages::float + ELSE 0 + END AS bloat_ratio, + is_na + -- , (pst).free_percent + (pst).dead_tuple_percent AS real_frag + FROM ( + SELECT ceil(reltuples / ((bs - page_hdr) / tpl_size)) + ceil(toasttuples / 4) AS est_tblpages, + ceil(reltuples / ((bs - page_hdr) * fillfactor / (tpl_size * 100))) + + ceil(toasttuples / 4) AS est_tblpages_ff, + tblpages, + fillfactor, + bs, + tblid, + schemaname, + tblname, + heappages, + toastpages, + is_na + -- , stattuple.pgstattuple(tblid) AS pst + FROM ( + SELECT (4 + tpl_hdr_size + tpl_data_size + (2 * ma) + - CASE WHEN tpl_hdr_size % ma = 0 THEN ma ELSE tpl_hdr_size % ma END + - CASE + WHEN ceil(tpl_data_size)::int % ma = 0 THEN ma + ELSE ceil(tpl_data_size)::int % ma END + ) AS tpl_size, + bs - page_hdr AS size_per_block, + (heappages + toastpages) AS tblpages, + heappages, + toastpages, + reltuples, + toasttuples, + bs, + page_hdr, + tblid, + schemaname, + tblname, + fillfactor, + is_na + FROM ( + SELECT tbl.oid AS tblid, + ns.nspname AS schemaname, + tbl.relname AS tblname, + tbl.reltuples, + tbl.relpages AS heappages, + coalesce(toast.relpages, 0) AS toastpages, + coalesce(toast.reltuples, 0) AS toasttuples, + coalesce(substring( + array_to_string(tbl.reloptions, ' ') + FROM 'fillfactor=([0-9]+)')::smallint, + 100) AS fillfactor, + current_setting('block_size')::numeric AS bs, + CASE + WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' + THEN 8 + ELSE 4 END AS ma, + 
24 AS page_hdr, + 23 + CASE + WHEN MAX(coalesce(null_frac, 0)) > 0 THEN (7 + count(*)) / 8 + ELSE 0::int END + + + CASE WHEN tbl.relhasoids THEN 4 ELSE 0 END AS tpl_hdr_size, + sum((1 - coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS tpl_data_size, + bool_or(att.atttypid = 'pg_catalog.name'::regtype) + OR count(att.attname) <> count(s.attname) AS is_na + FROM pg_attribute AS att + JOIN pg_class AS tbl ON att.attrelid = tbl.oid + JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace + LEFT JOIN pg_stats AS s ON s.schemaname = ns.nspname + AND s.tablename = tbl.relname AND s.inherited = false AND + s.attname = att.attname + LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid + WHERE att.attnum > 0 + AND NOT att.attisdropped + AND tbl.relkind IN ('r', 'm') + AND ns.nspname != 'information_schema' + GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, tbl.relhasoids + ORDER BY 2, 3 + ) AS s + ) AS s2 + ) AS s3 + -- WHERE NOT is_na + ) s4 + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + (select sum(approx_bloat_bytes) from q_bloat) as approx_table_bloat_b, + ((select sum(approx_bloat_bytes) from q_bloat) * 100 / pg_database_size(current_database()))::int8 as approx_bloat_percentage + 12: | + WITH q_bloat AS ( + SELECT quote_ident(schemaname) || '.' || quote_ident(tblname) as full_table_name, + bloat_ratio as approx_bloat_percent, + bloat_size as approx_bloat_bytes, + fillfactor + FROM ( + + /* WARNING: executed with a non-superuser role, the query inspect only tables you are granted to read. 
+ * This query is compatible with PostgreSQL 9.0 and more + */ + SELECT current_database(), + schemaname, + tblname, + bs * tblpages AS real_size, + (tblpages - est_tblpages) * bs AS extra_size, + CASE + WHEN tblpages > 0 AND tblpages - est_tblpages > 0 + THEN 100 * (tblpages - est_tblpages) / tblpages::float + ELSE 0 + END AS extra_ratio, + fillfactor, + CASE + WHEN tblpages - est_tblpages_ff > 0 + THEN (tblpages - est_tblpages_ff) * bs + ELSE 0 + END AS bloat_size, + CASE + WHEN tblpages > 0 AND tblpages - est_tblpages_ff > 0 + THEN 100 * (tblpages - est_tblpages_ff) / tblpages::float + ELSE 0 + END AS bloat_ratio, + is_na + -- , (pst).free_percent + (pst).dead_tuple_percent AS real_frag + FROM ( + SELECT ceil(reltuples / ((bs - page_hdr) / tpl_size)) + ceil(toasttuples / 4) AS est_tblpages, + ceil(reltuples / ((bs - page_hdr) * fillfactor / (tpl_size * 100))) + + ceil(toasttuples / 4) AS est_tblpages_ff, + tblpages, + fillfactor, + bs, + tblid, + schemaname, + tblname, + heappages, + toastpages, + is_na + -- , stattuple.pgstattuple(tblid) AS pst + FROM ( + SELECT (4 + tpl_hdr_size + tpl_data_size + (2 * ma) + - CASE WHEN tpl_hdr_size % ma = 0 THEN ma ELSE tpl_hdr_size % ma END + - CASE + WHEN ceil(tpl_data_size)::int % ma = 0 THEN ma + ELSE ceil(tpl_data_size)::int % ma END + ) AS tpl_size, + bs - page_hdr AS size_per_block, + (heappages + toastpages) AS tblpages, + heappages, + toastpages, + reltuples, + toasttuples, + bs, + page_hdr, + tblid, + schemaname, + tblname, + fillfactor, + is_na + FROM ( + SELECT tbl.oid AS tblid, + ns.nspname AS schemaname, + tbl.relname AS tblname, + tbl.reltuples, + tbl.relpages AS heappages, + coalesce(toast.relpages, 0) AS toastpages, + coalesce(toast.reltuples, 0) AS toasttuples, + coalesce(substring( + array_to_string(tbl.reloptions, ' ') + FROM 'fillfactor=([0-9]+)')::smallint, + 100) AS fillfactor, + current_setting('block_size')::numeric AS bs, + CASE + WHEN version() ~ 'mingw32' OR version() ~ 
'64-bit|x86_64|ppc64|ia64|amd64' + THEN 8 + ELSE 4 END AS ma, + 24 AS page_hdr, + 23 + CASE + WHEN MAX(coalesce(null_frac, 0)) > 0 THEN (7 + count(*)) / 8 + ELSE 0::int END + + + 0 AS tpl_hdr_size, + sum((1 - coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS tpl_data_size, + bool_or(att.atttypid = 'pg_catalog.name'::regtype) + OR + count(att.attname) <> count(s.attname) AS is_na + FROM pg_attribute AS att + JOIN pg_class AS tbl ON att.attrelid = tbl.oid + JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace + LEFT JOIN pg_stats AS s ON s.schemaname = ns.nspname + AND s.tablename = tbl.relname AND s.inherited = false AND + s.attname = att.attname + LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid + WHERE att.attnum > 0 + AND NOT att.attisdropped + AND tbl.relkind IN ('r', 'm') + AND ns.nspname != 'information_schema' + GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 + ORDER BY 2, 3 + ) AS s + ) AS s2 + ) AS s3 + -- WHERE NOT is_na + ) s4 + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + (select sum(approx_bloat_bytes) from q_bloat) as approx_table_bloat_b, + ((select sum(approx_bloat_bytes) from q_bloat) * 100 / pg_database_size(current_database()))::int8 as approx_bloat_percentage + gauges: + - '*' + table_hashes: + description: > + This metric collects hashes of table definitions to detect changes in the schema. + It uses the `pg_catalog.pg_tables` view to gather information about tables and their columns. + The hash is computed based on the table schema, name, and column definitions. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + quote_ident(table_schema)||'.'||quote_ident(table_name) as tag_table, + md5((array_agg((c.*)::text order by ordinal_position))::text) + from ( + SELECT current_database()::information_schema.sql_identifier AS table_catalog, + nc.nspname::information_schema.sql_identifier AS table_schema, + c.relname::information_schema.sql_identifier AS table_name, + a.attname::information_schema.sql_identifier AS column_name, + a.attnum::information_schema.cardinal_number AS ordinal_position, + pg_get_expr(ad.adbin, ad.adrelid)::information_schema.character_data AS column_default, + CASE + WHEN a.attnotnull OR t.typtype = 'd'::"char" AND t.typnotnull THEN 'NO'::text + ELSE 'YES'::text + END::information_schema.yes_or_no AS is_nullable, + CASE + WHEN t.typtype = 'd'::"char" THEN + CASE + WHEN bt.typelem <> 0::oid AND bt.typlen = '-1'::integer THEN 'ARRAY'::text + WHEN nbt.nspname = 'pg_catalog'::name THEN format_type(t.typbasetype, NULL::integer) + ELSE 'USER-DEFINED'::text + END + ELSE + CASE + WHEN t.typelem <> 0::oid AND t.typlen = '-1'::integer THEN 'ARRAY'::text + WHEN nt.nspname = 'pg_catalog'::name THEN format_type(a.atttypid, NULL::integer) + ELSE 'USER-DEFINED'::text + END + END::information_schema.character_data AS data_type, + information_schema._pg_char_max_length(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS character_maximum_length, + information_schema._pg_char_octet_length(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS character_octet_length, + information_schema._pg_numeric_precision(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS numeric_precision, + 
information_schema._pg_numeric_precision_radix(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS numeric_precision_radix, + information_schema._pg_numeric_scale(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS numeric_scale, + information_schema._pg_datetime_precision(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS datetime_precision, + information_schema._pg_interval_type(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.character_data AS interval_type, + NULL::integer::information_schema.cardinal_number AS interval_precision, + NULL::character varying::information_schema.sql_identifier AS character_set_catalog, + NULL::character varying::information_schema.sql_identifier AS character_set_schema, + NULL::character varying::information_schema.sql_identifier AS character_set_name, + CASE + WHEN nco.nspname IS NOT NULL THEN current_database() + ELSE NULL::name + END::information_schema.sql_identifier AS collation_catalog, + nco.nspname::information_schema.sql_identifier AS collation_schema, + co.collname::information_schema.sql_identifier AS collation_name, + CASE + WHEN t.typtype = 'd'::"char" THEN current_database() + ELSE NULL::name + END::information_schema.sql_identifier AS domain_catalog, + CASE + WHEN t.typtype = 'd'::"char" THEN nt.nspname + ELSE NULL::name + END::information_schema.sql_identifier AS domain_schema, + CASE + WHEN t.typtype = 'd'::"char" THEN t.typname + ELSE NULL::name + END::information_schema.sql_identifier AS domain_name, + current_database()::information_schema.sql_identifier AS udt_catalog, + COALESCE(nbt.nspname, nt.nspname)::information_schema.sql_identifier AS udt_schema, + COALESCE(bt.typname, t.typname)::information_schema.sql_identifier AS 
udt_name, + NULL::character varying::information_schema.sql_identifier AS scope_catalog, + NULL::character varying::information_schema.sql_identifier AS scope_schema, + NULL::character varying::information_schema.sql_identifier AS scope_name, + NULL::integer::information_schema.cardinal_number AS maximum_cardinality, + a.attnum::information_schema.sql_identifier AS dtd_identifier, + 'NO'::character varying::information_schema.yes_or_no AS is_self_referencing, + 'NO'::character varying::information_schema.yes_or_no AS is_identity, + NULL::character varying::information_schema.character_data AS identity_generation, + NULL::character varying::information_schema.character_data AS identity_start, + NULL::character varying::information_schema.character_data AS identity_increment, + NULL::character varying::information_schema.character_data AS identity_maximum, + NULL::character varying::information_schema.character_data AS identity_minimum, + NULL::character varying::information_schema.yes_or_no AS identity_cycle, + 'NEVER'::character varying::information_schema.character_data AS is_generated, + NULL::character varying::information_schema.character_data AS generation_expression, + CASE + WHEN c.relkind = 'r'::"char" OR (c.relkind = ANY (ARRAY['v'::"char", 'f'::"char"])) AND pg_column_is_updatable(c.oid::regclass, a.attnum, false) THEN 'YES'::text + ELSE 'NO'::text + END::information_schema.yes_or_no AS is_updatable + FROM pg_attribute a + LEFT JOIN pg_attrdef ad ON a.attrelid = ad.adrelid AND a.attnum = ad.adnum + JOIN (pg_class c + JOIN pg_namespace nc ON c.relnamespace = nc.oid) ON a.attrelid = c.oid + JOIN (pg_type t + JOIN pg_namespace nt ON t.typnamespace = nt.oid) ON a.atttypid = t.oid + LEFT JOIN (pg_type bt + JOIN pg_namespace nbt ON bt.typnamespace = nbt.oid) ON t.typtype = 'd'::"char" AND t.typbasetype = bt.oid + LEFT JOIN (pg_collation co + JOIN pg_namespace nco ON co.collnamespace = nco.oid) ON a.attcollation = co.oid AND (nco.nspname <> 'pg_catalog'::name OR 
co.collname <> 'default'::name) + WHERE NOT pg_is_other_temp_schema(nc.oid) AND a.attnum > 0 AND NOT a.attisdropped AND (c.relkind = ANY (ARRAY['r'::"char", 'v'::"char", 'f'::"char"])) + + ) c + where + not table_schema like any (array[E'pg\\_%', 'information_schema']) + group by + table_schema, table_name + order by + table_schema, table_name + table_io_stats: + description: > + This metric collects I/O statistics for tables, including heap and index block reads and hits. + It provides insights into the performance of table access patterns. + sqls: + 11: |- + select * from ( + with recursive + q_root_part as ( + select c.oid, + c.relkind, + n.nspname root_schema, + c.relname root_relname + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where relkind in ('p', 'r') + and relpersistence != 't' + and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and not exists(select * from pg_inherits where inhrelid = c.oid) + and exists(select * from pg_inherits where inhparent = c.oid) + ), + q_parts (relid, relkind, level, root) as ( + select oid, relkind, 1, oid + from q_root_part + union all + select inhrelid, c.relkind, level + 1, q.root + from pg_inherits i + join q_parts q on inhparent = q.relid + join pg_class c on c.oid = i.inhrelid + ), + q_tstats as ( + SELECT (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + relid, + schemaname::text as tag_schema, + relname::text as tag_table_name, + quote_ident(schemaname) || '.' 
|| quote_ident(relname) as tag_table_full_name, + heap_blks_read, + heap_blks_hit, + idx_blks_read, + idx_blks_hit, + toast_blks_read, + toast_blks_hit, + tidx_blks_read, + tidx_blks_hit + FROM pg_statio_user_tables + WHERE NOT schemaname LIKE E'pg\\_temp%' + AND (heap_blks_read > 0 OR heap_blks_hit > 0 OR idx_blks_read > 0 OR idx_blks_hit > 0 OR + tidx_blks_read > 0 OR + tidx_blks_hit > 0) + ) + select epoch_ns, + tag_schema, + tag_table_name, + tag_table_full_name, + 0 as is_part_root, + heap_blks_read, + heap_blks_hit, + idx_blks_read, + idx_blks_hit, + toast_blks_read, + toast_blks_hit, + tidx_blks_read, + tidx_blks_hit + from q_tstats + where not tag_schema like E'\\_timescaledb%' + and not exists (select * from q_root_part where oid = q_tstats.relid) + + union all + + select * + from ( + select epoch_ns, + quote_ident(qr.root_schema) as tag_schema, + quote_ident(qr.root_relname) as tag_table_name, + quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, + 1 as is_part_root, + sum(heap_blks_read)::int8, + sum(heap_blks_hit)::int8, + sum(idx_blks_read)::int8, + sum(idx_blks_hit)::int8, + sum(toast_blks_read)::int8, + sum(toast_blks_hit)::int8, + sum(tidx_blks_read)::int8, + sum(tidx_blks_hit)::int8 + from q_tstats ts + join q_parts qp on qp.relid = ts.relid + join q_root_part qr on qr.oid = qp.root + group by 1, 2, 3, 4 + ) x + ) y + order by + coalesce(heap_blks_read, 0) + + coalesce(heap_blks_hit, 0) + + coalesce(idx_blks_read, 0) + + coalesce(idx_blks_hit, 0) + + coalesce(toast_blks_read, 0) + + coalesce(toast_blks_hit, 0) + + coalesce(tidx_blks_read, 0) + + coalesce(tidx_blks_hit, 0) + desc limit 300 + table_stats: + description: > + This metric collects statistics about user tables, including size, vacuum status, and transaction freeze age. + It provides insights into the health and performance of tables in the database. 
+ sqls: + 11: |- + with recursive + q_root_part as ( + select c.oid, + c.relkind, + n.nspname root_schema, + c.relname root_relname + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where relkind in ('p', 'r') + and relpersistence != 't' + and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and not exists(select * from pg_inherits where inhrelid = c.oid) + and exists(select * from pg_inherits where inhparent = c.oid) + ), + q_parts (relid, relkind, level, root) as ( + select oid, relkind, 1, oid + from q_root_part + union all + select inhrelid, c.relkind, level + 1, q.root + from pg_inherits i + join q_parts q on inhparent = q.relid + join pg_class c on c.oid = i.inhrelid + ), + q_tstats as ( + select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + relid, -- not sent to final output + quote_ident(schemaname) as tag_schema, + quote_ident(ut.relname) as tag_table_name, + quote_ident(schemaname) || '.' || quote_ident(ut.relname) as tag_table_full_name, + pg_table_size(relid) as table_size_b, + abs(greatest(ceil(log((pg_table_size(relid) + 1) / 10 ^ 6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ pg_total_relation_size(relid) as total_relation_size_b, + case when reltoastrelid != 0 then pg_total_relation_size(reltoastrelid) else 0::int8 end as toast_size_b, + (extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum, + (extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze, + case when 'autovacuum_enabled=off' = ANY (c.reloptions) then 1 else 0 end as no_autovacuum, + seq_scan, + seq_tup_read, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + case when c.relkind != 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age + from pg_stat_user_tables ut + join + pg_class c on c.oid = ut.relid + where + -- leaving out fully locked tables as pg_relation_size also wants a lock and would wait + not exists(select 1 from pg_locks where relation = relid and mode = 'AccessExclusiveLock') + and c.relpersistence != 't' -- and temp tables + ) + + select /* pgwatch_generated */ + epoch_ns, + tag_schema, + tag_table_name, + tag_table_full_name, + 0 as is_part_root, + table_size_b, + tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ total_relation_size_b, + toast_size_b, + seconds_since_last_vacuum, + seconds_since_last_analyze, + no_autovacuum, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + tx_freeze_age + from q_tstats + where not tag_schema like E'\\_timescaledb%' + and not exists (select * from q_root_part where oid = q_tstats.relid) + + union all + + select * from ( + select + epoch_ns, + quote_ident(qr.root_schema) as tag_schema, + quote_ident(qr.root_relname) as tag_table_name, + quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, + 1 as is_part_root, + sum(table_size_b)::int8 table_size_b, + abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)), + 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. + sum(total_relation_size_b)::int8 total_relation_size_b, + sum(toast_size_b)::int8 toast_size_b, + min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum, + min(seconds_since_last_analyze)::int8 seconds_since_last_analyze, + sum(no_autovacuum)::int8 no_autovacuum, + sum(seq_scan)::int8 seq_scan, + sum(seq_tup_read)::int8 seq_tup_read, + sum(idx_scan)::int8 idx_scan, + sum(idx_tup_fetch)::int8 idx_tup_fetch, + sum(n_tup_ins)::int8 n_tup_ins, + sum(n_tup_upd)::int8 n_tup_upd, + sum(n_tup_del)::int8 n_tup_del, + sum(n_tup_hot_upd)::int8 n_tup_hot_upd, + sum(n_live_tup)::int8 n_live_tup, + sum(n_dead_tup)::int8 n_dead_tup, + sum(vacuum_count)::int8 vacuum_count, + sum(autovacuum_count)::int8 autovacuum_count, + sum(analyze_count)::int8 analyze_count, + sum(autoanalyze_count)::int8 autoanalyze_count, + max(tx_freeze_age)::int8 tx_freeze_age + from + q_tstats ts + join q_parts qp on qp.relid = ts.relid + join q_root_part qr on qr.oid = qp.root + group by + 1, 2, 3, 4 + ) x + order by table_size_b desc nulls last limit 300 + 16: |- + with recursive /* 
pgwatch_generated */ + q_root_part as ( + select c.oid, + c.relkind, + n.nspname root_schema, + c.relname root_relname + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where relkind in ('p', 'r') + and relpersistence != 't' + and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and not exists(select * from pg_inherits where inhrelid = c.oid) + and exists(select * from pg_inherits where inhparent = c.oid) + ), + q_parts (relid, relkind, level, root) as ( + select oid, relkind, 1, oid + from q_root_part + union all + select inhrelid, c.relkind, level + 1, q.root + from pg_inherits i + join q_parts q on inhparent = q.relid + join pg_class c on c.oid = i.inhrelid + ), + q_tstats as ( + select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + relid, -- not sent to final output + quote_ident(schemaname) as tag_schema, + quote_ident(ut.relname) as tag_table_name, + quote_ident(schemaname) || '.' || quote_ident(ut.relname) as tag_table_full_name, + pg_table_size(relid) as table_size_b, + abs(greatest(ceil(log((pg_table_size(relid) + 1) / 10 ^ 6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ pg_total_relation_size(relid) as total_relation_size_b, + case when c.reltoastrelid != 0 then pg_total_relation_size(c.reltoastrelid) else 0::int8 end as toast_size_b, + (extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum, + (extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze, + case when 'autovacuum_enabled=off' = ANY (c.reloptions) then 1 else 0 end as no_autovacuum, + seq_scan, + seq_tup_read, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + case when c.relkind != 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age, + extract(epoch from now() - last_seq_scan)::int8 as last_seq_scan_s + from pg_stat_user_tables ut + join pg_class c on c.oid = ut.relid + left join pg_class t on t.oid = c.reltoastrelid + left join pg_index ti on ti.indrelid = t.oid + left join pg_class tir on tir.oid = ti.indexrelid + where + -- leaving out fully locked tables as pg_relation_size also wants a lock and would wait + not exists (select 1 from pg_locks where relation = relid and mode = 'AccessExclusiveLock') + and c.relpersistence != 't' -- and temp tables + order by case when c.relkind = 'p' then 1e9::int else coalesce(c.relpages, 0) + coalesce(t.relpages, 0) + coalesce(tir.relpages, 0) end desc + limit 1500 /* NB! When changing the bottom final LIMIT also adjust this limit. Should be at least 5x bigger as approx sizes depend a lot on vacuum frequency. + The general idea is to reduce filesystem "stat"-ing on tables that won't make it to final output anyways based on approximate size */ + ) + + select /* pgwatch_generated */ + epoch_ns, + tag_schema, + tag_table_name, + tag_table_full_name, + 0 as is_part_root, + table_size_b, + tag_table_size_cardinality_mb, -- i.e. 
0=<1MB, 1=<10MB, 2=<100MB,.. + total_relation_size_b, + toast_size_b, + seconds_since_last_vacuum, + seconds_since_last_analyze, + no_autovacuum, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + tx_freeze_age, + last_seq_scan_s + from q_tstats + where not tag_schema like E'\\_timescaledb%' + and not exists (select * from q_root_part where oid = q_tstats.relid) + + union all + + select * from ( + select + epoch_ns, + quote_ident(qr.root_schema) as tag_schema, + quote_ident(qr.root_relname) as tag_table_name, + quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, + 1 as is_part_root, + sum(table_size_b)::int8 table_size_b, + abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)), + 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. + sum(total_relation_size_b)::int8 total_relation_size_b, + sum(toast_size_b)::int8 toast_size_b, + min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum, + min(seconds_since_last_analyze)::int8 seconds_since_last_analyze, + sum(no_autovacuum)::int8 no_autovacuum, + sum(seq_scan)::int8 seq_scan, + sum(seq_tup_read)::int8 seq_tup_read, + sum(idx_scan)::int8 idx_scan, + sum(idx_tup_fetch)::int8 idx_tup_fetch, + sum(n_tup_ins)::int8 n_tup_ins, + sum(n_tup_upd)::int8 n_tup_upd, + sum(n_tup_del)::int8 n_tup_del, + sum(n_tup_hot_upd)::int8 n_tup_hot_upd, + sum(n_live_tup)::int8 n_live_tup, + sum(n_dead_tup)::int8 n_dead_tup, + sum(vacuum_count)::int8 vacuum_count, + sum(autovacuum_count)::int8 autovacuum_count, + sum(analyze_count)::int8 analyze_count, + sum(autoanalyze_count)::int8 autoanalyze_count, + max(tx_freeze_age)::int8 tx_freeze_age, + min(last_seq_scan_s)::int8 last_seq_scan_s + from + q_tstats ts + join q_parts qp on qp.relid = ts.relid + join q_root_part qr on qr.oid = qp.root + group by + 
1, 2, 3, 4 + ) x + order by table_size_b desc nulls last limit 300 + gauges: + - table_size_b + - total_relation_size_b + - toast_size_b + - seconds_since_last_vacuum + - seconds_since_last_analyze + - n_live_tup + - n_dead_tup + statement_timeout_seconds: 300 + table_stats_approx: + description: > + This metric collects approximate statistics about user tables, including size, vacuum status, and transaction freeze age. + It provides insights into the health and performance of tables in the database. + sqls: + 11: |- + with recursive /* pgwatch_generated */ + q_root_part as ( + select c.oid, + c.relkind, + n.nspname root_schema, + c.relname root_relname + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where relkind in ('p', 'r') + and relpersistence != 't' + and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and not exists(select * from pg_inherits where inhrelid = c.oid) + and exists(select * from pg_inherits where inhparent = c.oid) + ), + q_parts (relid, relkind, level, root) as ( + select oid, relkind, 1, oid + from q_root_part + union all + select inhrelid, c.relkind, level + 1, q.root + from pg_inherits i + join q_parts q on inhparent = q.relid + join pg_class c on c.oid = i.inhrelid + ), + q_tstats as ( + with q_tbls_by_total_associated_relpages_approx as ( + select * from ( + select + c.oid, + c.relname, + c.relpages, + coalesce((select sum(relpages) from pg_class ci join pg_index i on i.indexrelid = ci.oid where i.indrelid = c.oid), 0) as index_relpages, + coalesce((select coalesce(ct.relpages, 0) + coalesce(cti.relpages, 0) from pg_class ct left join pg_index ti on ti.indrelid = ct.oid left join pg_class cti on cti.oid = ti.indexrelid where ct.oid = c.reltoastrelid), 0) as toast_relpages, + case when 'autovacuum_enabled=off' = ANY(c.reloptions) then 1 else 0 end as no_autovacuum, + case when c.relkind != 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age, + c.relpersistence + from + pg_class 
c + join pg_namespace n on n.oid = c.relnamespace + where + not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and c.relkind = 'r' + and c.relpersistence != 't' + ) x + order by relpages + index_relpages + toast_relpages desc limit 300 + ), q_block_size as ( + select current_setting('block_size')::int8 as bs + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + relid, + quote_ident(schemaname)||'.'||quote_ident(ut.relname) as tag_table_full_name, + bs * relpages as table_size_b, + abs(greatest(ceil(log((bs*relpages+1) / 10^6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. + bs * (relpages + index_relpages + toast_relpages) as total_relation_size_b, + bs * toast_relpages as toast_size_b, + (extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum, + (extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze, + no_autovacuum, + seq_scan, + seq_tup_read, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + tx_freeze_age, + relpersistence + from + pg_stat_user_tables ut + join q_tbls_by_total_associated_relpages_approx t on t.oid = ut.relid + join q_block_size on true + where + -- leaving out fully locked tables as pg_relation_size also wants a lock and would wait + not exists (select 1 from pg_locks where relation = relid and mode = 'AccessExclusiveLock') + order by relpages desc + ) + select /* pgwatch_generated */ + epoch_ns, + tag_table_full_name, + 0 as is_part_root, + table_size_b, + tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ total_relation_size_b, + toast_size_b, + seconds_since_last_vacuum, + seconds_since_last_analyze, + no_autovacuum, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + tx_freeze_age + from q_tstats + where not exists (select * from q_root_part where oid = q_tstats.relid) + union all + select * from ( + select + epoch_ns, + quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, + 1 as is_part_root, + sum(table_size_b)::int8 table_size_b, + abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)), + 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. + sum(total_relation_size_b)::int8 total_relation_size_b, + sum(toast_size_b)::int8 toast_size_b, + min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum, + min(seconds_since_last_analyze)::int8 seconds_since_last_analyze, + sum(no_autovacuum)::int8 no_autovacuum, + sum(seq_scan)::int8 seq_scan, + sum(seq_tup_read)::int8 seq_tup_read, + sum(idx_scan)::int8 idx_scan, + sum(idx_tup_fetch)::int8 idx_tup_fetch, + sum(n_tup_ins)::int8 n_tup_ins, + sum(n_tup_upd)::int8 n_tup_upd, + sum(n_tup_del)::int8 n_tup_del, + sum(n_tup_hot_upd)::int8 n_tup_hot_upd, + sum(n_live_tup)::int8 n_live_tup, + sum(n_dead_tup)::int8 n_dead_tup, + sum(vacuum_count)::int8 vacuum_count, + sum(autovacuum_count)::int8 autovacuum_count, + sum(analyze_count)::int8 analyze_count, + sum(autoanalyze_count)::int8 autoanalyze_count, + max(tx_freeze_age)::int8 tx_freeze_age + from + q_tstats ts + join q_parts qp on qp.relid = ts.relid + join q_root_part qr on qr.oid = qp.root + group by + 1, 2 + ) x; + + gauges: + - table_size_b + - total_relation_size_b + - toast_size_b + - seconds_since_last_vacuum + - seconds_since_last_analyze + - n_live_tup + - n_dead_tup + metric_storage_name: table_stats + unused_indexes: + 
description: > + This metric collects information about unused indexes in the database. + It helps identify indexes that are not being used and can potentially be dropped to improve performance. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + * + from ( + select + format('%I.%I', sui.schemaname, sui.indexrelname) as tag_index_full_name, + sui.idx_scan, + coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b, + system_identifier::text as tag_sys_id /* to easily check also all replicas as could be still used there */ + from + pg_stat_user_indexes sui + join pg_index i on i.indexrelid = sui.indexrelid + join pg_control_system() on true + where not sui.schemaname like E'pg\\_temp%' + and idx_scan = 0 + and not (indisprimary or indisunique or indisexclusion) + and not exists (select * from pg_locks where relation = sui.relid and mode = 'AccessExclusiveLock') + ) x + where index_size_b > 100*1024^2 /* list >100MB only */ + order by index_size_b desc + limit 25 + vmstat: + description: > + This metric collects system-level statistics using the `vmstat` command. + It provides insights into memory usage, CPU load, and other system metrics. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + r, b, swpd, free, buff, cache, si, so, bi, bo, "in", cs, us, sy, id, wa, st, cpu_count, load_1m, load_5m, load_15m, total_memory + from + get_vmstat() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_vmstat( + IN delay int default 1, + OUT r int, OUT b int, OUT swpd int8, OUT free int8, OUT buff int8, OUT cache int8, OUT si int8, OUT so int8, OUT bi int8, + OUT bo int8, OUT "in" int, OUT cs int, OUT us int, OUT sy int, OUT id int, OUT wa int, OUT st int, + OUT cpu_count int, OUT load_1m float4, OUT load_5m float4, OUT load_15m float4, OUT total_memory int8 + ) + LANGUAGE plpython3u + AS $FUNCTION$ + from os import cpu_count, popen + unit = 1024 # 'vmstat' default block byte size + + cpu_count = cpu_count() + vmstat_lines = popen('vmstat {} 2'.format(delay)).readlines() + vm = [int(x) for x in vmstat_lines[-1].split()] + # plpy.notice(vm) + load_1m, load_5m, load_15m = None, None, None + with open('/proc/loadavg', 'r') as f: + la_line = f.readline() + if la_line: + splits = la_line.split() + if len(splits) == 5: + load_1m, load_5m, load_15m = splits[0], splits[1], splits[2] + + total_memory = None + with open('/proc/meminfo', 'r') as f: + mi_line = f.readline() + splits = mi_line.split() + # plpy.notice(splits) + if len(splits) == 3: + total_memory = int(splits[1]) * 1024 + + return vm[0], vm[1], vm[2] * unit, vm[3] * unit, vm[4] * unit, vm[5] * unit, vm[6] * unit, vm[7] * unit, vm[8] * unit, \ + vm[9] * unit, vm[10], vm[11], vm[12], vm[13], vm[14], vm[15], vm[16], cpu_count, load_1m, load_5m, load_15m, total_memory + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_vmstat(int) TO pgwatch; + COMMENT ON FUNCTION get_vmstat(int) IS 'created for pgwatch'; + wait_events: + description: > + This metric collects information about active queries that are waiting for events in the database. 
+ It provides insights into query performance and potential bottlenecks. + sqls: + 11: |- + with q_sa as ( + select * from pg_stat_activity where datname = current_database() and pid <> pg_backend_pid() + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + wait_event_type as tag_wait_event_type, + wait_event as tag_wait_event, + count(*), + avg(abs(1e6* extract(epoch from now() - query_start)))::int8 as avg_query_duration_us, + max(abs(1e6* extract(epoch from now() - query_start)))::int8 as max_query_duration_us, + (select count(*) from q_sa where state = 'active') as total_active + from + q_sa + where + state = 'active' + and wait_event_type is not null + and wait_event_type <> 'Timeout' + group by + 1, 2, 3 + wal: + description: > + This metric collects information about the Write-Ahead Logging (WAL) system in PostgreSQL. + It provides insights into WAL activity, including the current WAL location, replay lag, and other related metrics. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + case + when pg_is_in_recovery() = false then + pg_wal_lsn_diff(pg_current_wal_lsn(), '0/0')::int8 + else + pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '0/0')::int8 + end as xlog_location_b, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + system_identifier::text as tag_sys_id, + case + when pg_is_in_recovery() = false then + ('x'||substr(pg_walfile_name(pg_current_wal_lsn()), 1, 8))::bit(32)::int + else + (select min_recovery_end_timeline::int from pg_control_recovery()) + end as timeline + from pg_control_system() + gauges: + - '*' + is_instance_level: true + wal_receiver: + description: > + This metric collects information about the WAL receiver process in PostgreSQL. + It provides insights into the status of the WAL receiver, including replay lag and last replay timestamp. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())::int8 as replay_lag_b, + extract(epoch from (now() - pg_last_xact_replay_timestamp()))::int8 as last_replay_s + node_status: standby + gauges: + - '*' + is_instance_level: true + wal_size: + description: > + This metric collects the size of the Write-Ahead Log (WAL) directory in PostgreSQL. + It provides insights into the total size of WAL files currently stored in the database. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + sum(size)::int8 as wal_size_b + from pg_ls_waldir() + gauges: + - '*' + is_instance_level: true + wal_stats: + description: > + This metric collects statistics about the Write-Ahead Logging (WAL) system in PostgreSQL. + It provides insights into WAL activity, including the number of records, full page images, and write/sync times. + sqls: + 14: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + wal_records, + wal_fpi, + (wal_bytes / 1024)::int8 as wal_bytes_kb, + wal_buffers_full, + wal_write, + wal_sync, + wal_write_time::int8, + wal_sync_time::int8 + from + pg_stat_wal +presets: + aiven: + description: aiven database metrics + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats: 60 + index_col_stats: 86400 + index_stats: 900 + locks: 60 + locks_mode: 60 + recommendations: 43200 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_activity: 60 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 300 + table_stats: 300 + wal: 60 + wal_receiver: 120 + aurora: + description: AWS Aurora doesn't expose all Postgres functions and there's no WAL + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 
+ checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats_aurora: 60 + index_stats: 900 + locks: 60 + locks_mode: 60 + replication: 120 + replication_slots: 120 + settings: 7200 + sproc_stats: 180 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + table_stats: 300 + wal_receiver: 120 + azure: + description: similar to 'exhaustive' with stuff that's not accessible on Azure Database for PostgreSQL removed + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats: 60 + index_stats: 900 + kpi: 120 + locks: 60 + locks_mode: 60 + replication: 60 + replication_slots: 60 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_ssl: 60 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + wal: 60 + wal_receiver: 60 + wal_size: 300 + basic: + description: only the most important metrics - WAL, DB-level statistics (size, tx and backend counts) + metrics: + instance_up: 60 + db_size: 300 + db_stats: 60 + wal: 60 + exhaustive: + description: all important metrics for a deeper performance understanding + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + cpu_load: 60 + db_size: 300 + db_stats: 60 + index_stats: 900 + instance_up: 60 + locks: 60 + locks_mode: 60 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_activity: 30 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + table_stats: 300 + wal: 60 + wal_receiver: 120 + wal_size: 300 + full: + description: almost all available metrics for a even deeper performance understanding + metrics: + archiver: 60 + archiver_pending_count: 300 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + cpu_load: 60 + datfrozenxid: 3600 + db_size: 300 + 
db_stats: 60 + index_stats: 900 + instance_up: 60 + kpi: 120 + locks: 60 + locks_mode: 60 + logical_subscriptions: 120 + postgres_role: 60 + psutil_cpu: 120 + psutil_disk: 120 + psutil_disk_io_total: 120 + psutil_mem: 120 + recommendations: 43200 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + server_log_event_counts: 60 + settings: 7200 + sproc_stats: 180 + stat_activity: 30 + stat_ssl: 120 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + table_stats: 300 + wal: 60 + wal_receiver: 120 + wal_size: 120 + gce: + description: similar to 'exhaustive' with stuff not accessible on GCE managed PostgreSQL engine removed + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats: 60 + index_stats: 900 + locks: 60 + locks_mode: 60 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + table_stats: 300 + wal: 60 + wal_receiver: 120 + minimal: + description: single "Key Performance Indicators" query for fast cluster/db overview + metrics: + instance_up: 60 + kpi: 60 + pgbouncer: + description: pgbouncer stats + metrics: + pgbouncer_stats: 60 + pgbouncer_clients: 60 + pgpool: + description: pgpool stats + metrics: + pgpool_stats: 60 + pgpool_processes: 60 + prometheus-async: + description: Tuned for the Prometheus async scrapping + metrics: + backends: 30 + bgwriter: 60 + checkpointer: 60 + db_size: 300 + db_stats: 30 + locks_mode: 30 + replication: 120 + replication_slots: 120 + settings: 300 + sproc_stats: 180 + stat_statements_calls: 60 + table_io_stats: 300 + table_stats: 300 + wait_events: 60 + wal: 60 + rds: + description: similar to 'exhaustive' with stuff that's not accessible on AWS RDS removed + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 + 
checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats: 60 + index_stats: 900 + locks: 60 + locks_mode: 60 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_activity: 30 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + table_stats: 300 + wal: 60 + wal_receiver: 120 + standard: + description: basic level + table, index, stat_statements stats + metrics: + cpu_load: 60 + db_size: 300 + db_stats: 60 + index_stats: 900 + instance_up: 60 + sequence_health: 3600 + sproc_stats: 180 + stat_statements: 180 + table_stats: 300 + wal: 60 + exhaustive_no_python: + description: like exhaustive, but no PL/Python helpers + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats: 60 + index_stats: 900 + instance_up: 60 + locks: 60 + locks_mode: 60 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_activity: 30 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + table_stats: 300 + wal: 60 + wal_receiver: 120 + wal_size: 300 + unprivileged: + description: no wrappers + only pg_stat_statements extension expected (developer mode) + metrics: + archiver: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats: 60 + index_stats: 900 + instance_up: 60 + locks: 60 + locks_mode: 60 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_statements_calls: 60 + table_io_stats: 600 + table_stats: 300 + wal: 60 \ No newline at end of file diff --git a/config/pgwatch-prometheus/sources.yml b/config/pgwatch-prometheus/sources.yml index 66a424f..c6cea94 100644 --- a/config/pgwatch-prometheus/sources.yml +++ b/config/pgwatch-prometheus/sources.yml @@ -5,7 +5,24 @@ conn_str: 
postgresql://pgwatch_monitor:monitor_pass@target-db:5432/target_database kind: postgres custom_metrics: - pg_stat_statements_calls: 30 + pg_stat_statements_calls: 1 + backends: 1 + bgwriter: 1 + checkpointer: 1 + db_size: 1 + db_stats: 1 + locks_mode: 1 + replication: 1 + replication_slots: 1 + settings: 1 + sproc_stats: 1 + stat_statements: 1 + stat_statements_calls: 1 + table_io_stats: 1 + table_stats: 1 + wait_events: 1 + wal: 1 + custom_tags: env: demo cluster: local diff --git a/config/prometheus/prometheus.yml b/config/prometheus/prometheus.yml index 61a876b..d3eb569 100644 --- a/config/prometheus/prometheus.yml +++ b/config/prometheus/prometheus.yml @@ -1,6 +1,7 @@ global: - scrape_interval: 1s - evaluation_interval: 1s + scrape_interval: 15s # Default scrape interval + evaluation_interval: 15s # Default evaluation interval + scrape_timeout: 10s # Global scrape timeout rule_files: # - "first_rules.yml" @@ -10,5 +11,6 @@ scrape_configs: - job_name: 'pgwatch-prometheus' static_configs: - targets: ['pgwatch-prometheus:9091'] - scrape_interval: 30s + scrape_interval: 30s # How often to scrape PGWatch + scrape_timeout: 25s # Timeout for each scrape (must be < scrape_interval) metrics_path: /pgwatch \ No newline at end of file diff --git a/old-metrics.yml b/old-metrics.yml new file mode 100644 index 0000000..e69de29 -- GitLab From 68f1d1ed26d79f287d314154fb46b9ca11a7e0f7 Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Mon, 30 Jun 2025 19:05:19 +0300 Subject: [PATCH 2/7] Fixed naming of values and shared crosshair --- config/grafana/dashboards/dash1.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/config/grafana/dashboards/dash1.json b/config/grafana/dashboards/dash1.json index ba66e0a..658eeb1 100644 --- a/config/grafana/dashboards/dash1.json +++ b/config/grafana/dashboards/dash1.json @@ -17,7 +17,7 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 2, + 
"graphTooltip": 1, "id": 1, "links": [], "panels": [ @@ -116,7 +116,7 @@ "hide": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "__auto", + "legendFormat": "Runtime", "range": true, "refId": "B", "useBackend": false @@ -628,18 +628,18 @@ "auto_count": 30, "auto_min": "10s", "current": { - "text": "10s", - "value": "10s" + "text": "1m", + "value": "1m" }, "name": "agg_interval", "options": [ { - "selected": true, + "selected": false, "text": "10s", "value": "10s" }, { - "selected": false, + "selected": true, "text": "1m", "value": "1m" }, @@ -668,5 +668,5 @@ "timezone": "browser", "title": "PoC", "uid": "00eb62a7-4b80-43cd-a890-45336979aa18", - "version": 16 + "version": 20 } \ No newline at end of file -- GitLab From d6dd9ab0e7708733dc8917ef852e49b187e602fb Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Tue, 1 Jul 2025 01:45:07 +0300 Subject: [PATCH 3/7] Initial DB overview dashboard --- .../dashboards/DBOverviewDashboard.json | 3716 +++++++++++++++++ config/grafana/dashboards/dash1.json | 330 +- config/pgwatch-prometheus/metrics.yml | 48 +- 3 files changed, 4047 insertions(+), 47 deletions(-) create mode 100644 config/grafana/dashboards/DBOverviewDashboard.json diff --git a/config/grafana/dashboards/DBOverviewDashboard.json b/config/grafana/dashboards/DBOverviewDashboard.json new file mode 100644 index 0000000..77a2efc --- /dev/null +++ b/config/grafana/dashboards/DBOverviewDashboard.json @@ -0,0 +1,3716 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 2, + "links": [], + "panels": [ + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [], + 
"title": "ASH", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 2, + "panels": [], + "title": "Host stats", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 3, + "panels": [], + "title": "Postgres stats", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Active" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idle" + }, + 
"properties": [ + { + "id": "color", + "value": { + "fixedColor": "#afafaf", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idle-in-transaction" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Waiting" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 3 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_idleintransaction", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Idle-in-transaction", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_idle", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Idle", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_active", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Active", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + 
"disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_waiting ", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Waiting", + "range": true, + "refId": "F", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_settings_max_connections", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Max connections", + "range": true, + "refId": "D", + "useBackend": false + } + ], + "title": "Sessions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + }, + { + "id": 
"custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + }, + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Active" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idle-in-transaction" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 3 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_idleintransaction", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Idle-in-transaction", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_active", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Active", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_settings_max_connections", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Max connections", + 
"range": true, + "refId": "D", + "useBackend": false + } + ], + "title": "Non-Idle Sessions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_calls[$__rate_interval]))", + "interval": "20", + "legendFormat": "Calls", + "range": true, + "refId": "A" + } + ], + "title": "Calls (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "refId": "A" + } + ], + "title": "TODO: Postgres logs", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + 
}, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Rollbacks" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Commits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "builder", + "expr": "sum(irate(pgwatch_db_stats_xact_rollback[$__rate_interval]))", + "hide": false, + "instant": false, + "interval": "20", + "legendFormat": "Rollbacks", + "range": true, + "refId": "B" + }, + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_db_stats_xact_commit[$__rate_interval]))", + "interval": "20", + "legendFormat": "Commits", + "range": true, + "refId": "A" + } + ], + "title": "Transactions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_db_stats_xact_commit[$__rate_interval]))/(sum(irate(pgwatch_db_stats_xact_commit[$__rate_interval]))+sum(irate(pgwatch_db_stats_xact_rollback[$__rate_interval]))) * 100", + "interval": "20", + "legendFormat": "Commits", + "range": true, + "refId": "A" + } + ], + "title": "Commit ration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "sec/sec" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + 
"x": 0, + "y": 27 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_calls_total_time[$__rate_interval]))", + "interval": "20", + "legendFormat": "Calls", + "range": true, + "refId": "A" + } + ], + "title": "Statements total time (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": 
"sum(irate(pgwatch_stat_statements_calls_total_time[$__rate_interval])) / sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "Calls", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Statements time per call (pg_stat_statements) aka latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "TODO: Total rows (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "TODO: Rows per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "sec/sec" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_blk_read_time[$__rate_interval]))", + "interval": "20", + "legendFormat": "blk_read_time", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_blk_write_time[$__rate_interval]))", + "hide": false, + "interval": "20", + "legendFormat": "blk_read_time", + "range": true, + "refId": "B" + } + ], + "title": "blk_read_time vs blk_write_time (s/s) (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "sec/sec" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_blk_read_time[$__rate_interval]))/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "interval": "20", + "legendFormat": "blk_read_time", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_blk_write_time[$__rate_interval]))/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "hide": false, + "interval": "20", + "legendFormat": "blk_read_time", + "range": true, + "refId": "B" + } + ], + "title": "blk_read_time vs blk_write_time (s/s) per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 51 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_shared_blks_hit[$__rate_interval])) * 8192", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_hit (bytes) (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + 
}, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 51 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "(sum(irate(pgwatch_stat_statements_shared_blks_hit[$__rate_interval])) * 8192)/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes per call", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_hit (bytes) per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 59 + }, + "id": 18, + 
"options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_shared_blks_read[$__rate_interval])) * 8192", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_read (bytes) (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 59 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": 
"12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "(sum(irate(pgwatch_stat_statements_shared_blks_read[$__rate_interval])) * 8192)/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes per call", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_read (bytes) per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 67 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_shared_blks_written[$__rate_interval])) * 
8192", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_written (bytes) (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "(sum(irate(pgwatch_stat_statements_shared_blks_written[$__rate_interval])) * 8192)/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes per call", + "range": true, + "refId": "A", + "useBackend": false 
+ } + ], + "title": "shared_blks_written (bytes) per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 75 + }, + "id": 22, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_shared_blks_dirtied[$__rate_interval])) * 8192", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_dirtied (bytes) (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 75 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "(sum(irate(pgwatch_stat_statements_shared_blks_dirtied[$__rate_interval])) * 8192)/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes per call", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_dirtied (bytes) per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 83 + }, + "id": 24, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_shared_blks_hit[$__rate_interval]))/(sum(irate(pgwatch_stat_statements_shared_blks_hit[$__rate_interval])) + sum(irate(pgwatch_stat_statements_shared_blks_read[$__rate_interval]))) * 100", + "interval": "20", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "shared_blks_read_ratio (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": 
"linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 91 + }, + "id": 25, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_wal_xlog_location_b[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "WAL bytes (pg_current_wal_lsn)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 91 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_wal_xlog_location_b[$__rate_interval]))/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "WAL bytes per call (pg_current_wal_lsn)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 99 + }, + "id": 
27, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum(irate(pgwatch_stat_statements_wal_fpi[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "WAL fpi (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 99 + }, + "id": 28, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { 
+ "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_wal_fpi[$__rate_interval]))/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "WAL fpi per call (pg_current_wal_lsn)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 107 + }, + "id": 29, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_temp_blks_read[$__rate_interval]))*8192", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": 
"20", + "legendFormat": "Temp bytes read", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_temp_blks_written[$__rate_interval]))*8192", + "hide": false, + "interval": "20", + "legendFormat": "Temp bytes written", + "range": true, + "refId": "B" + } + ], + "title": "temp_bytes_read vs temp_bytes_written (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 107 + }, + "id": 31, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": 
"sum(irate(pgwatch_stat_statements_temp_blks_read[$__rate_interval]))*8192/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "Temp bytes read", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum(irate(pgwatch_stat_statements_temp_blks_written[$__rate_interval]))*8192/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "hide": false, + "interval": "20", + "legendFormat": "Temp bytes written", + "range": true, + "refId": "B" + } + ], + "title": "temp_bytes_read vs temp_bytes_written (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 115 + }, + "id": 32, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + 
"showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "sum(rate(pgwatch_locks_mode_count[$__rate_interval]) * 60) by (mode) > 0", + "fullMetaSearch": false, + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "includeNullMetadata": true, + "interval": "20", + "intervalFactor": 1, + "legendFormat": "__auto", + "metric": "pg_locks_count", + "policy": "default", + "range": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "step": 5, + "useBackend": false + } + ], + "title": "Locks by mode", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 115 + }, + "id": 33, + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "refId": "A" + } + ], + "title": "TODO: Longest non-idle transaction age, > 1 min", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 123 + }, + "id": 34, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "refId": "A" + } + ], + "title": "TODO: Age of the oldest transaction ID that has not been frozen", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 123 + }, + "id": 35, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "refId": "A" + } + ], + "title": "TODO: Age of the oldest multi-transaction ID", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 131 + }, + "id": 36, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "refId": "A" + } + ], + "title": "TODO: No. of pending WAL files to be archived", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 131 + }, + "id": 37, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "refId": "A" + } + ], + "title": "TODO: Number of queries stuck in idle in transaction state (>10min)", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 
41, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Database Overview", + "uid": "f90500a0-a12e-4081-a2f0-07ed96f27915", + "version": 48 +} \ No newline at end of file diff --git a/config/grafana/dashboards/dash1.json b/config/grafana/dashboards/dash1.json index 658eeb1..4c14323 100644 --- a/config/grafana/dashboards/dash1.json +++ b/config/grafana/dashboards/dash1.json @@ -40,7 +40,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 8, + "fillOpacity": 12, "gradientMode": "none", "hideFrom": { "legend": false, @@ -77,7 +77,7 @@ } ] }, - "unit": "ms" + "unit": "percent" }, "overrides": [] }, @@ -87,7 +87,7 @@ "x": 0, "y": 0 }, - "id": 3, + "id": 6, "options": { "legend": { "calcs": [], @@ -104,25 +104,206 @@ "pluginVersion": "12.0.2", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" + "editorMode": "code", + "expr": "pgwatch_db_stats_blks_hit / (pgwatch_db_stats_blks_hit + pgwatch_db_stats_blks_read) * 100", + "legendFormat": "Buffer Cache Hit Ratio", + "range": true, + "refId": "A" + } + ], + "title": "Buffer Cache Hit", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "disableTextWrap": false, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { "editorMode": "code", - "exemplar": false, - "expr": "sum(rate(pgwatch_stat_statements_total_time[$agg_interval])) / sum(rate(pgwatch_stat_statements_calls[$agg_interval]))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "Runtime", + "expr": "sum(pgwatch_stat_statements_shared_blks_dirtied) * 8192", + "legendFormat": "Shared Blocks Dirtied", "range": true, - "refId": "B", - "useBackend": false + "refId": "A" } ], - "title": "Average query runtime", + "title": "Shared Blocks Written (Bytes)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(pgwatch_stat_statements_shared_blks_written) * 8192", + "legendFormat": "Shared Blocks Written", + "range": true, + "refId": "A" + } + ], + "title": "Shared Blocks Written (Bytes)", "type": "timeseries" }, { @@ -201,7 +382,7 @@ "h": 8, "w": 12, "x": 12, - "y": 0 + "y": 8 }, "id": 1, "options": { @@ -225,7 +406,7 @@ "uid": "P7A0D6631BB10B34F" }, "disableTextWrap": false, - "editorMode": "code", + "editorMode": "builder", "expr": "pgwatch_pg_stat_statements_calls_calls", "fullMetaSearch": false, "hide": false, @@ -313,6 +494,111 @@ ], "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 8, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "sum(irate(pgwatch_stat_statements_total_time[$__rate_interval])) / sum(irate(pgwatch_stat_statements_calls[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "20s", + "legendFormat": "Runtime", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Average query runtime", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", @@ -493,7 +779,7 @@ "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 24 }, "id": 2, "options": { @@ -661,12 +947,12 @@ ] }, "time": { - "from": "now-5m", + "from": "now-30m", "to": "now" }, "timepicker": {}, "timezone": "browser", "title": "PoC", "uid": "00eb62a7-4b80-43cd-a890-45336979aa18", - "version": 20 + "version": 26 } \ No newline at end of file diff --git a/config/pgwatch-prometheus/metrics.yml b/config/pgwatch-prometheus/metrics.yml index b8fbd61..ffd49e5 100644 --- a/config/pgwatch-prometheus/metrics.yml +++ b/config/pgwatch-prometheus/metrics.yml @@ -4160,31 +4160,6 @@ metrics: GRANT EXECUTE ON FUNCTION get_vmstat(int) TO pgwatch; COMMENT ON FUNCTION get_vmstat(int) IS 'created for pgwatch'; - wait_events: - description: > - This metric collects information about active queries that are waiting for events in the database. - It provides insights into query performance and potential bottlenecks. 
- sqls: - 11: |- - with q_sa as ( - select * from pg_stat_activity where datname = current_database() and pid <> pg_backend_pid() - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - wait_event_type as tag_wait_event_type, - wait_event as tag_wait_event, - count(*), - avg(abs(1e6* extract(epoch from now() - query_start)))::int8 as avg_query_duration_us, - max(abs(1e6* extract(epoch from now() - query_start)))::int8 as max_query_duration_us, - (select count(*) from q_sa where state = 'active') as total_active - from - q_sa - where - state = 'active' - and wait_event_type is not null - and wait_event_type <> 'Timeout' - group by - 1, 2, 3 wal: description: > This metric collects information about the Write-Ahead Logging (WAL) system in PostgreSQL. @@ -4257,6 +4232,29 @@ metrics: wal_sync_time::int8 from pg_stat_wal + wait_events: + query: | + SELECT datname datname, coalesce (wait_event, 'CPU*') || ' - ' || coalesce(wait_event_type, 'CPU*') as wait_event_type, count(*) as total + FROM <% if @role == "db" -%>pg_stat_activity_all<% else -%>pg_stat_activity<% end -%> + WHERE state = 'active' AND datname = current_database() + GROUP BY datname, coalesce (wait_event, 'CPU*') || ' - ' || coalesce(wait_event_type, 'CPU*') + UNION + SELECT 'server_process', coalesce (wait_event, 'CPU*') || ' - ' || coalesce(wait_event_type, 'CPU*') as wait_event_type, count(*) as total + FROM <% if @role == "db" -%>pg_stat_activity_all<% else -%>pg_stat_activity<% end -%> + WHERE state = 'active' AND datname IS NULL and current_database() = (select datname from pg_database where NOT datname = ANY(ARRAY['postgres', 'template0','template1','repmgr','history']) ORDER by datname limit 1) + GROUP BY datname, coalesce (wait_event, 'CPU*') || ' - ' || coalesce(wait_event_type, 'CPU*') + metrics: + - datname: + usage: "LABEL" + description: "Database name" + - wait_event_type: + usage: "LABEL" + description: "Wait event type" + - total: + usage: "GAUGE" + 
description: "Total number of processes with specific wait event type" + + presets: aiven: description: aiven database metrics -- GitLab From a0a98a7209e6bff2e1a0318b48a01b811f470e0f Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Tue, 1 Jul 2025 02:03:42 +0300 Subject: [PATCH 4/7] Fixed point size --- .../dashboards/DBOverviewDashboard.json | 62 +++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/config/grafana/dashboards/DBOverviewDashboard.json b/config/grafana/dashboards/DBOverviewDashboard.json index 77a2efc..07f88ce 100644 --- a/config/grafana/dashboards/DBOverviewDashboard.json +++ b/config/grafana/dashboards/DBOverviewDashboard.json @@ -574,7 +574,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -671,7 +671,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -759,7 +759,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -897,7 +897,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -996,7 +996,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -1094,7 +1094,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -1196,7 +1196,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -1288,7 +1288,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 
1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -1380,7 +1380,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -1491,7 +1491,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -1602,7 +1602,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -1704,7 +1704,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -1806,7 +1806,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -1908,7 +1908,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -2010,7 +2010,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -2112,7 +2112,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -2214,7 +2214,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -2316,7 +2316,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -2418,7 +2418,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -2513,7 +2513,7 @@ "insertNulls": false, 
"lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -2615,7 +2615,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -2717,7 +2717,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -2819,7 +2819,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -2921,7 +2921,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -3036,7 +3036,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -3151,7 +3151,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -3286,7 +3286,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -3374,7 +3374,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -3462,7 +3462,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -3550,7 +3550,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, @@ -3638,7 +3638,7 @@ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, - "pointSize": 5, + "pointSize": 1, "scaleDistribution": { "type": "linear" }, -- 
GitLab From c6f0606d7aa507e722d9da4c5ff8b439008f1296 Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Wed, 2 Jul 2025 20:44:52 +0300 Subject: [PATCH 5/7] Updated metrics and added new graphs --- .../dashboards/DBOverviewDashboard.json | 804 +- config/pgwatch-postgres/metrics.yml | 1 - config/pgwatch-prometheus/metrics.yml | 9349 +++++++++-------- config/pgwatch-prometheus/sources.yml | 33 +- 4 files changed, 5508 insertions(+), 4679 deletions(-) diff --git a/config/grafana/dashboards/DBOverviewDashboard.json b/config/grafana/dashboards/DBOverviewDashboard.json index 07f88ce..d6e7fb3 100644 --- a/config/grafana/dashboards/DBOverviewDashboard.json +++ b/config/grafana/dashboards/DBOverviewDashboard.json @@ -22,7 +22,7 @@ "links": [], "panels": [ { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, @@ -34,13 +34,243 @@ "title": "ASH", "type": "row" }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.5, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*Timeout" + }, 
+ "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6f450c", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*CPU - .*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*- Lock" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*- LWLock" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*- IO" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*- Client" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 18, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 38, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (wait_event_type) (pgwatch_wait_events_total{datname!=\"server_process\"})", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (wait_event_type) (pgwatch_wait_events_total{datname=\"server_process\", wait_event_type!~\".*- Activity.*\"})", + "hide": false, + "legendFormat": "Postgres - {{wait_event_type}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (wait_event_type) (pgwatch_wait_events_total{datname=\"server_process\", wait_event_type=~\".*- Activity.*\"})", + "hide": false, + "legendFormat": "Idle Internal - {{wait_event_type}}", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum by (event_type) (irate(pgwatch_wait_events_sampling_total[$__rate_interval]))>0", + "hide": false, + "interval": "20", + "legendFormat": " {{event_type}}", + "range": true, + "refId": "D" + } + ], + "title": "New panel", + "type": "timeseries" + }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 1 + "y": 19 }, "id": 2, "panels": [], @@ -53,7 +283,7 @@ "h": 1, "w": 24, "x": 0, - "y": 2 + "y": 20 }, "id": 3, "panels": [], @@ -213,6 +443,32 @@ } } ] + }, + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "active", + "idle", + "idle in transaction" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] } ] }, @@ -220,7 +476,7 @@ "h": 8, "w": 12, "x": 0, - "y": 3 + "y": 21 }, "id": 4, "options": { @@ -247,67 +503,13 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "pgwatch_backends_idleintransaction", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "Idle-in-transaction", - "range": true, - "refId": "B", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "pgwatch_backends_idle", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": false, 
- "legendFormat": "Idle", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "pgwatch_backends_active", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "Active", - "range": true, - "refId": "C", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "pgwatch_backends_waiting ", - "fullMetaSearch": false, + "editorMode": "code", + "expr": "sum by (state) (pgwatch_pg_stat_activity_count) > 0", "hide": false, - "includeNullMetadata": true, "instant": false, - "legendFormat": "Waiting", + "legendFormat": "{{state}}", "range": true, - "refId": "F", - "useBackend": false + "refId": "A" }, { "datasource": { @@ -421,13 +623,20 @@ ], "fill": "dash" } - }, + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Active" + }, + "properties": [ { - "id": "custom.hideFrom", + "id": "color", "value": { - "legend": false, - "tooltip": false, - "viz": true + "fixedColor": "super-light-green", + "mode": "fixed" } } ] @@ -435,13 +644,13 @@ { "matcher": { "id": "byName", - "options": "Active" + "options": "Idle" }, "properties": [ { "id": "color", "value": { - "fixedColor": "super-light-green", + "fixedColor": "#afafaf", "mode": "fixed" } } @@ -461,6 +670,47 @@ } } ] + }, + { + "matcher": { + "id": "byName", + "options": "Waiting" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "active", + "idle", + "idle in transaction" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, 
+ "tooltip": false, + "viz": true + } + } + ] } ] }, @@ -468,9 +718,9 @@ "h": 8, "w": 12, "x": 12, - "y": 3 + "y": 21 }, - "id": 5, + "id": 39, "options": { "legend": { "calcs": [ @@ -495,34 +745,13 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "pgwatch_backends_idleintransaction", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "Idle-in-transaction", - "range": true, - "refId": "B", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "pgwatch_backends_active", - "fullMetaSearch": false, + "editorMode": "code", + "expr": "sum by (state) (pgwatch_pg_stat_activity_count{state!=\"idle\"}) > 0", "hide": false, - "includeNullMetadata": true, "instant": false, - "legendFormat": "Active", + "legendFormat": "{{state}}", "range": true, - "refId": "C", - "useBackend": false + "refId": "A" }, { "datasource": { @@ -542,7 +771,7 @@ "useBackend": false } ], - "title": "Non-Idle Sessions", + "title": "Non-idle Sessions", "type": "timeseries" }, { @@ -600,7 +829,8 @@ "value": 80 } ] - } + }, + "unit": "ops" }, "overrides": [] }, @@ -608,7 +838,7 @@ "h": 8, "w": 12, "x": 0, - "y": 11 + "y": 29 }, "id": 6, "options": { @@ -631,12 +861,16 @@ "pluginVersion": "12.0.2", "targets": [ { + "disableTextWrap": false, "editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_calls[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "20", "legendFormat": "Calls", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Calls (pg_stat_statements)", @@ -661,7 +895,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 12, "gradientMode": 
"none", "hideFrom": { "legend": false, @@ -679,7 +913,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -697,21 +931,53 @@ "value": 80 } ] - } + }, + "unit": "ops" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Rollbacks" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Commits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 11 + "y": 29 }, - "id": 9, + "id": 7, "options": { "legend": { "calcs": [], - "displayMode": "list", + "displayMode": "table", "placement": "bottom", "showLegend": true }, @@ -724,10 +990,29 @@ "pluginVersion": "12.0.2", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "builder", + "expr": "sum(irate(pgwatch_db_stats_xact_rollback[$__rate_interval]))", + "hide": false, + "instant": false, + "interval": "20", + "legendFormat": "Rollbacks", + "range": true, + "refId": "B" + }, + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_db_stats_xact_commit[$__rate_interval]))", + "interval": "20", + "legendFormat": "Commits", + "range": true, "refId": "A" } ], - "title": "TODO: Postgres logs", + "title": "Transactions", "type": "timeseries" }, { @@ -749,7 +1034,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 12, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -767,7 +1052,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "normal" + "mode": "none" }, "thresholdsStyle": { "mode": "off" @@ -787,50 +1072,19 @@ ] } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Rollbacks" - }, - "properties": [ - { - "id": "color", - "value": { - 
"fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Commits" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 19 + "y": 37 }, - "id": 7, + "id": 9, "options": { "legend": { "calcs": [], - "displayMode": "table", + "displayMode": "list", "placement": "bottom", "showLegend": true }, @@ -843,29 +1097,10 @@ "pluginVersion": "12.0.2", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "editorMode": "builder", - "expr": "sum(irate(pgwatch_db_stats_xact_rollback[$__rate_interval]))", - "hide": false, - "instant": false, - "interval": "20", - "legendFormat": "Rollbacks", - "range": true, - "refId": "B" - }, - { - "editorMode": "code", - "expr": "sum(irate(pgwatch_db_stats_xact_commit[$__rate_interval]))", - "interval": "20", - "legendFormat": "Commits", - "range": true, "refId": "A" } ], - "title": "Transactions", + "title": "TODO: Postgres logs", "type": "timeseries" }, { @@ -933,7 +1168,7 @@ "h": 8, "w": 12, "x": 12, - "y": 19 + "y": 37 }, "id": 8, "options": { @@ -964,7 +1199,7 @@ "refId": "A" } ], - "title": "Commit ration", + "title": "Commit ratio", "type": "timeseries" }, { @@ -1031,7 +1266,7 @@ "h": 8, "w": 12, "x": 0, - "y": 27 + "y": 45 }, "id": 10, "options": { @@ -1055,7 +1290,7 @@ "targets": [ { "editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_calls_total_time[$__rate_interval]))", + "expr": "(sum(irate(pgwatch_pg_stat_statements_exec_time_total[$__rate_interval])) + sum(irate(pgwatch_pg_stat_statements_plan_time_total[$__rate_interval])))/1000", "interval": "20", "legendFormat": "Calls", "range": true, @@ -1129,7 +1364,7 @@ "h": 8, "w": 12, "x": 12, - "y": 27 + "y": 45 }, "id": 11, "options": { @@ -1153,8 +1388,8 @@ "targets": [ { "disableTextWrap": false, - "editorMode": "builder", - "expr": 
"sum(irate(pgwatch_stat_statements_calls_total_time[$__rate_interval])) / sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "editorMode": "code", + "expr": "(sum(irate(pgwatch_pg_stat_statements_exec_time_total[$__rate_interval])) + sum(irate(pgwatch_pg_stat_statements_plan_time_total[$__rate_interval]))) / sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -1186,7 +1421,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 12, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1230,13 +1465,17 @@ "h": 8, "w": 12, "x": 0, - "y": 35 + "y": 53 }, "id": 12, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, @@ -1250,13 +1489,14 @@ "targets": [ { "editorMode": "code", - "expr": "", - "legendFormat": "__auto", + "expr": "sum(irate(pgwatch_pg_stat_statements_rows[$__rate_interval]))", + "interval": "20", + "legendFormat": "No. 
of rows ", "range": true, "refId": "A" } ], - "title": "TODO: Total rows (pg_stat_statements)", + "title": "Total rows (pg_stat_statements)", "type": "timeseries" }, { @@ -1278,7 +1518,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 12, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1322,13 +1562,17 @@ "h": 8, "w": 12, "x": 12, - "y": 35 + "y": 53 }, "id": 13, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, @@ -1342,13 +1586,14 @@ "targets": [ { "editorMode": "code", - "expr": "", - "legendFormat": "__auto", + "expr": "sum(irate(pgwatch_pg_stat_statements_rows[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "interval": "20", + "legendFormat": "Rows per call", "range": true, "refId": "A" } ], - "title": "TODO: Rows per call (pg_stat_statements)", + "title": "Rows per call (pg_stat_statements)", "type": "timeseries" }, { @@ -1415,7 +1660,7 @@ "h": 8, "w": 12, "x": 0, - "y": 43 + "y": 61 }, "id": 14, "options": { @@ -1439,7 +1684,7 @@ "targets": [ { "editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_blk_read_time[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_block_read_total[$__rate_interval]))", "interval": "20", "legendFormat": "blk_read_time", "range": true, @@ -1451,7 +1696,7 @@ "uid": "P7A0D6631BB10B34F" }, "editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_blk_write_time[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_block_write_total[$__rate_interval]))", "hide": false, "interval": "20", "legendFormat": "blk_read_time", @@ -1526,7 +1771,7 @@ "h": 8, "w": 12, "x": 12, - "y": 43 + "y": 61 }, "id": 15, "options": { @@ -1550,7 +1795,7 @@ "targets": [ { "editorMode": "code", - "expr": 
"sum(irate(pgwatch_stat_statements_blk_read_time[$__rate_interval]))/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_block_read_total[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", "interval": "20", "legendFormat": "blk_read_time", "range": true, @@ -1562,7 +1807,7 @@ "uid": "P7A0D6631BB10B34F" }, "editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_blk_write_time[$__rate_interval]))/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_block_write_total[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", "hide": false, "interval": "20", "legendFormat": "blk_read_time", @@ -1637,7 +1882,7 @@ "h": 8, "w": 12, "x": 0, - "y": 51 + "y": 69 }, "id": 16, "options": { @@ -1662,7 +1907,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_shared_blks_hit[$__rate_interval])) * 8192", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_hit_total[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -1739,7 +1984,7 @@ "h": 8, "w": 12, "x": 12, - "y": 51 + "y": 69 }, "id": 17, "options": { @@ -1764,7 +2009,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "(sum(irate(pgwatch_stat_statements_shared_blks_hit[$__rate_interval])) * 8192)/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_hit_total[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -1841,7 +2086,7 @@ "h": 8, "w": 12, "x": 0, - "y": 59 + "y": 77 }, "id": 18, "options": { @@ -1866,7 +2111,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": 
"sum(irate(pgwatch_stat_statements_shared_blks_read[$__rate_interval])) * 8192", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_read_total[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -1943,7 +2188,7 @@ "h": 8, "w": 12, "x": 12, - "y": 59 + "y": 77 }, "id": 19, "options": { @@ -1968,7 +2213,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "(sum(irate(pgwatch_stat_statements_shared_blks_read[$__rate_interval])) * 8192)/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_read_total[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2045,7 +2290,7 @@ "h": 8, "w": 12, "x": 0, - "y": 67 + "y": 85 }, "id": 20, "options": { @@ -2070,7 +2315,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_shared_blks_written[$__rate_interval])) * 8192", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_written_total[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2147,7 +2392,7 @@ "h": 8, "w": 12, "x": 12, - "y": 67 + "y": 85 }, "id": 21, "options": { @@ -2172,7 +2417,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "(sum(irate(pgwatch_stat_statements_shared_blks_written[$__rate_interval])) * 8192)/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_written_total[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2249,7 +2494,7 @@ "h": 8, "w": 12, "x": 0, - "y": 75 + "y": 93 }, "id": 22, "options": { @@ -2274,7 +2519,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": 
"sum(irate(pgwatch_stat_statements_shared_blks_dirtied[$__rate_interval])) * 8192", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_dirtied_total[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2351,7 +2596,7 @@ "h": 8, "w": 12, "x": 12, - "y": 75 + "y": 93 }, "id": 23, "options": { @@ -2376,7 +2621,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "(sum(irate(pgwatch_stat_statements_shared_blks_dirtied[$__rate_interval])) * 8192)/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_dirtied_total[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2454,7 +2699,7 @@ "h": 8, "w": 24, "x": 0, - "y": 83 + "y": 101 }, "id": 24, "options": { @@ -2474,9 +2719,9 @@ "targets": [ { "editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_shared_blks_hit[$__rate_interval]))/(sum(irate(pgwatch_stat_statements_shared_blks_hit[$__rate_interval])) + sum(irate(pgwatch_stat_statements_shared_blks_read[$__rate_interval]))) * 100", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_hit_total[$__rate_interval]))/(sum(irate(pgwatch_pg_stat_statements_shared_bytes_hit_total[$__rate_interval])) + sum(irate(pgwatch_pg_stat_statements_shared_bytes_read_total[$__rate_interval]))) * 100", "interval": "20", - "legendFormat": "__auto", + "legendFormat": "shared_blks_read_ratio", "range": true, "refId": "A" } @@ -2548,7 +2793,7 @@ "h": 8, "w": 12, "x": 0, - "y": 91 + "y": 109 }, "id": 25, "options": { @@ -2650,7 +2895,7 @@ "h": 8, "w": 12, "x": 12, - "y": 91 + "y": 109 }, "id": 26, "options": { @@ -2675,7 +2920,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "sum(irate(pgwatch_wal_xlog_location_b[$__rate_interval]))/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "expr":
"sum(irate(pgwatch_wal_xlog_location_b[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2752,7 +2997,7 @@ "h": 8, "w": 12, "x": 0, - "y": 99 + "y": 117 }, "id": 27, "options": { @@ -2777,7 +3022,7 @@ { "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(irate(pgwatch_stat_statements_wal_fpi[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_wal_fpi[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2854,7 +3099,7 @@ "h": 8, "w": 12, "x": 12, - "y": 99 + "y": 117 }, "id": 28, "options": { @@ -2879,7 +3124,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_wal_fpi[$__rate_interval]))/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_wal_fpi[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2956,7 +3201,7 @@ "h": 8, "w": 12, "x": 0, - "y": 107 + "y": 125 }, "id": 29, "options": { @@ -2981,7 +3226,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_temp_blks_read[$__rate_interval]))*8192", + "expr": "sum(irate(pgwatch_pg_stat_statements_temp_bytes_read[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2996,7 +3241,7 @@ "uid": "P7A0D6631BB10B34F" }, "editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_temp_blks_written[$__rate_interval]))*8192", + "expr": "sum(irate(pgwatch_pg_stat_statements_temp_bytes_written[$__rate_interval]))", "hide": false, "interval": "20", "legendFormat": "Temp bytes written", @@ -3071,7 +3316,7 @@ "h": 8, "w": 12, "x": 12, - "y": 107 + "y": 125 }, "id": 31, "options": { @@ -3096,7 +3341,7 @@ { "disableTextWrap": false, 
"editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_temp_blks_read[$__rate_interval]))*8192/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_temp_bytes_read[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -3111,7 +3356,7 @@ "uid": "P7A0D6631BB10B34F" }, "editorMode": "code", - "expr": "sum(irate(pgwatch_stat_statements_temp_blks_written[$__rate_interval]))*8192/sum(irate(pgwatch_stat_statements_calls_calls[$__rate_interval]))", + "expr": "sum(irate(pgwatch_pg_stat_statements_temp_bytes_written[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", "hide": false, "interval": "20", "legendFormat": "Temp bytes written", @@ -3119,7 +3364,7 @@ "refId": "B" } ], - "title": "temp_bytes_read vs temp_bytes_written (pg_stat_statements)", + "title": "temp_bytes_read vs temp_bytes_written per call (pg_stat_statements)", "type": "timeseries" }, { @@ -3139,9 +3384,9 @@ "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 12, + "barWidthFactor": 0.5, + "drawStyle": "bars", + "fillOpacity": 100, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3189,13 +3434,17 @@ "h": 8, "w": 12, "x": 0, - "y": 115 + "y": 133 }, "id": 32, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, @@ -3211,7 +3460,7 @@ "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(pgwatch_locks_mode_count[$__rate_interval]) * 60) by (mode) > 0", + "expr": "sum by (lockmode) (rate(pgwatch_locks_mode_count[$__rate_interval]) * 60)", "fullMetaSearch": false, "groupBy": [ { @@ -3312,7 +3561,8 @@ "value": 80 } ] - } + }, + "unit": "s" }, "overrides": [] }, @@ 
-3320,7 +3570,7 @@ "h": 8, "w": 12, "x": 12, - "y": 115 + "y": 133 }, "id": 33, "options": { @@ -3328,7 +3578,7 @@ "calcs": [], "displayMode": "list", "placement": "bottom", - "showLegend": true + "showLegend": false }, "tooltip": { "hideZeros": false, @@ -3339,10 +3589,19 @@ "pluginVersion": "12.0.2", "targets": [ { - "refId": "A" + "disableTextWrap": false, + "editorMode": "code", + "expr": "pgwatch_pg_long_running_transactions_age_in_seconds", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "legendFormat": "Longest transaction's age in seconds", + "range": true, + "refId": "A", + "useBackend": false } ], - "title": "TODO: Longest non-idle transaction age, > 1 min", + "title": "Longest non-idle transaction age, > 1 min", "type": "timeseries" }, { @@ -3408,13 +3667,17 @@ "h": 8, "w": 12, "x": 0, - "y": 123 + "y": 141 }, "id": 34, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, @@ -3427,10 +3690,14 @@ "pluginVersion": "12.0.2", "targets": [ { + "editorMode": "code", + "expr": "pgwatch_pg_database_wraparound_age_datfrozenxid{datname!~\"template1|postgres\"}", + "legendFormat": "{{datname}}", + "range": true, "refId": "A" } ], - "title": "TODO: Age of the oldest transaction ID that has not been frozen", + "title": "Age of the oldest transaction ID that has not been frozen", "type": "timeseries" }, { @@ -3496,13 +3763,17 @@ "h": 8, "w": 12, "x": 12, - "y": 123 + "y": 141 }, "id": 35, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, @@ -3515,10 +3786,14 @@ "pluginVersion": "12.0.2", "targets": [ { + "editorMode": "code", + "expr": "pgwatch_pg_database_wraparound_age_datminmxid{datname!~\"template1|postgres\"}", + "legendFormat": "{{datname}}", + "range": true, "refId": "A" } ], -
"title": "TODO: Age of the oldest multi-transaction ID", + "title": "Age of the oldest multi-transaction ID", "type": "timeseries" }, { @@ -3570,10 +3845,6 @@ "steps": [ { "color": "green" - }, - { - "color": "red", - "value": 80 } ] } @@ -3584,7 +3855,7 @@ "h": 8, "w": 12, "x": 0, - "y": 131 + "y": 149 }, "id": 36, "options": { @@ -3592,7 +3863,7 @@ "calcs": [], "displayMode": "list", "placement": "bottom", - "showLegend": true + "showLegend": false }, "tooltip": { "hideZeros": false, @@ -3603,6 +3874,10 @@ "pluginVersion": "12.0.2", "targets": [ { + "editorMode": "code", + "expr": "pgwatch_pg_archiver_pending_wal_count", + "legendFormat": "No of files", + "range": true, "refId": "A" } ], @@ -3672,13 +3947,17 @@ "h": 8, "w": 12, "x": 12, - "y": 131 + "y": 149 }, "id": 37, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, @@ -3691,7 +3970,15 @@ "pluginVersion": "12.0.2", "targets": [ { - "refId": "A" + "disableTextWrap": false, + "editorMode": "code", + "expr": "pgwatch_pg_stuck_idle_in_transaction_queries", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "No. 
of queries", + "range": true, + "refId": "A", + "useBackend": false } ], "title": "TODO: Number of queries stuck in idle in transaction state (>10min)", @@ -3699,18 +3986,19 @@ } ], "preload": false, + "refresh": "auto", "schemaVersion": 41, "tags": [], "templating": { "list": [] }, "time": { - "from": "now-1h", + "from": "now-5m", "to": "now" }, "timepicker": {}, "timezone": "browser", "title": "Database Overview", "uid": "f90500a0-a12e-4081-a2f0-07ed96f27915", - "version": 48 + "version": 96 } \ No newline at end of file diff --git a/config/pgwatch-postgres/metrics.yml b/config/pgwatch-postgres/metrics.yml index 92a620c..20ac8fa 100644 --- a/config/pgwatch-postgres/metrics.yml +++ b/config/pgwatch-postgres/metrics.yml @@ -5,7 +5,6 @@ metrics: sqls: 11: |- select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, queryid, query from pg_stat_statements diff --git a/config/pgwatch-prometheus/metrics.yml b/config/pgwatch-prometheus/metrics.yml index ffd49e5..bb8d30c 100644 --- a/config/pgwatch-prometheus/metrics.yml +++ b/config/pgwatch-prometheus/metrics.yml @@ -23,4538 +23,5053 @@ # statement_timeout_seconds: 300 # metric_storage_name: db_stats metrics: - pg_stat_statements_calls: - description: "Simple queryid and calls metric" + archiver: + description: > + This metric retrieves key statistics from the PostgreSQL `pg_stat_archiver` view providing insights into the status of WAL file archiving. + It returns the total number of successfully archived files and failed archiving attempts. Additionally, it identifies if the most recent + attempt resulted in a failure and calculates how many seconds have passed since the last failure. The metric only considers data if WAL + archiving is enabled in the system, helping administrators monitor and diagnose issues related to the archiving process. 
sqls: 11: |- select /* pgwatch_generated */ (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - queryid::text as tag_queryid, - calls - from pg_stat_statements - where queryid is not null - order by calls desc - limit 10 + archived_count, + failed_count, + case when coalesce(last_failed_time, '1970-01-01'::timestamptz) > coalesce(last_archived_time, '1970-01-01'::timestamptz) then 1 else 0 end as is_failing_int, + extract(epoch from now() - last_failed_time)::int8 as seconds_since_last_failure + from + pg_stat_archiver + where + current_setting('archive_mode') in ('on', 'always') gauges: - - calls - master_only: false - is_instance_level: false - metric_storage_name: pgss_calls - node_status: primary - statement_timeout_seconds: 5 - - archiver: - description: > - This metric retrieves key statistics from the PostgreSQL `pg_stat_archiver` view providing insights into the status of WAL file archiving. - It returns the total number of successfully archived files and failed archiving attempts. Additionally, it identifies if the most recent - attempt resulted in a failure and calculates how many seconds have passed since the last failure. The metric only considers data if WAL - archiving is enabled in the system, helping administrators monitor and diagnose issues related to the archiving process. 
- sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - archived_count, - failed_count, - case when coalesce(last_failed_time, '1970-01-01'::timestamptz) > coalesce(last_archived_time, '1970-01-01'::timestamptz) then 1 else 0 end as is_failing_int, - extract(epoch from now() - last_failed_time)::int8 as seconds_since_last_failure - from - pg_stat_archiver - where - current_setting('archive_mode') in ('on', 'always') - gauges: - - is_failing_int - - seconds_since_last_failure - is_instance_level: true + - is_failing_int + - seconds_since_last_failure + is_instance_level: true archiver_pending_count: - description: > - This metric retrieves the count of WAL files waiting to be archived by checking the pg_wal/archive_status directory - for files with .ready extension. It helps monitor the archiving backlog and potential issues with WAL archiving. - sqls: - 10: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - count(*) as archiver_pending_count - from - (select pg_ls_dir('pg_wal/archive_status')) a - where - pg_ls_dir ~ '[0-9A-F]{24}.ready' - 9.4: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - count(*) as archiver_pending_count - from - (select pg_ls_dir('pg_xlog/archive_status')) a - where - pg_ls_dir ~ '[0-9A-F]{24}.ready' - gauges: - - archiver_pending_count - is_instance_level: true + description: > + This metric retrieves the count of WAL files waiting to be archived by checking the pg_wal/archive_status directory + for files with .ready extension. It helps monitor the archiving backlog and potential issues with WAL archiving. 
+ sqls: + 10: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + count(*) as archiver_pending_count + from + (select pg_ls_dir('pg_wal/archive_status')) a + where + pg_ls_dir ~ '[0-9A-F]{24}.ready' + 9.4: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + count(*) as archiver_pending_count + from + (select pg_ls_dir('pg_xlog/archive_status')) a + where + pg_ls_dir ~ '[0-9A-F]{24}.ready' + gauges: + - archiver_pending_count + is_instance_level: true backends: - description: > - This metric gathers detailed information from the PostgreSQL pg_stat_activity view, providing an overview of the database's current session - and activity state. It tracks the total number of client backends, active sessions, idle sessions, sessions waiting on locks, and background workers. - The metric also calculates statistics on blocked sessions, most extended waiting times, average and longest session durations, transaction times, - and query durations. Additionally, it monitors autovacuum worker activity and provides the age of the oldest transaction (measured by xmin). - This metric helps administrators monitor session states, detect bottlenecks, and ensure the system is within its connection limits, - providing visibility into database performance and contention. 
- sqls: - 11: | - with sa_snapshot as ( - select * from pg_stat_activity - where pid != pg_backend_pid() - and datname = current_database() - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - (select count(*) from sa_snapshot where backend_type = 'client backend') as total, - (select count(*) from pg_stat_activity where pid != pg_backend_pid()) as instance_total, - current_setting('max_connections')::int as max_connections, - (select count(*) from sa_snapshot where backend_type = 'background worker') as background_workers, - (select count(*) from sa_snapshot where state = 'active' and backend_type = 'client backend') as active, - (select count(*) from sa_snapshot where state = 'idle' and backend_type = 'client backend') as idle, - (select count(*) from sa_snapshot where state = 'idle in transaction' and backend_type = 'client backend') as idleintransaction, - (select count(*) from sa_snapshot where wait_event_type in ('LWLock', 'Lock', 'BufferPin') and backend_type = 'client backend') as waiting, - (select coalesce(sum(case when coalesce(array_length(pg_blocking_pids(pid), 1), 0) >= 1 then 1 else 0 end), 0) from sa_snapshot where backend_type = 'client backend' and state = 'active') as blocked, - (select ceil(extract(epoch from max(now() - query_start)))::int from sa_snapshot where wait_event_type in ('LWLock', 'Lock', 'BufferPin') and backend_type = 'client backend') as longest_waiting_seconds, - (select round(avg(abs(extract(epoch from now() - query_start)))::numeric, 3)::float from sa_snapshot where wait_event_type in ('LWLock', 'Lock', 'BufferPin') and backend_type = 'client backend') as avg_waiting_seconds, - (select ceil(extract(epoch from (now() - backend_start)))::int from sa_snapshot where backend_type = 'client backend' order by backend_start limit 1) as longest_session_seconds, - (select round(avg(abs(extract(epoch from now() - backend_start)))::numeric, 3)::float from sa_snapshot where backend_type = 'client 
backend') as avg_session_seconds, - (select ceil(extract(epoch from (now() - xact_start)))::int from sa_snapshot where xact_start is not null and backend_type = 'client backend' order by xact_start limit 1) as longest_tx_seconds, - (select round(avg(abs(extract(epoch from now() - xact_start)))::numeric, 3)::float from sa_snapshot where xact_start is not null and backend_type = 'client backend') as avg_tx_seconds, - (select ceil(extract(epoch from (now() - xact_start)))::int from sa_snapshot where backend_type = 'autovacuum worker' order by xact_start limit 1) as longest_autovacuum_seconds, - (select ceil(extract(epoch from max(now() - query_start)))::int from sa_snapshot where state = 'active' and backend_type = 'client backend') as longest_query_seconds, - (select round(avg(abs(extract(epoch from now() - query_start)))::numeric, 3)::float from sa_snapshot where state = 'active' and backend_type = 'client backend') as avg_query_seconds, - (select max(age(backend_xmin))::int8 from sa_snapshot) as max_xmin_age_tx, - (select count(*) from sa_snapshot where state = 'active' and backend_type = 'autovacuum worker') as av_workers - gauges: - - '*' + description: > + This metric gathers detailed information from the PostgreSQL pg_stat_activity view, providing an overview of the database's current session + and activity state. It tracks the total number of client backends, active sessions, idle sessions, sessions waiting on locks, and background workers. + The metric also calculates statistics on blocked sessions, most extended waiting times, average and longest session durations, transaction times, + and query durations. Additionally, it monitors autovacuum worker activity and provides the age of the oldest transaction (measured by xmin). + This metric helps administrators monitor session states, detect bottlenecks, and ensure the system is within its connection limits, + providing visibility into database performance and contention. 
+ sqls: + 11: | + with sa_snapshot as ( + select * from pg_stat_activity + where pid != pg_backend_pid() + and datname = current_database() + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + (select count(*) from sa_snapshot where backend_type = 'client backend') as total, + (select count(*) from pg_stat_activity where pid != pg_backend_pid()) as instance_total, + current_setting('max_connections')::int as max_connections, + (select count(*) from sa_snapshot where backend_type = 'background worker') as background_workers, + (select count(*) from sa_snapshot where state = 'active' and backend_type = 'client backend') as active, + (select count(*) from sa_snapshot where state = 'idle' and backend_type = 'client backend') as idle, + (select count(*) from sa_snapshot where state = 'idle in transaction' and backend_type = 'client backend') as idleintransaction, + (select count(*) from sa_snapshot where wait_event_type in ('LWLock', 'Lock', 'BufferPin') and backend_type = 'client backend') as waiting, + (select coalesce(sum(case when coalesce(array_length(pg_blocking_pids(pid), 1), 0) >= 1 then 1 else 0 end), 0) from sa_snapshot where backend_type = 'client backend' and state = 'active') as blocked, + (select ceil(extract(epoch from max(now() - query_start)))::int from sa_snapshot where wait_event_type in ('LWLock', 'Lock', 'BufferPin') and backend_type = 'client backend') as longest_waiting_seconds, + (select round(avg(abs(extract(epoch from now() - query_start)))::numeric, 3)::float from sa_snapshot where wait_event_type in ('LWLock', 'Lock', 'BufferPin') and backend_type = 'client backend') as avg_waiting_seconds, + (select ceil(extract(epoch from (now() - backend_start)))::int from sa_snapshot where backend_type = 'client backend' order by backend_start limit 1) as longest_session_seconds, + (select round(avg(abs(extract(epoch from now() - backend_start)))::numeric, 3)::float from sa_snapshot where backend_type = 'client 
backend') as avg_session_seconds, + (select ceil(extract(epoch from (now() - xact_start)))::int from sa_snapshot where xact_start is not null and backend_type = 'client backend' order by xact_start limit 1) as longest_tx_seconds, + (select round(avg(abs(extract(epoch from now() - xact_start)))::numeric, 3)::float from sa_snapshot where xact_start is not null and backend_type = 'client backend') as avg_tx_seconds, + (select ceil(extract(epoch from (now() - xact_start)))::int from sa_snapshot where backend_type = 'autovacuum worker' order by xact_start limit 1) as longest_autovacuum_seconds, + (select ceil(extract(epoch from max(now() - query_start)))::int from sa_snapshot where state = 'active' and backend_type = 'client backend') as longest_query_seconds, + (select round(avg(abs(extract(epoch from now() - query_start)))::numeric, 3)::float from sa_snapshot where state = 'active' and backend_type = 'client backend') as avg_query_seconds, + (select max(age(backend_xmin))::int8 from sa_snapshot) as max_xmin_age_tx, + (select count(*) from sa_snapshot where state = 'active' and backend_type = 'autovacuum worker') as av_workers + gauges: + - '*' backup_age_pgbackrest: - description: > - This metric retrieves the age of the last successful pgBackRest backup in seconds. It uses the `pgbackrest --output=json info` command to fetch - the backup information and calculates the age based on the current time and the timestamp of the last backup. The metric returns a retcode of 0 - on success, along with the age in seconds and a message indicating the status. - Expects pgBackRest is correctly configured on monitored DB and "jq" tool is installed on the DB server. 
- sqls: - 11: | - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - retcode, - backup_age_seconds, - message - from - get_backup_age_pgbackrest() - init_sql: |- - CREATE EXTENSION IF NOT EXISTS plpython3u; - - CREATE OR REPLACE FUNCTION get_backup_age_pgbackrest(OUT retcode int, OUT backup_age_seconds int, OUT message text) AS - $$ - import time - import json - import subprocess - - PGBACKREST_TIMEOUT = 30 - - def error(message, returncode=1): - return returncode, 1000000, 'Not OK. '+message - - pgbackrest_cmd=["pgbackrest", "--output=json", "info"] - + description: > + This metric retrieves the age of the last successful pgBackRest backup in seconds. It uses the `pgbackrest --output=json info` command to fetch + the backup information and calculates the age based on the current time and the timestamp of the last backup. The metric returns a retcode of 0 + on success, along with the age in seconds and a message indicating the status. + Expects pgBackRest is correctly configured on monitored DB and "jq" tool is installed on the DB server. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + retcode, + backup_age_seconds, + message + from + get_backup_age_pgbackrest() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_backup_age_pgbackrest(OUT retcode int, OUT backup_age_seconds int, OUT message text) AS + $$ + import time + import json + import subprocess + + PGBACKREST_TIMEOUT = 30 + + def error(message, returncode=1): + return returncode, 1000000, 'Not OK. 
'+message + + pgbackrest_cmd=["pgbackrest", "--output=json", "info"] + + try: + p = subprocess.Popen(pgbackrest_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8') + stdout, stderr = p.communicate(timeout=PGBACKREST_TIMEOUT) + except OSError as e: + return error('Failed to execute pgbackrest: {}'.format(e)) + except subprocess.TimeoutExpired: + p.terminate() try: - p = subprocess.Popen(pgbackrest_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8') - stdout, stderr = p.communicate(timeout=PGBACKREST_TIMEOUT) - except OSError as e: - return error('Failed to execute pgbackrest: {}'.format(e)) + p.wait(0.5) except subprocess.TimeoutExpired: - p.terminate() - try: - p.wait(0.5) - except subprocess.TimeoutExpired: - p.kill() - return error('pgbackrest failed to respond in {} seconds'.format(PGBACKREST_TIMEOUT)) - - if p.returncode != 0: - return error('Failed on "pgbackrest info" call', returncode=p.returncode) - - try: - data = json.loads(stdout) - backup_age_seconds = int(time.time()) - data[0]['backup'][-1]['timestamp']['stop'] - return 0, backup_age_seconds, 'OK. Last backup age in seconds: {}'.format(backup_age_seconds) - except (json.JSONDecodeError, KeyError) : - return error('Failed to parse pgbackrest output') - $$ LANGUAGE plpython3u VOLATILE; - - ALTER FUNCTION get_backup_age_pgbackrest() SET statement_timeout TO '30s'; - - GRANT EXECUTE ON FUNCTION get_backup_age_pgbackrest() TO pgwatch; - - COMMENT ON FUNCTION get_backup_age_pgbackrest() is 'created for pgwatch'; - is_instance_level: true + p.kill() + return error('pgbackrest failed to respond in {} seconds'.format(PGBACKREST_TIMEOUT)) + + if p.returncode != 0: + return error('Failed on "pgbackrest info" call', returncode=p.returncode) + + try: + data = json.loads(stdout) + backup_age_seconds = int(time.time()) - data[0]['backup'][-1]['timestamp']['stop'] + return 0, backup_age_seconds, 'OK. 
Last backup age in seconds: {}'.format(backup_age_seconds) + except (json.JSONDecodeError, KeyError) : + return error('Failed to parse pgbackrest output') + $$ LANGUAGE plpython3u VOLATILE; + + ALTER FUNCTION get_backup_age_pgbackrest() SET statement_timeout TO '30s'; + + GRANT EXECUTE ON FUNCTION get_backup_age_pgbackrest() TO pgwatch; + + COMMENT ON FUNCTION get_backup_age_pgbackrest() is 'created for pgwatch'; + is_instance_level: true backup_age_walg: - description: > - Retrieves the age of the last successful WAL-G backup in seconds. It uses the `wal-g backup-list --json` command to fetch - the backup information and calculates the age based on the current time and the timestamp of the last backup. - The metric returns a retcode of 0 on success, along with the age in seconds and a message indicating the status. - Expects .wal-g.json is correctly configured with all necessary credentials and "jq" tool is installed on the DB server. - sqls: - 11: | - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - retcode, - backup_age_seconds, - message - from - get_backup_age_walg() - init_sql: |- - CREATE EXTENSION IF NOT EXISTS plpython3u; - - CREATE OR REPLACE FUNCTION get_backup_age_walg(OUT retcode int, OUT backup_age_seconds int, OUT message text) AS - $$ - import subprocess - retcode=1 - backup_age_seconds=1000000 - message='' - - # get latest wal-g backup timestamp - walg_last_backup_cmd="""wal-g backup-list --json | jq -r '.[0].time'""" - p = subprocess.run(walg_last_backup_cmd, stdout=subprocess.PIPE, encoding='utf-8', shell=True) - if p.returncode != 0: - # plpy.notice("p.stdout: " + str(p.stderr) + str(p.stderr)) - return p.returncode, backup_age_seconds, 'Not OK. 
Failed on wal-g backup-list call' - - # plpy.notice("last_tz: " + last_tz) - last_tz=p.stdout.rstrip('\n\r') - - # get seconds since last backup from WAL-G timestamp in format '2020-01-22T17:50:51Z' - try: - plan = plpy.prepare("SELECT extract(epoch from now() - $1::timestamptz)::int AS backup_age_seconds;", ["text"]) - rv = plpy.execute(plan, [last_tz]) - except Exception as e: - return retcode, backup_age_seconds, 'Not OK. Failed to convert WAL-G backup timestamp to seconds' - else: - backup_age_seconds = rv[0]["backup_age_seconds"] - return 0, backup_age_seconds, 'OK. Last backup age in seconds: %s' % backup_age_seconds - - $$ LANGUAGE plpython3u VOLATILE; - - /* contacting S3 could be laggy depending on location */ - ALTER FUNCTION get_backup_age_walg() SET statement_timeout TO '30s'; - - GRANT EXECUTE ON FUNCTION get_backup_age_walg() TO pgwatch; - - COMMENT ON FUNCTION get_backup_age_walg() is 'created for pgwatch'; - is_instance_level: true + description: > + Retrieves the age of the last successful WAL-G backup in seconds. It uses the `wal-g backup-list --json` command to fetch + the backup information and calculates the age based on the current time and the timestamp of the last backup. + The metric returns a retcode of 0 on success, along with the age in seconds and a message indicating the status. + Expects .wal-g.json is correctly configured with all necessary credentials and "jq" tool is installed on the DB server. 
+ sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + retcode, + backup_age_seconds, + message + from + get_backup_age_walg() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_backup_age_walg(OUT retcode int, OUT backup_age_seconds int, OUT message text) AS + $$ + import subprocess + retcode=1 + backup_age_seconds=1000000 + message='' + + # get latest wal-g backup timestamp + walg_last_backup_cmd="""wal-g backup-list --json | jq -r '.[0].time'""" + p = subprocess.run(walg_last_backup_cmd, stdout=subprocess.PIPE, encoding='utf-8', shell=True) + if p.returncode != 0: + # plpy.notice("p.stdout: " + str(p.stderr) + str(p.stderr)) + return p.returncode, backup_age_seconds, 'Not OK. Failed on wal-g backup-list call' + + # plpy.notice("last_tz: " + last_tz) + last_tz=p.stdout.rstrip('\n\r') + + # get seconds since last backup from WAL-G timestamp in format '2020-01-22T17:50:51Z' + try: + plan = plpy.prepare("SELECT extract(epoch from now() - $1::timestamptz)::int AS backup_age_seconds;", ["text"]) + rv = plpy.execute(plan, [last_tz]) + except Exception as e: + return retcode, backup_age_seconds, 'Not OK. Failed to convert WAL-G backup timestamp to seconds' + else: + backup_age_seconds = rv[0]["backup_age_seconds"] + return 0, backup_age_seconds, 'OK. Last backup age in seconds: %s' % backup_age_seconds + + $$ LANGUAGE plpython3u VOLATILE; + + /* contacting S3 could be laggy depending on location */ + ALTER FUNCTION get_backup_age_walg() SET statement_timeout TO '30s'; + + GRANT EXECUTE ON FUNCTION get_backup_age_walg() TO pgwatch; + + COMMENT ON FUNCTION get_backup_age_walg() is 'created for pgwatch'; + is_instance_level: true bgwriter: - description: > - Retrieves key statistics from the PostgreSQL `pg_stat_bgwriter` view, providing insights into the background writer's performance. 
- It returns the number of timed and requested checkpoints, checkpoint write and sync times, buffer statistics, and the last reset time. - This metric helps administrators monitor the background writer's activity and its impact on database performance. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - checkpoints_timed, - checkpoints_req, - checkpoint_write_time, - checkpoint_sync_time, - buffers_checkpoint, - buffers_clean, - maxwritten_clean, - buffers_backend, - buffers_backend_fsync, - buffers_alloc - from - pg_stat_bgwriter - 17: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - buffers_clean, - maxwritten_clean, - buffers_alloc, - (extract(epoch from now() - stats_reset))::int as last_reset_s - from - pg_stat_bgwriter - node_status: primary - is_instance_level: true + description: > + Retrieves key statistics from the PostgreSQL `pg_stat_bgwriter` view, providing insights into the background writer's performance. + It returns the number of timed and requested checkpoints, checkpoint write and sync times, buffer statistics, and the last reset time. + This metric helps administrators monitor the background writer's activity and its impact on database performance. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + checkpoints_timed, + checkpoints_req, + checkpoint_write_time, + checkpoint_sync_time, + buffers_checkpoint, + buffers_clean, + maxwritten_clean, + buffers_backend, + buffers_backend_fsync, + buffers_alloc + from + pg_stat_bgwriter + 17: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + buffers_clean, + maxwritten_clean, + buffers_alloc, + (extract(epoch from now() - stats_reset))::int as last_reset_s + from + pg_stat_bgwriter + node_status: primary + is_instance_level: true buffercache_by_db: - description: > - Retrieves buffer cache statistics grouped by database, providing insights into the size of buffers used by each database. - It calculates the total size of buffers in bytes for each database. - This metric helps administrators monitor buffer usage across different databases in the PostgreSQL instance. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - datname as tag_database, - count(*) * (current_setting('block_size')::int8) as size_b - FROM - pg_buffercache AS b, - pg_database AS d - WHERE - d.oid = b.reldatabase - GROUP BY - datname - gauges: - - '*' - is_instance_level: true + description: > + Retrieves buffer cache statistics grouped by database, providing insights into the size of buffers used by each database. + It calculates the total size of buffers in bytes for each database. + This metric helps administrators monitor buffer usage across different databases in the PostgreSQL instance. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + datname as tag_database, + count(*) * (current_setting('block_size')::int8) as size_b + FROM + pg_buffercache AS b, + pg_database AS d + WHERE + d.oid = b.reldatabase + GROUP BY + datname + gauges: + - '*' + is_instance_level: true buffercache_by_type: - description: > - Retrieves buffer cache statistics grouped by relation type, providing insights into the size of buffers used - by different relation kinds. It calculates the total size of buffers in bytes for each relation kind - (e.g., Table, Index, Toast, Materialized view). This metric helps administrators monitor buffer usage across - different relation types in the PostgreSQL instance. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - CASE - WHEN relkind = 'r' THEN 'Table' -- TODO all relkinds covered? - WHEN relkind = 'i' THEN 'Index' - WHEN relkind = 't' THEN 'Toast' - WHEN relkind = 'm' THEN 'Materialized view' - ELSE 'Other' - END as tag_relkind, - count(*) * (current_setting('block_size')::int8) size_b - FROM - pg_buffercache AS b, - pg_class AS d - WHERE - d.oid = b.relfilenode - GROUP BY - relkind - gauges: - - '*' - is_instance_level: true + description: > + Retrieves buffer cache statistics grouped by relation type, providing insights into the size of buffers used + by different relation kinds. It calculates the total size of buffers in bytes for each relation kind + (e.g., Table, Index, Toast, Materialized view). This metric helps administrators monitor buffer usage across + different relation types in the PostgreSQL instance. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + CASE + WHEN relkind = 'r' THEN 'Table' -- TODO all relkinds covered? 
+ WHEN relkind = 'i' THEN 'Index' + WHEN relkind = 't' THEN 'Toast' + WHEN relkind = 'm' THEN 'Materialized view' + ELSE 'Other' + END as tag_relkind, + count(*) * (current_setting('block_size')::int8) size_b + FROM + pg_buffercache AS b, + pg_class AS d + WHERE + d.oid = b.relfilenode + GROUP BY + relkind + gauges: + - '*' + is_instance_level: true change_events: - description: > - The "change_events" built-in metric tracks DDL & config changes. Internally, it uses some other * - _hashes metrics that are not meant to be used independently. Such metrics should not be removed. - sqls: - 11: "" + description: > + The "change_events" built-in metric tracks DDL & config changes. Internally, it uses some other * + _hashes metrics that are not meant to be used independently. Such metrics should not be removed. + sqls: + 11: "" checkpointer: - description: > - Retrieves key statistics from the PostgreSQL `pg_stat_checkpointer` view, providing insights into the checkpointer's performance. - It returns the number of timed and requested checkpoints, restart points, write and sync times, and buffer statistics. - This metric helps administrators monitor the checkpointer's activity and its impact on database performance. - sqls: - 11: "; -- covered by bgwriter" - 17: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - num_timed, - num_requested, - restartpoints_timed, - restartpoints_req, - restartpoints_done, - write_time, - sync_time, - buffers_written, - (extract(epoch from now() - stats_reset))::int as last_reset_s - from - pg_stat_checkpointer + description: > + Retrieves key statistics from the PostgreSQL `pg_stat_checkpointer` view, providing insights into the checkpointer's performance. + It returns the number of timed and requested checkpoints, restart points, write and sync times, and buffer statistics. + This metric helps administrators monitor the checkpointer's activity and its impact on database performance. 
+ sqls: + 11: "; -- covered by bgwriter" + 17: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + num_timed, + num_requested, + restartpoints_timed, + restartpoints_req, + restartpoints_done, + write_time, + sync_time, + buffers_written, + (extract(epoch from now() - stats_reset))::int as last_reset_s + from + pg_stat_checkpointer configuration_hashes: - description: > - Retrieves configuration settings from the PostgreSQL `pg_settings` view, providing insights into the current configuration of the database. - This metric helps administrators monitor changes applied to the database configuration. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - name as tag_setting, - coalesce(reset_val, '') as value - from - pg_settings - where - name <> 'connection_ID' + description: > + Retrieves configuration settings from the PostgreSQL `pg_settings` view, providing insights into the current configuration of the database. + This metric helps administrators monitor changes applied to the database configuration. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + name as tag_setting, + coalesce(reset_val, '') as value + from + pg_settings + where + name <> 'connection_ID' cpu_load: - description: > - Retrieves the system load average for the last 1, 5, and 15 minutes using a custom PL/Python function. - This metric provides insights into the CPU load on the PostgreSQL server, helping administrators monitor system performance. - The function uses the `os.getloadavg()` method to fetch the load averages. 
- sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - round(load_1min::numeric, 2)::float as load_1min, - round(load_5min::numeric, 2)::float as load_5min, - round(load_15min::numeric, 2)::float as load_15min - from - get_load_average(); - init_sql: |- - CREATE EXTENSION IF NOT EXISTS plpython3u; - CREATE OR REPLACE FUNCTION get_load_average(OUT load_1min float, OUT load_5min float, OUT load_15min float) AS - $$ - from os import getloadavg - la = getloadavg() - return [la[0], la[1], la[2]] - $$ LANGUAGE plpython3u VOLATILE; - GRANT EXECUTE ON FUNCTION get_load_average() TO pgwatch; - COMMENT ON FUNCTION get_load_average() is 'created for pgwatch'; - gauges: - - '*' - is_instance_level: true + description: > + Retrieves the system load average for the last 1, 5, and 15 minutes using a custom PL/Python function. + This metric provides insights into the CPU load on the PostgreSQL server, helping administrators monitor system performance. + The function uses the `os.getloadavg()` method to fetch the load averages. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + round(load_1min::numeric, 2)::float as load_1min, + round(load_5min::numeric, 2)::float as load_5min, + round(load_15min::numeric, 2)::float as load_15min + from + get_load_average(); + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + CREATE OR REPLACE FUNCTION get_load_average(OUT load_1min float, OUT load_5min float, OUT load_15min float) AS + $$ + from os import getloadavg + la = getloadavg() + return [la[0], la[1], la[2]] + $$ LANGUAGE plpython3u VOLATILE; + GRANT EXECUTE ON FUNCTION get_load_average() TO pgwatch; + COMMENT ON FUNCTION get_load_average() is 'created for pgwatch'; + gauges: + - '*' + is_instance_level: true database_conflicts: - description: > - Retrieves conflict statistics from the PostgreSQL `pg_stat_database_conflicts` view, providing insights into conflicts that have occurred - in the current database. It returns the number of conflicts related to tablespace, lock, snapshot, buffer pin, and deadlock. - This metric helps administrators monitor and diagnose issues related to database conflicts. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - confl_tablespace, - confl_lock, - confl_snapshot, - confl_bufferpin, - confl_deadlock - FROM - pg_stat_database_conflicts - WHERE - datname = current_database() - node_status: standby + description: > + Retrieves conflict statistics from the PostgreSQL `pg_stat_database_conflicts` view, providing insights into conflicts that have occurred + in the current database. It returns the number of conflicts related to tablespace, lock, snapshot, buffer pin, and deadlock. + This metric helps administrators monitor and diagnose issues related to database conflicts. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + confl_tablespace, + confl_lock, + confl_snapshot, + confl_bufferpin, + confl_deadlock + FROM + pg_stat_database_conflicts + WHERE + datname = current_database() + node_status: standby datfrozenxid: - description: > - This metric tracks transaction ID and multixact ID ages to monitor wraparound risk. It retrieves the age - of the oldest datfrozenxid and datminmxid from pg_database for the current database, helping administrators - monitor and prevent transaction ID wraparound which can cause database shutdowns. - sqls: - 9.3: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - age(datfrozenxid) as datfrozenxid_age, - mxid_age(datminmxid) as datminmxid_age - from - pg_database - where - datname = current_database() - gauges: - - datfrozenxid_age - - datminmxid_age + description: > + This metric tracks transaction ID and multixact ID ages to monitor wraparound risk. It retrieves the age + of the oldest datfrozenxid and datminmxid from pg_database for the current database, helping administrators + monitor and prevent transaction ID wraparound which can cause database shutdowns. + sqls: + 9.3: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + age(datfrozenxid) as datfrozenxid_age, + mxid_age(datminmxid) as datminmxid_age + from + pg_database + where + datname = current_database() + gauges: + - datfrozenxid_age + - datminmxid_age db_size: - description: > - Retrieves the size of the current database and the size of the `pg_catalog` schema, providing insights into the storage usage of the database. - It returns the size in bytes for both the current database and the catalog schema. - This metric helps administrators monitor database size and storage consumption. 
- sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - pg_database_size(current_database()) as size_b, - (select sum(pg_total_relation_size(c.oid))::int8 - from pg_class c join pg_namespace n on n.oid = c.relnamespace - where nspname = 'pg_catalog' and relkind = 'r' - ) as catalog_size_b - gauges: - - '*' - statement_timeout_seconds: 300 + description: > + Retrieves the size of the current database and the size of the `pg_catalog` schema, providing insights into the storage usage of the database. + It returns the size in bytes for both the current database and the catalog schema. + This metric helps administrators monitor database size and storage consumption. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + pg_database_size(current_database()) as size_b, + (select sum(pg_total_relation_size(c.oid))::int8 + from pg_class c join pg_namespace n on n.oid = c.relnamespace + where nspname = 'pg_catalog' and relkind = 'r' + ) as catalog_size_b + gauges: + - '*' + statement_timeout_seconds: 300 db_size_approx: - description: > - Retrieves an approximate size of the current database and the size of the `pg_catalog` schema, providing insights into the storage usage of the database. - It returns the size in bytes for both the current database and the catalog schema. - This metric helps administrators monitor database size and storage consumption. 
- sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - current_setting('block_size')::int8 * ( - select sum(relpages) from pg_class c - join pg_namespace n on n.oid = c.relnamespace - where c.relpersistence != 't' - ) as size_b, - current_setting('block_size')::int8 * ( - select sum(c.relpages + coalesce(ct.relpages, 0) + coalesce(cti.relpages, 0)) - from pg_class c - join pg_namespace n on n.oid = c.relnamespace - left join pg_class ct on ct.oid = c.reltoastrelid - left join pg_index ti on ti.indrelid = ct.oid - left join pg_class cti on cti.oid = ti.indexrelid - where nspname = 'pg_catalog' - and (c.relkind = 'r' - or c.relkind = 'i' and not c.relname ~ '^pg_toast') - ) as catalog_size_b - gauges: - - '*' - metric_storage_name: db_size + description: > + Retrieves an approximate size of the current database and the size of the `pg_catalog` schema, providing insights into the storage usage of the database. + It returns the size in bytes for both the current database and the catalog schema. + This metric helps administrators monitor database size and storage consumption. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + current_setting('block_size')::int8 * ( + select sum(relpages) from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where c.relpersistence != 't' + ) as size_b, + current_setting('block_size')::int8 * ( + select sum(c.relpages + coalesce(ct.relpages, 0) + coalesce(cti.relpages, 0)) + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + left join pg_class ct on ct.oid = c.reltoastrelid + left join pg_index ti on ti.indrelid = ct.oid + left join pg_class cti on cti.oid = ti.indexrelid + where nspname = 'pg_catalog' + and (c.relkind = 'r' + or c.relkind = 'i' and not c.relname ~ '^pg_toast') + ) as catalog_size_b + gauges: + - '*' + metric_storage_name: db_size db_stats: - description: > - Retrieves key statistics from the PostgreSQL `pg_stat_database` view, providing insights into the current database's performance. - It returns the number of backends, transaction commits and rollbacks, buffer reads and hits, tuple statistics, conflicts, temporary files and bytes, - deadlocks, block read and write times, postmaster uptime, backup duration, recovery status, system identifier, and invalid indexes. - This metric helps administrators monitor database activity and performance. 
- sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - numbackends, - xact_commit, - xact_rollback, - blks_read, - blks_hit, - tup_returned, - tup_fetched, - tup_inserted, - tup_updated, - tup_deleted, - conflicts, - temp_files, - temp_bytes, - deadlocks, - blk_read_time, - blk_write_time, - extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, - extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, - case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, - system_identifier::text as tag_sys_id, - (select count(*) from pg_index i - where not indisvalid - and not exists ( /* leave out ones that are being actively rebuilt */ - select * from pg_locks l - join pg_stat_activity a using (pid) - where l.relation = i.indexrelid - and a.state = 'active' - and a.query ~* 'concurrently' - )) as invalid_indexes - from - pg_stat_database, pg_control_system() - where - datname = current_database() - 12: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - numbackends, - xact_commit, - xact_rollback, - blks_read, - blks_hit, - tup_returned, - tup_fetched, - tup_inserted, - tup_updated, - tup_deleted, - conflicts, - temp_files, - temp_bytes, - deadlocks, - blk_read_time, - blk_write_time, - extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, - extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, - checksum_failures, - extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s, - case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, - system_identifier::text as tag_sys_id, - (select count(*) from pg_index i - where not indisvalid - and not exists ( /* leave out ones that are being actively rebuilt */ - select * from pg_locks l - join pg_stat_activity a using (pid) - where l.relation = i.indexrelid - and a.state = 
'active' - and a.query ~* 'concurrently' - )) as invalid_indexes - from - pg_stat_database, pg_control_system() - where - datname = current_database() - 14: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - numbackends, - xact_commit, - xact_rollback, - blks_read, - blks_hit, - tup_returned, - tup_fetched, - tup_inserted, - tup_updated, - tup_deleted, - conflicts, - temp_files, - temp_bytes, - deadlocks, - blk_read_time, - blk_write_time, - extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, - extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, - checksum_failures, - extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s, - case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, - system_identifier::text as tag_sys_id, - session_time::int8, - active_time::int8, - idle_in_transaction_time::int8, - sessions, - sessions_abandoned, - sessions_fatal, - sessions_killed, - (select count(*) from pg_index i - where not indisvalid - and not exists ( /* leave out ones that are being actively rebuilt */ - select * from pg_locks l - join pg_stat_activity a using (pid) - where l.relation = i.indexrelid - and a.state = 'active' - and a.query ~* 'concurrently' - )) as invalid_indexes - from - pg_stat_database, pg_control_system() - where - datname = current_database() - 15: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - numbackends, - xact_commit, - xact_rollback, - blks_read, - blks_hit, - tup_returned, - tup_fetched, - tup_inserted, - tup_updated, - tup_deleted, - conflicts, - temp_files, - temp_bytes, - deadlocks, - blk_read_time, - blk_write_time, - extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, - checksum_failures, - extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s, - case when pg_is_in_recovery() then 1 else 
0 end as in_recovery_int, - system_identifier::text as tag_sys_id, - session_time::int8, - active_time::int8, - idle_in_transaction_time::int8, - sessions, - sessions_abandoned, - sessions_fatal, - sessions_killed, - (select count(*) from pg_index i - where not indisvalid - and not exists ( /* leave out ones that are being actively rebuilt */ - select * from pg_locks l - join pg_stat_activity a using (pid) - where l.relation = i.indexrelid - and a.state = 'active' - and a.query ~* 'concurrently' - )) as invalid_indexes - from - pg_stat_database, pg_control_system() - where - datname = current_database() - gauges: - - numbackends - - postmaster_uptime_s - - backup_duration_s - - backup_duration_s - - checksum_last_failure_s + description: > + Retrieves key statistics from the PostgreSQL `pg_stat_database` view, providing insights into the current database's performance. + It returns the number of backends, transaction commits and rollbacks, buffer reads and hits, tuple statistics, conflicts, temporary files and bytes, + deadlocks, block read and write times, postmaster uptime, backup duration, recovery status, system identifier, and invalid indexes. + This metric helps administrators monitor database activity and performance. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + system_identifier::text as tag_sys_id, + (select count(*) from pg_index i + where not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 'active' + and a.query ~* 'concurrently' + )) as invalid_indexes + from + pg_stat_database, pg_control_system() + where + datname = current_database() + 12: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, + checksum_failures, + extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + system_identifier::text as tag_sys_id, + (select count(*) from pg_index i + where not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 
'active' + and a.query ~* 'concurrently' + )) as invalid_indexes + from + pg_stat_database, pg_control_system() + where + datname = current_database() + 14: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, + checksum_failures, + extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + system_identifier::text as tag_sys_id, + session_time::int8, + active_time::int8, + idle_in_transaction_time::int8, + sessions, + sessions_abandoned, + sessions_fatal, + sessions_killed, + (select count(*) from pg_index i + where not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 'active' + and a.query ~* 'concurrently' + )) as invalid_indexes + from + pg_stat_database, pg_control_system() + where + datname = current_database() + 15: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + checksum_failures, + extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s, + case when pg_is_in_recovery() then 1 else 
0 end as in_recovery_int, + system_identifier::text as tag_sys_id, + session_time::int8, + active_time::int8, + idle_in_transaction_time::int8, + sessions, + sessions_abandoned, + sessions_fatal, + sessions_killed, + (select count(*) from pg_index i + where not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 'active' + and a.query ~* 'concurrently' + )) as invalid_indexes + from + pg_stat_database, pg_control_system() + where + datname = current_database() + gauges: + - numbackends + - postmaster_uptime_s + - backup_duration_s + - backup_duration_s + - checksum_last_failure_s db_stats_aurora: - description: > - Retrieves key statistics from the PostgreSQL `pg_stat_database` view for Amazon Aurora PostgreSQL, providing insights into the current database's performance. - It returns the number of backends, transaction commits and rollbacks, buffer reads and hits, tuple statistics, conflicts, temporary files and bytes, - deadlocks, block read and write times, postmaster uptime, recovery status, system identifier, and invalid indexes. - This metric helps administrators monitor database activity and performance in an Aurora PostgreSQL environment. 
- sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - numbackends, - xact_commit, - xact_rollback, - blks_read, - blks_hit, - tup_returned, - tup_fetched, - tup_inserted, - tup_updated, - tup_deleted, - conflicts, - temp_files, - temp_bytes, - deadlocks, - blk_read_time, - blk_write_time, - extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, - case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, - system_identifier::text as tag_sys_id - from - pg_stat_database, pg_control_system() - where - datname = current_database() - gauges: - - numbackends - - postmaster_uptime_s - - backup_duration_s - - checksum_last_failure_s - metric_storage_name: db_stats + description: > + Retrieves key statistics from the PostgreSQL `pg_stat_database` view for Amazon Aurora PostgreSQL, providing insights into the current database's performance. + It returns the number of backends, transaction commits and rollbacks, buffer reads and hits, tuple statistics, conflicts, temporary files and bytes, + deadlocks, block read and write times, postmaster uptime, recovery status, system identifier, and invalid indexes. + This metric helps administrators monitor database activity and performance in an Aurora PostgreSQL environment. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + system_identifier::text as tag_sys_id + from + pg_stat_database, pg_control_system() + where + datname = current_database() + gauges: + - numbackends + - postmaster_uptime_s + - backup_duration_s + - checksum_last_failure_s + metric_storage_name: db_stats index_hashes: - description: > - Retrieves the hash of index definitions in the PostgreSQL database, providing a way to track changes in index definitions over time. - This metric helps administrators monitor index changes and ensure consistency in index definitions. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - quote_ident(nspname)||'.'||quote_ident(c.relname) as tag_index, - quote_ident(nspname)||'.'||quote_ident(r.relname) as "table", - i.indisvalid::text as is_valid, - coalesce(md5(pg_get_indexdef(i.indexrelid)), random()::text) as md5 - from - pg_index i - join - pg_class c on c.oid = i.indexrelid - join - pg_class r on r.oid = i.indrelid - join - pg_namespace n on n.oid = c.relnamespace - where - c.relnamespace not in (select oid from pg_namespace where nspname like any(array[E'pg\\_%', 'information_schema'])) + description: > + Retrieves the hash of index definitions in the PostgreSQL database, providing a way to track changes in index definitions over time. + This metric helps administrators monitor index changes and ensure consistency in index definitions. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + quote_ident(nspname)||'.'||quote_ident(c.relname) as tag_index, + quote_ident(nspname)||'.'||quote_ident(r.relname) as "table", + i.indisvalid::text as is_valid, + coalesce(md5(pg_get_indexdef(i.indexrelid)), random()::text) as md5 + from + pg_index i + join + pg_class c on c.oid = i.indexrelid + join + pg_class r on r.oid = i.indrelid + join + pg_namespace n on n.oid = c.relnamespace + where + c.relnamespace not in (select oid from pg_namespace where nspname like any(array[E'pg\\_%', 'information_schema'])) index_stats: - description: > - Retrieves detailed statistics about indexes in the PostgreSQL database, including index size, scan counts, tuple read and fetch counts, - block read and hit counts, and index validity. It also identifies the largest, most scanned, and unused indexes. - This metric helps administrators monitor index performance and identify potential issues with unused or invalid indexes. 
- sqls: - 11: |- - /* does not return all index stats but biggest, top scanned and biggest unused ones */ - WITH q_locked_rels AS ( - select relation from pg_locks where mode = 'AccessExclusiveLock' - ), - q_index_details AS ( - select - sui.schemaname, - sui.indexrelname, - sui.relname, - sui.indexrelid, - coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b, - sui.idx_scan, - sui.idx_tup_read, - sui.idx_tup_fetch, - io.idx_blks_read, - io.idx_blks_hit, - i.indisvalid, - i.indisprimary, - i.indisunique, - i.indisexclusion - from - pg_stat_user_indexes sui - join pg_statio_user_indexes io on io.indexrelid = sui.indexrelid - join pg_index i on i.indexrelid = sui.indexrelid - where not sui.schemaname like any (array [E'pg\\_temp%', E'\\_timescaledb%']) - and not exists (select * from q_locked_rels where relation = sui.relid or relation = sui.indexrelid) - ), - q_top_indexes AS ( - /* biggest */ - select * - from ( - select indexrelid - from q_index_details - where idx_scan > 1 - order by index_size_b desc - limit 200 - ) x - union - /* most block traffic */ - select * - from ( - select indexrelid - from q_index_details - order by coalesce(idx_blks_read, 0) + coalesce(idx_blks_hit, 0) desc - limit 200 - ) y - union - /* most scans */ - select * - from ( - select indexrelid - from q_index_details - order by idx_scan desc nulls last - limit 200 - ) z - union - /* biggest unused non-constraint */ - select * - from ( - select q.indexrelid - from q_index_details q - where idx_scan = 0 - and not (indisprimary or indisunique or indisexclusion) - order by index_size_b desc - limit 200 - ) z - union - /* all invalid */ - select * - from ( - select q.indexrelid - from q_index_details q - where not indisvalid - ) zz - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - schemaname::text as tag_schema, - indexrelname::text as tag_index_name, - quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_index_full_name, - 
relname::text as tag_table_name, - quote_ident(schemaname)||'.'||quote_ident(relname) as tag_table_full_name, - coalesce(idx_scan, 0) as idx_scan, - coalesce(idx_tup_read, 0) as idx_tup_read, - coalesce(idx_tup_fetch, 0) as idx_tup_fetch, - coalesce(index_size_b, 0) as index_size_b, - quote_ident(schemaname)||'.'||quote_ident(indexrelname) as index_full_name_val, - md5(regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE')) as tag_index_def_hash, - regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE') as index_def, - case when not indisvalid then 1 else 0 end as is_invalid_int, - case when indisprimary then 1 else 0 end as is_pk_int, - case when indisunique or indisexclusion then 1 else 0 end as is_uq_or_exc, - system_identifier::text as tag_sys_id - FROM - q_index_details id - JOIN - pg_control_system() ON true - WHERE - indexrelid IN (select indexrelid from q_top_indexes) - ORDER BY - id.schemaname, id.relname, id.indexrelname - 16: |- - /* NB! 
does not return all index stats but biggest, top scanned and biggest unused ones */ - WITH q_locked_rels AS ( /* pgwatch_generated */ - select relation from pg_locks where mode = 'AccessExclusiveLock' - ), - q_index_details AS ( - select - sui.schemaname, - sui.indexrelname, - sui.relname, - sui.indexrelid, - coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b, - sui.idx_scan, - sui.idx_tup_read, - sui.idx_tup_fetch, - io.idx_blks_read, - io.idx_blks_hit, - i.indisvalid, - i.indisprimary, - i.indisunique, - i.indisexclusion, - extract(epoch from now() - last_idx_scan)::int as last_idx_scan_s - from - pg_stat_user_indexes sui - join pg_statio_user_indexes io on io.indexrelid = sui.indexrelid - join pg_index i on i.indexrelid = sui.indexrelid - where not sui.schemaname like any (array [E'pg\\_temp%', E'\\_timescaledb%']) - and not exists (select * from q_locked_rels where relation = sui.relid or relation = sui.indexrelid) - ), - q_top_indexes AS ( - /* biggest */ - select * - from ( - select indexrelid - from q_index_details - where idx_scan > 1 - order by index_size_b desc - limit 200 - ) x - union - /* most block traffic */ - select * - from ( - select indexrelid - from q_index_details - order by coalesce(idx_blks_read, 0) + coalesce(idx_blks_hit, 0) desc - limit 200 - ) y - union - /* most scans */ - select * - from ( - select indexrelid - from q_index_details - order by idx_scan desc nulls last - limit 200 - ) z - union - /* biggest unused non-constraint */ - select * - from ( - select q.indexrelid - from q_index_details q - where idx_scan = 0 - and not (indisprimary or indisunique or indisexclusion) - order by index_size_b desc - limit 200 - ) z - union - /* all invalid */ - select * - from ( - select q.indexrelid - from q_index_details q - where not indisvalid - ) zz - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - schemaname::text as tag_schema, - indexrelname::text as tag_index_name, - 
quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_index_full_name, - relname::text as tag_table_name, - quote_ident(schemaname)||'.'||quote_ident(relname) as tag_table_full_name, - coalesce(idx_scan, 0) as idx_scan, - coalesce(idx_tup_read, 0) as idx_tup_read, - coalesce(idx_tup_fetch, 0) as idx_tup_fetch, - coalesce(index_size_b, 0) as index_size_b, - quote_ident(schemaname)||'.'||quote_ident(indexrelname) as index_full_name_val, - md5(regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE')) as tag_index_def_hash, - regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE') as index_def, - case when not indisvalid then 1 else 0 end as is_invalid_int, - case when indisprimary then 1 else 0 end as is_pk_int, - case when indisunique or indisexclusion then 1 else 0 end as is_uq_or_exc, - system_identifier::text as tag_sys_id, - last_idx_scan_s - FROM - q_index_details id - JOIN - pg_control_system() ON true - WHERE - indexrelid IN (select indexrelid from q_top_indexes) - ORDER BY - id.schemaname, id.relname, id.indexrelname + description: > + Retrieves detailed statistics about indexes in the PostgreSQL database, including index size, scan counts, tuple read and fetch counts, + block read and hit counts, and index validity. It also identifies the largest, most scanned, and unused indexes. + This metric helps administrators monitor index performance and identify potential issues with unused or invalid indexes. 
+ sqls: + 11: |- + /* does not return all index stats but biggest, top scanned and biggest unused ones */ + WITH q_locked_rels AS ( + select relation from pg_locks where mode = 'AccessExclusiveLock' + ), + q_index_details AS ( + select + sui.schemaname, + sui.indexrelname, + sui.relname, + sui.indexrelid, + coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b, + sui.idx_scan, + sui.idx_tup_read, + sui.idx_tup_fetch, + io.idx_blks_read, + io.idx_blks_hit, + i.indisvalid, + i.indisprimary, + i.indisunique, + i.indisexclusion + from + pg_stat_user_indexes sui + join pg_statio_user_indexes io on io.indexrelid = sui.indexrelid + join pg_index i on i.indexrelid = sui.indexrelid + where not sui.schemaname like any (array [E'pg\\_temp%', E'\\_timescaledb%']) + and not exists (select * from q_locked_rels where relation = sui.relid or relation = sui.indexrelid) + ), + q_top_indexes AS ( + /* biggest */ + select * + from ( + select indexrelid + from q_index_details + where idx_scan > 1 + order by index_size_b desc + limit 200 + ) x + union + /* most block traffic */ + select * + from ( + select indexrelid + from q_index_details + order by coalesce(idx_blks_read, 0) + coalesce(idx_blks_hit, 0) desc + limit 200 + ) y + union + /* most scans */ + select * + from ( + select indexrelid + from q_index_details + order by idx_scan desc nulls last + limit 200 + ) z + union + /* biggest unused non-constraint */ + select * + from ( + select q.indexrelid + from q_index_details q + where idx_scan = 0 + and not (indisprimary or indisunique or indisexclusion) + order by index_size_b desc + limit 200 + ) z + union + /* all invalid */ + select * + from ( + select q.indexrelid + from q_index_details q + where not indisvalid + ) zz + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + schemaname::text as tag_schema, + indexrelname::text as tag_index_name, + quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_index_full_name, + 
relname::text as tag_table_name, + quote_ident(schemaname)||'.'||quote_ident(relname) as tag_table_full_name, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_read, 0) as idx_tup_read, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + coalesce(index_size_b, 0) as index_size_b, + quote_ident(schemaname)||'.'||quote_ident(indexrelname) as index_full_name_val, + md5(regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE')) as tag_index_def_hash, + regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE') as index_def, + case when not indisvalid then 1 else 0 end as is_invalid_int, + case when indisprimary then 1 else 0 end as is_pk_int, + case when indisunique or indisexclusion then 1 else 0 end as is_uq_or_exc, + system_identifier::text as tag_sys_id + FROM + q_index_details id + JOIN + pg_control_system() ON true + WHERE + indexrelid IN (select indexrelid from q_top_indexes) + ORDER BY + id.schemaname, id.relname, id.indexrelname + 16: |- + /* NB! 
does not return all index stats but biggest, top scanned and biggest unused ones */ + WITH q_locked_rels AS ( /* pgwatch_generated */ + select relation from pg_locks where mode = 'AccessExclusiveLock' + ), + q_index_details AS ( + select + sui.schemaname, + sui.indexrelname, + sui.relname, + sui.indexrelid, + coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b, + sui.idx_scan, + sui.idx_tup_read, + sui.idx_tup_fetch, + io.idx_blks_read, + io.idx_blks_hit, + i.indisvalid, + i.indisprimary, + i.indisunique, + i.indisexclusion, + extract(epoch from now() - last_idx_scan)::int as last_idx_scan_s + from + pg_stat_user_indexes sui + join pg_statio_user_indexes io on io.indexrelid = sui.indexrelid + join pg_index i on i.indexrelid = sui.indexrelid + where not sui.schemaname like any (array [E'pg\\_temp%', E'\\_timescaledb%']) + and not exists (select * from q_locked_rels where relation = sui.relid or relation = sui.indexrelid) + ), + q_top_indexes AS ( + /* biggest */ + select * + from ( + select indexrelid + from q_index_details + where idx_scan > 1 + order by index_size_b desc + limit 200 + ) x + union + /* most block traffic */ + select * + from ( + select indexrelid + from q_index_details + order by coalesce(idx_blks_read, 0) + coalesce(idx_blks_hit, 0) desc + limit 200 + ) y + union + /* most scans */ + select * + from ( + select indexrelid + from q_index_details + order by idx_scan desc nulls last + limit 200 + ) z + union + /* biggest unused non-constraint */ + select * + from ( + select q.indexrelid + from q_index_details q + where idx_scan = 0 + and not (indisprimary or indisunique or indisexclusion) + order by index_size_b desc + limit 200 + ) z + union + /* all invalid */ + select * + from ( + select q.indexrelid + from q_index_details q + where not indisvalid + ) zz + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + schemaname::text as tag_schema, + indexrelname::text as tag_index_name, + 
quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_index_full_name, + relname::text as tag_table_name, + quote_ident(schemaname)||'.'||quote_ident(relname) as tag_table_full_name, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_read, 0) as idx_tup_read, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + coalesce(index_size_b, 0) as index_size_b, + quote_ident(schemaname)||'.'||quote_ident(indexrelname) as index_full_name_val, + md5(regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE')) as tag_index_def_hash, + regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE') as index_def, + case when not indisvalid then 1 else 0 end as is_invalid_int, + case when indisprimary then 1 else 0 end as is_pk_int, + case when indisunique or indisexclusion then 1 else 0 end as is_uq_or_exc, + system_identifier::text as tag_sys_id, + last_idx_scan_s + FROM + q_index_details id + JOIN + pg_control_system() ON true + WHERE + indexrelid IN (select indexrelid from q_top_indexes) + ORDER BY + id.schemaname, id.relname, id.indexrelname instance_up: - description: > - This metric has some special handling attached to it - it will store a 0 value if the database is not accessible. - Thus it can be used to for example calculate some percentual "uptime" indicator. - For standard metrics there will be no data rows stored when the DB is not reachable, but for this one, - there will be a zero stored for the "is_up" column that, under normal operations, would always be 1. - sqls: - 11: | - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - 1::int as is_up + description: > + This metric has some special handling attached to it - it will store a 0 value if the database is not accessible. + Thus it can be used, for example, to calculate a percentage "uptime" indicator.
+ For standard metrics there will be no data rows stored when the DB is not reachable, but for this one, + there will be a zero stored for the "is_up" column that, under normal operations, would always be 1. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 1::int as is_up invalid_indexes: - description: > - Retrieves a list of invalid indexes in the PostgreSQL database, providing insights into indexes that are not valid. - It returns the index name, schema, and whether the index is valid or not. This metric helps administrators identify and address issues with invalid indexes. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - format('%I.%I', n.nspname , ci.relname) as tag_index_full_name, - coalesce(pg_relation_size(indexrelid), 0) as index_size_b - from - pg_index i - join pg_class ci on ci.oid = i.indexrelid - join pg_class cr on cr.oid = i.indrelid - join pg_namespace n on n.oid = ci.relnamespace - where not n.nspname like E'pg\\_temp%' - and not indisvalid - and not exists ( /* leave out ones that are being actively rebuilt */ - select * from pg_locks l - join pg_stat_activity a using (pid) - where l.relation = i.indexrelid - and a.state = 'active' - and a.query ~* 'concurrently' - ) - and not exists (select * from pg_locks where relation = indexrelid and mode = 'AccessExclusiveLock') /* can't get size then */ - order by index_size_b desc - limit 100 + description: > + Retrieves a list of invalid indexes in the PostgreSQL database, providing insights into indexes that are not valid. + It returns the schema-qualified name and the size in bytes of each invalid index (largest first, at most 100 rows), skipping indexes locked by an active session running a CONCURRENTLY statement and indexes held under an AccessExclusiveLock. This metric helps administrators identify and address issues with invalid indexes.
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + format('%I.%I', n.nspname , ci.relname) as tag_index_full_name, + coalesce(pg_relation_size(indexrelid), 0) as index_size_b + from + pg_index i + join pg_class ci on ci.oid = i.indexrelid + join pg_class cr on cr.oid = i.indrelid + join pg_namespace n on n.oid = ci.relnamespace + where not n.nspname like E'pg\\_temp%' + and not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 'active' + and a.query ~* 'concurrently' + ) + and not exists (select * from pg_locks where relation = indexrelid and mode = 'AccessExclusiveLock') /* can't get size then */ + order by index_size_b desc + limit 100 kpi: - description: > - Retrieves key performance indicators (KPIs) from the PostgreSQL `pg_stat_database` view, providing insights into the current database's performance. - It returns the number of backends, active and blocked backends, oldest transaction age, transactions per second (TPS), commit and rollback counts, - buffer read and hit counts, temporary bytes, sequence scans on tables larger than 10MB, tuple statistics, stored procedure calls, - block read and write times, deadlocks, recovery status, and postmaster uptime. - This metric helps administrators monitor database activity and performance. 
- sqls: - 11: | - WITH q_stat_tables AS ( - SELECT * FROM pg_stat_user_tables t - JOIN pg_class c ON c.oid = t.relid - WHERE NOT schemaname LIKE E'pg\\_temp%' - AND c.relpages > (1e7 / 8) -- >10MB - ), - q_stat_activity AS ( - SELECT * FROM pg_stat_activity - WHERE datname = current_database() AND pid != pg_backend_pid() - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - case - when pg_is_in_recovery() = false then - pg_wal_lsn_diff(pg_current_wal_lsn(), '0/0')::int8 - else - pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '0/0')::int8 - end as wal_location_b, - numbackends - 1 as numbackends, - (select count(*) from q_stat_activity where state in ('active', 'idle in transaction')) AS active_backends, - (select count(*) from q_stat_activity where wait_event_type in ('LWLock', 'Lock', 'BufferPin')) AS blocked_backends, - (select round(extract(epoch from now()) - extract(epoch from (select xact_start from q_stat_activity - where datid = d.datid and not query like 'autovacuum:%' order by xact_start limit 1))))::int AS kpi_oldest_tx_s, - xact_commit + xact_rollback AS tps, - xact_commit, - xact_rollback, - blks_read, - blks_hit, - temp_bytes, - (select sum(seq_scan) from q_stat_tables)::int8 AS seq_scans_on_tbls_gt_10mb, - tup_inserted, - tup_updated, - tup_deleted, - (select sum(calls) from pg_stat_user_functions where not schemaname like any(array[E'pg\\_%', 'information_schema']))::int8 AS sproc_calls, - blk_read_time, - blk_write_time, - deadlocks, - case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, - extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s - FROM - pg_stat_database d - WHERE - datname = current_database() - gauges: - - numbackends - - active_backends - - blocked_backends - - kpi_oldest_tx_s + description: > + Retrieves key performance indicators (KPIs) from the PostgreSQL `pg_stat_database` view, providing insights into the current database's performance. 
+ It returns the number of backends, active and blocked backends, oldest transaction age, transactions per second (TPS), commit and rollback counts, + buffer read and hit counts, temporary bytes, sequence scans on tables larger than 10MB, tuple statistics, stored procedure calls, + block read and write times, deadlocks, recovery status, and postmaster uptime. + This metric helps administrators monitor database activity and performance. + sqls: + 11: | + WITH q_stat_tables AS ( + SELECT * FROM pg_stat_user_tables t + JOIN pg_class c ON c.oid = t.relid + WHERE NOT schemaname LIKE E'pg\\_temp%' + AND c.relpages > (1e7 / 8) -- >10MB + ), + q_stat_activity AS ( + SELECT * FROM pg_stat_activity + WHERE datname = current_database() AND pid != pg_backend_pid() + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + case + when pg_is_in_recovery() = false then + pg_wal_lsn_diff(pg_current_wal_lsn(), '0/0')::int8 + else + pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '0/0')::int8 + end as wal_location_b, + numbackends - 1 as numbackends, + (select count(*) from q_stat_activity where state in ('active', 'idle in transaction')) AS active_backends, + (select count(*) from q_stat_activity where wait_event_type in ('LWLock', 'Lock', 'BufferPin')) AS blocked_backends, + (select round(extract(epoch from now()) - extract(epoch from (select xact_start from q_stat_activity + where datid = d.datid and not query like 'autovacuum:%' order by xact_start limit 1))))::int AS kpi_oldest_tx_s, + xact_commit + xact_rollback AS tps, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + temp_bytes, + (select sum(seq_scan) from q_stat_tables)::int8 AS seq_scans_on_tbls_gt_10mb, + tup_inserted, + tup_updated, + tup_deleted, + (select sum(calls) from pg_stat_user_functions where not schemaname like any(array[E'pg\\_%', 'information_schema']))::int8 AS sproc_calls, + blk_read_time, + blk_write_time, + deadlocks, + case when pg_is_in_recovery() then 1 else 0 end 
as in_recovery_int, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s + FROM + pg_stat_database d + WHERE + datname = current_database() + gauges: + - numbackends + - active_backends + - blocked_backends + - kpi_oldest_tx_s locks: - description: > - Retrieves lock statistics from the PostgreSQL `pg_locks` view, providing insights into the types and modes of locks currently held in the database. - It returns the lock type, lock mode, and the count of locks for each type and mode. This metric helps administrators monitor lock contention and performance. - sqls: - 11: |- - WITH q_locks AS ( - select - * - from - pg_locks - where - pid != pg_backend_pid() - and database = (select oid from pg_database where datname = current_database()) - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - locktypes AS tag_locktype, - coalesce((select count(*) FROM q_locks WHERE locktype = locktypes), 0) AS count - FROM - unnest('{relation, extend, page, tuple, transactionid, virtualxid, object, userlock, advisory}'::text[]) locktypes - gauges: - - '*' + description: > + Retrieves lock statistics from the PostgreSQL `pg_locks` view, providing insights into the types of locks currently present in the current database. + It returns one row per lock type with the number of `pg_locks` entries of that type, excluding the monitoring session's own locks; per-mode counts are reported by the separate `locks_mode` metric. This metric helps administrators monitor lock contention and performance.
+ sqls: + 11: |- + WITH q_locks AS ( + select + * + from + pg_locks + where + pid != pg_backend_pid() + and database = (select oid from pg_database where datname = current_database()) + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + locktypes AS tag_locktype, + coalesce((select count(*) FROM q_locks WHERE locktype = locktypes), 0) AS count + FROM + unnest('{relation, extend, page, tuple, transactionid, virtualxid, object, userlock, advisory}'::text[]) locktypes + gauges: + - '*' locks_mode: - description: > - Retrieves lock mode statistics from the PostgreSQL `pg_locks` view, providing insights into the different lock modes currently held in the database. - It returns the lock mode and the count of locks for each mode. This metric helps administrators monitor lock contention and performance. - sqls: - 11: |- - WITH q_locks AS ( - select - * - from - pg_locks - where - pid != pg_backend_pid() - and database = (select oid from pg_database where datname = current_database()) - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - lockmodes AS tag_lockmode, - coalesce((select count(*) FROM q_locks WHERE mode = lockmodes), 0) AS count - FROM - unnest('{AccessShareLock, ExclusiveLock, RowShareLock, RowExclusiveLock, ShareLock, ShareRowExclusiveLock, AccessExclusiveLock, ShareUpdateExclusiveLock}'::text[]) lockmodes - gauges: - - '*' + description: > + Retrieves lock mode statistics from the PostgreSQL `pg_locks` view, providing insights into the different lock modes currently held in the database. + It returns the lock mode and the count of locks for each mode. This metric helps administrators monitor lock contention and performance. 
+ sqls: + 11: |- + WITH q_locks AS ( + select + * + from + pg_locks + where + pid != pg_backend_pid() + and database = (select oid from pg_database where datname = current_database()) + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + lockmodes AS tag_lockmode, + coalesce((select count(*) FROM q_locks WHERE mode = lockmodes), 0) AS count + FROM + unnest('{AccessShareLock, ExclusiveLock, RowShareLock, RowExclusiveLock, ShareLock, ShareRowExclusiveLock, AccessExclusiveLock, ShareUpdateExclusiveLock}'::text[]) lockmodes + gauges: + - '*' logical_subscriptions: - description: > - Retrieves information about logical subscriptions in the PostgreSQL database, including their names, enabled status, and the number of relations in each subscription. - It also provides counts of relations in different states (inserted, deleted, synchronized, and replicated). - This metric helps administrators monitor logical replication subscriptions and their statuses. - sqls: - 11: | - with q_sr as ( - select * from pg_subscription_rel - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - subname::text as tag_subname, - subenabled, - (select count(*) from q_sr where srsubid = oid) as relcount, - (select count(*) from q_sr where srsubid = oid and srsubstate = 'i') as state_i, - (select count(*) from q_sr where srsubid = oid and srsubstate = 'd') as state_d, - (select count(*) from q_sr where srsubid = oid and srsubstate = 's') as state_s, - (select count(*) from q_sr where srsubid = oid and srsubstate = 'r') as state_r - from - pg_subscription - where - subdbid = (select oid from pg_database where datname = current_database()) - gauges: - - '*' + description: > + Retrieves information about logical subscriptions in the PostgreSQL database, including their names, enabled status, and the number of relations in each subscription. 
+ It also provides counts of relations in each `srsubstate`: `i` (initialize), `d` (data is being copied), `s` (synchronized), and `r` (ready). + This metric helps administrators monitor logical replication subscriptions and their statuses. + sqls: + 11: | + with q_sr as ( + select * from pg_subscription_rel + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + subname::text as tag_subname, + subenabled, + (select count(*) from q_sr where srsubid = oid) as relcount, + (select count(*) from q_sr where srsubid = oid and srsubstate = 'i') as state_i, + (select count(*) from q_sr where srsubid = oid and srsubstate = 'd') as state_d, + (select count(*) from q_sr where srsubid = oid and srsubstate = 's') as state_s, + (select count(*) from q_sr where srsubid = oid and srsubstate = 'r') as state_r + from + pg_subscription + where + subdbid = (select oid from pg_database where datname = current_database()) + gauges: + - '*' pgbouncer_stats: - description: > - Retrieves statistics from the PgBouncer connection pooler. - This metric helps administrators monitor PgBouncer performance and connection pooling efficiency. - sqls: - 0: show stats + description: > + Retrieves statistics from the PgBouncer connection pooler. + This metric helps administrators monitor PgBouncer performance and connection pooling efficiency. + sqls: + 0: show stats pgbouncer_clients: - description: > - Retrieves client connection statistics from the PgBouncer connection pooler, providing insights into the current state of client connections. - It returns the number of active, idle, and total client connections, as well as transaction counts and memory usage statistics. - This metric helps administrators monitor PgBouncer client connections and performance. - sqls: - 0: show clients + description: > + Retrieves client connection statistics from the PgBouncer connection pooler, providing insights into the current state of client connections.
+ It returns the number of active, idle, and total client connections, as well as transaction counts and memory usage statistics. + This metric helps administrators monitor PgBouncer client connections and performance. + sqls: + 0: show clients pgpool_processes: - description: > - Retrieves process statistics from the PgPool connection pooler, providing insights into the current state of PgPool processes. - It returns the number of active, idle, and total processes, as well as memory usage statistics. - This metric helps administrators monitor PgPool process performance and resource utilization. - sqls: - 3: show pool_processes + description: > + Retrieves process statistics from the PgPool connection pooler, providing insights into the current state of PgPool processes. + It returns the number of active, idle, and total processes, as well as memory usage statistics. + This metric helps administrators monitor PgPool process performance and resource utilization. + sqls: + 3: show pool_processes pgpool_stats: - description: > - Retrieves statistics from the PgPool connection pooler, providing insights into the current state of PgPool connections and transactions. - It returns the number of active, idle, and total connections, as well as transaction counts and memory usage statistics. - This metric helps administrators monitor PgPool performance and connection pooling efficiency. - sqls: - 3: show pool_nodes + description: > + Retrieves statistics from the PgPool connection pooler, providing insights into the current state of PgPool connections and transactions. + It returns the number of active, idle, and total connections, as well as transaction counts and memory usage statistics. + This metric helps administrators monitor PgPool performance and connection pooling efficiency. 
+ sqls: + 3: show pool_nodes postgres_role: - description: > - This metric determines the PostgreSQL server role (primary, standby, or standalone) by checking - if the server is in recovery mode and if it has any active replication connections. It returns - an integer value: 0 = standalone, 1 = primary with replicas, 2 = standby/replica. - sqls: - 9.0: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - case pg_is_in_recovery() - when 't' then 2 - else (select case (select count(*) from pg_stat_replication where application_name != 'pg_basebackup') when '0' then 0 else 1 end) - end as in_recovery_int - gauges: - - in_recovery_int - is_instance_level: true + description: > + This metric determines the PostgreSQL server role (primary, standby, or standalone) by checking + if the server is in recovery mode and if it has any active replication connections. It returns + an integer value: 0 = standalone, 1 = primary with replicas, 2 = standby/replica. + sqls: + 9.0: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + case pg_is_in_recovery() + when 't' then 2 + else (select case (select count(*) from pg_stat_replication where application_name != 'pg_basebackup') when '0' then 0 else 1 end) + end as in_recovery_int + gauges: + - in_recovery_int + is_instance_level: true privilege_changes: - description: > - Retrieves information about privileges granted to roles on various database objects, including tables, functions, schemas, and databases. - It returns the object type, role name, object name, and privilege type for each privilege granted. - This metric helps administrators monitor and manage database access control and privileges. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch FROM now()) * 1e9)::int8 AS epoch_ns, - * - FROM ( - SELECT - 'table'::text AS object_type, - grantee::text AS tag_role, - quote_ident(table_schema) || '.' 
|| quote_ident(table_name) AS tag_object, - privilege_type - FROM - information_schema.table_privileges - /* includes also VIEW-s actually */ - WHERE - NOT grantee = ANY ( - SELECT - rolname - FROM - pg_roles - WHERE - rolsuper - OR oid < 16384) - AND NOT table_schema IN ('information_schema', 'pg_catalog') - /* - union all - - select - -- quite a heavy query currently, maybe faster directly via pg_attribute + has_column_privilege? - 'column' AS object_type, - grantee::text AS tag_role, - quote_ident(table_schema) || '.' || quote_ident(table_name) AS tag_object, - privilege_type - FROM - information_schema.column_privileges cp - WHERE - NOT table_schema IN ('pg_catalog', 'information_schema') - AND NOT grantee = ANY ( - SELECT - rolname - FROM - pg_roles - WHERE - rolsuper - OR oid < 16384) - AND NOT EXISTS ( - SELECT - * - FROM - information_schema.table_privileges - WHERE - table_schema = cp.table_schema - AND table_name = cp.table_name - AND grantee = cp.grantee - AND privilege_type = cp.privilege_type) */ - UNION ALL - SELECT - 'function' AS object_type, - grantee::text AS tag_role, - quote_ident(routine_schema) || '.' 
|| quote_ident(routine_name) AS tag_object, - privilege_type - FROM - information_schema.routine_privileges - WHERE - NOT routine_schema IN ('information_schema', 'pg_catalog') - AND NOT grantee = ANY ( - SELECT - rolname - FROM - pg_roles - WHERE - rolsuper - OR oid < 16384) - UNION ALL - SELECT - 'schema' AS object_type, - r.rolname::text AS tag_role, - quote_ident(n.nspname) AS tag_object, - p.perm AS privilege_type - FROM - pg_catalog.pg_namespace AS n - CROSS JOIN pg_catalog.pg_roles AS r - CROSS JOIN ( - VALUES ('USAGE'), - ('CREATE')) AS p (perm) - WHERE - NOT n.nspname IN ('information_schema', 'pg_catalog') - AND n.nspname NOT LIKE 'pg_%' - AND NOT r.rolsuper - AND r.oid >= 16384 - AND has_schema_privilege(r.oid, n.oid, p.perm) - UNION ALL - SELECT - 'database' AS object_type, - r.rolname::text AS role_name, - quote_ident(datname) AS tag_object, - p.perm AS permission - FROM - pg_catalog.pg_database AS d - CROSS JOIN pg_catalog.pg_roles AS r - CROSS JOIN ( - VALUES ('CREATE'), - ('CONNECT'), - ('TEMPORARY')) AS p (perm) - WHERE - d.datname = current_database() - AND NOT r.rolsuper - AND r.oid >= 16384 - AND has_database_privilege(r.oid, d.oid, p.perm) - UNION ALL - SELECT - 'superusers' AS object_type, - rolname::text AS role_name, - rolname::text AS tag_object, - 'SUPERUSER' AS permission - FROM - pg_catalog.pg_roles - WHERE - rolsuper - UNION ALL - SELECT - 'login_users' AS object_type, - rolname::text AS role_name, - rolname::text AS tag_object, - 'LOGIN' AS permission - FROM - pg_catalog.pg_roles - WHERE - rolcanlogin) y + description: > + Retrieves information about privileges granted to roles on various database objects, including tables, functions, schemas, and databases. + It returns the object type, role name, object name, and privilege type for each privilege granted. + This metric helps administrators monitor and manage database access control and privileges. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + * + FROM ( + SELECT + 'table'::text AS object_type, + grantee::text AS tag_role, + quote_ident(table_schema) || '.' || quote_ident(table_name) AS tag_object, + privilege_type + FROM + information_schema.table_privileges + /* includes also VIEW-s actually */ + WHERE + NOT grantee = ANY ( + SELECT + rolname + FROM + pg_roles + WHERE + rolsuper + OR oid < 16384) + AND NOT table_schema IN ('information_schema', 'pg_catalog') + /* + union all + + select + -- quite a heavy query currently, maybe faster directly via pg_attribute + has_column_privilege? + 'column' AS object_type, + grantee::text AS tag_role, + quote_ident(table_schema) || '.' || quote_ident(table_name) AS tag_object, + privilege_type + FROM + information_schema.column_privileges cp + WHERE + NOT table_schema IN ('pg_catalog', 'information_schema') + AND NOT grantee = ANY ( + SELECT + rolname + FROM + pg_roles + WHERE + rolsuper + OR oid < 16384) + AND NOT EXISTS ( + SELECT + * + FROM + information_schema.table_privileges + WHERE + table_schema = cp.table_schema + AND table_name = cp.table_name + AND grantee = cp.grantee + AND privilege_type = cp.privilege_type) */ + UNION ALL + SELECT + 'function' AS object_type, + grantee::text AS tag_role, + quote_ident(routine_schema) || '.' 
|| quote_ident(routine_name) AS tag_object, + privilege_type + FROM + information_schema.routine_privileges + WHERE + NOT routine_schema IN ('information_schema', 'pg_catalog') + AND NOT grantee = ANY ( + SELECT + rolname + FROM + pg_roles + WHERE + rolsuper + OR oid < 16384) + UNION ALL + SELECT + 'schema' AS object_type, + r.rolname::text AS tag_role, + quote_ident(n.nspname) AS tag_object, + p.perm AS privilege_type + FROM + pg_catalog.pg_namespace AS n + CROSS JOIN pg_catalog.pg_roles AS r + CROSS JOIN ( + VALUES ('USAGE'), + ('CREATE')) AS p (perm) + WHERE + NOT n.nspname IN ('information_schema', 'pg_catalog') + AND n.nspname NOT LIKE 'pg_%' + AND NOT r.rolsuper + AND r.oid >= 16384 + AND has_schema_privilege(r.oid, n.oid, p.perm) + UNION ALL + SELECT + 'database' AS object_type, + r.rolname::text AS role_name, + quote_ident(datname) AS tag_object, + p.perm AS permission + FROM + pg_catalog.pg_database AS d + CROSS JOIN pg_catalog.pg_roles AS r + CROSS JOIN ( + VALUES ('CREATE'), + ('CONNECT'), + ('TEMPORARY')) AS p (perm) + WHERE + d.datname = current_database() + AND NOT r.rolsuper + AND r.oid >= 16384 + AND has_database_privilege(r.oid, d.oid, p.perm) + UNION ALL + SELECT + 'superusers' AS object_type, + rolname::text AS role_name, + rolname::text AS tag_object, + 'SUPERUSER' AS permission + FROM + pg_catalog.pg_roles + WHERE + rolsuper + UNION ALL + SELECT + 'login_users' AS object_type, + rolname::text AS role_name, + rolname::text AS tag_object, + 'LOGIN' AS permission + FROM + pg_catalog.pg_roles + WHERE + rolcanlogin) y psutil_cpu: - description: > - This metric requires the "psutil" Python package to be installed on the PostgreSQL server. - It provides CPU utilization and load averages using the "psutil" library. - "psutil" is known to behave differently depending on the used version and operating system, so if getting - errors please adjust to your needs. 
"psutil" documentation here: https://psutil.readthedocs.io/en/latest/ - sqls: - 11: | - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - round(cpu_utilization::numeric, 2)::float as cpu_utilization, - round(load_1m_norm::numeric, 2)::float as load_1m_norm, - round(load_1m::numeric, 2)::float as load_1m, - round(load_5m_norm::numeric, 2)::float as load_5m_norm, - round(load_5m::numeric, 2)::float as load_5m, - round("user"::numeric, 2)::float as "user", - round(system::numeric, 2)::float as system, - round(idle::numeric, 2)::float as idle, - round(iowait::numeric, 2)::float as iowait, - round(irqs::numeric, 2)::float as irqs, - round(other::numeric, 2)::float as other - from - get_psutil_cpu() - init_sql: | - CREATE EXTENSION IF NOT EXISTS plpython3u; - - CREATE OR REPLACE FUNCTION get_psutil_cpu( - OUT cpu_utilization float8, OUT load_1m_norm float8, OUT load_1m float8, OUT load_5m_norm float8, OUT load_5m float8, - OUT "user" float8, OUT system float8, OUT idle float8, OUT iowait float8, OUT irqs float8, OUT other float8 - ) - LANGUAGE plpython3u - AS $FUNCTION$ - - from os import getloadavg - from psutil import cpu_times_percent, cpu_percent, cpu_count - from threading import Thread - - class GetCpuPercentThread(Thread): - def __init__(self, interval_seconds): - self.interval_seconds = interval_seconds - self.cpu_utilization_info = None - super(GetCpuPercentThread, self).__init__() - - def run(self): - self.cpu_utilization_info = cpu_percent(self.interval_seconds) - - t = GetCpuPercentThread(0.5) - t.start() - - ct = cpu_times_percent(0.5) - la = getloadavg() - - t.join() - - return t.cpu_utilization_info, la[0] / cpu_count(), la[0], la[1] / cpu_count(), la[1], ct.user, ct.system, ct.idle, ct.iowait, ct.irq + ct.softirq, ct.steal + ct.guest + ct.guest_nice - - $FUNCTION$; - - GRANT EXECUTE ON FUNCTION get_psutil_cpu() TO pgwatch; - COMMENT ON FUNCTION get_psutil_cpu() IS 'created for pgwatch'; - gauges: - - '*' - 
is_instance_level: true + description: > + This metric requires the "psutil" Python package to be installed on the PostgreSQL server. + It provides CPU utilization and load averages using the "psutil" library. + "psutil" is known to behave differently depending on the used version and operating system, so if getting + errors please adjust to your needs. "psutil" documentation here: https://psutil.readthedocs.io/en/latest/ + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + round(cpu_utilization::numeric, 2)::float as cpu_utilization, + round(load_1m_norm::numeric, 2)::float as load_1m_norm, + round(load_1m::numeric, 2)::float as load_1m, + round(load_5m_norm::numeric, 2)::float as load_5m_norm, + round(load_5m::numeric, 2)::float as load_5m, + round("user"::numeric, 2)::float as "user", + round(system::numeric, 2)::float as system, + round(idle::numeric, 2)::float as idle, + round(iowait::numeric, 2)::float as iowait, + round(irqs::numeric, 2)::float as irqs, + round(other::numeric, 2)::float as other + from + get_psutil_cpu() + init_sql: | + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_psutil_cpu( + OUT cpu_utilization float8, OUT load_1m_norm float8, OUT load_1m float8, OUT load_5m_norm float8, OUT load_5m float8, + OUT "user" float8, OUT system float8, OUT idle float8, OUT iowait float8, OUT irqs float8, OUT other float8 + ) + LANGUAGE plpython3u + AS $FUNCTION$ + + from os import getloadavg + from psutil import cpu_times_percent, cpu_percent, cpu_count + from threading import Thread + + class GetCpuPercentThread(Thread): + def __init__(self, interval_seconds): + self.interval_seconds = interval_seconds + self.cpu_utilization_info = None + super(GetCpuPercentThread, self).__init__() + + def run(self): + self.cpu_utilization_info = cpu_percent(self.interval_seconds) + + t = GetCpuPercentThread(0.5) + t.start() + + ct = cpu_times_percent(0.5) + la = getloadavg() + + t.join() + + 
return t.cpu_utilization_info, la[0] / cpu_count(), la[0], la[1] / cpu_count(), la[1], ct.user, ct.system, ct.idle, ct.iowait, ct.irq + ct.softirq, ct.steal + ct.guest + ct.guest_nice + + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_psutil_cpu() TO pgwatch; + COMMENT ON FUNCTION get_psutil_cpu() IS 'created for pgwatch'; + gauges: + - '*' + is_instance_level: true psutil_disk: - description: > - This metric requires the "psutil" Python package to be installed on the PostgreSQL server. - It provides disk usage statistics using the "psutil" library. - "psutil" is known to behave differently depending on the used version and operating system, so if getting - errors please adjust to your needs. "psutil" documentation here: https://psutil.readthedocs.io/en/latest/ - sqls: - 11: | - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - dir_or_tablespace as tag_dir_or_tablespace, - path as tag_path, - total, used, free, percent - from - get_psutil_disk() - init_sql: |- - CREATE EXTENSION IF NOT EXISTS plpython3u; - - CREATE OR REPLACE FUNCTION get_psutil_disk( - OUT dir_or_tablespace text, OUT path text, OUT total float8, OUT used float8, OUT free float8, OUT percent float8 - ) - RETURNS SETOF record - LANGUAGE plpython3u - SECURITY DEFINER - AS $FUNCTION$ - - from os import stat - from os.path import join, exists - from psutil import disk_usage - ret_list = [] - - # data_directory - r = plpy.execute("select current_setting('data_directory') as dd, current_setting('log_directory') as ld, current_setting('server_version_num')::int as pgver") - dd = r[0]['dd'] - ld = r[0]['ld'] - du_dd = disk_usage(dd) - ret_list.append(['data_directory', dd, du_dd.total, du_dd.used, du_dd.free, du_dd.percent]) - - dd_stat = stat(dd) - # log_directory - if ld: - if not ld.startswith('/'): - ld_path = join(dd, ld) - else: - ld_path = ld - if exists(ld_path): - log_stat = stat(ld_path) - if log_stat.st_dev == dd_stat.st_dev: - pass # no new info, same device 
- else: - du = disk_usage(ld_path) - ret_list.append(['log_directory', ld_path, du.total, du.used, du.free, du.percent]) - - # WAL / XLOG directory - # plpy.notice('pg_wal' if r[0]['pgver'] >= 100000 else 'pg_xlog', r[0]['pgver']) - joined_path_wal = join(r[0]['dd'], 'pg_wal' if r[0]['pgver'] >= 100000 else 'pg_xlog') - wal_stat = stat(joined_path_wal) - if wal_stat.st_dev == dd_stat.st_dev: - pass # no new info, same device + description: > + This metric requires the "psutil" Python package to be installed on the PostgreSQL server. + It provides disk usage statistics using the "psutil" library. + "psutil" is known to behave differently depending on the used version and operating system, so if getting + errors please adjust to your needs. "psutil" documentation here: https://psutil.readthedocs.io/en/latest/ + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + dir_or_tablespace as tag_dir_or_tablespace, + path as tag_path, + total, used, free, percent + from + get_psutil_disk() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_psutil_disk( + OUT dir_or_tablespace text, OUT path text, OUT total float8, OUT used float8, OUT free float8, OUT percent float8 + ) + RETURNS SETOF record + LANGUAGE plpython3u + SECURITY DEFINER + AS $FUNCTION$ + + from os import stat + from os.path import join, exists + from psutil import disk_usage + ret_list = [] + + # data_directory + r = plpy.execute("select current_setting('data_directory') as dd, current_setting('log_directory') as ld, current_setting('server_version_num')::int as pgver") + dd = r[0]['dd'] + ld = r[0]['ld'] + du_dd = disk_usage(dd) + ret_list.append(['data_directory', dd, du_dd.total, du_dd.used, du_dd.free, du_dd.percent]) + + dd_stat = stat(dd) + # log_directory + if ld: + if not ld.startswith('/'): + ld_path = join(dd, ld) else: - du = disk_usage(joined_path_wal) - ret_list.append(['pg_wal', joined_path_wal, du.total, 
du.used, du.free, du.percent]) - - # add user created tablespaces if any - sql_tablespaces = """ - select spcname as name, pg_catalog.pg_tablespace_location(oid) as location - from pg_catalog.pg_tablespace where not spcname like any(array[E'pg\\_%'])""" - for row in plpy.cursor(sql_tablespaces): - du = disk_usage(row['location']) - ret_list.append([row['name'], row['location'], du.total, du.used, du.free, du.percent]) - return ret_list - - $FUNCTION$; - - GRANT EXECUTE ON FUNCTION get_psutil_disk() TO pgwatch; - COMMENT ON FUNCTION get_psutil_disk() IS 'created for pgwatch'; - gauges: - - '*' - is_instance_level: true + ld_path = ld + if exists(ld_path): + log_stat = stat(ld_path) + if log_stat.st_dev == dd_stat.st_dev: + pass # no new info, same device + else: + du = disk_usage(ld_path) + ret_list.append(['log_directory', ld_path, du.total, du.used, du.free, du.percent]) + + # WAL / XLOG directory + # plpy.notice('pg_wal' if r[0]['pgver'] >= 100000 else 'pg_xlog', r[0]['pgver']) + joined_path_wal = join(r[0]['dd'], 'pg_wal' if r[0]['pgver'] >= 100000 else 'pg_xlog') + wal_stat = stat(joined_path_wal) + if wal_stat.st_dev == dd_stat.st_dev: + pass # no new info, same device + else: + du = disk_usage(joined_path_wal) + ret_list.append(['pg_wal', joined_path_wal, du.total, du.used, du.free, du.percent]) + + # add user created tablespaces if any + sql_tablespaces = """ + select spcname as name, pg_catalog.pg_tablespace_location(oid) as location + from pg_catalog.pg_tablespace where not spcname like any(array[E'pg\\_%'])""" + for row in plpy.cursor(sql_tablespaces): + du = disk_usage(row['location']) + ret_list.append([row['name'], row['location'], du.total, du.used, du.free, du.percent]) + return ret_list + + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_psutil_disk() TO pgwatch; + COMMENT ON FUNCTION get_psutil_disk() IS 'created for pgwatch'; + gauges: + - '*' + is_instance_level: true psutil_disk_io_total: - description: > - This metric requires the "psutil" Python 
package to be installed on the PostgreSQL server. - It provides total disk I/O statistics using the "psutil" library. - "psutil" is known to behave differently depending on the used version and operating system, so if getting - errors please adjust to your needs. "psutil" documentation here: https://psutil.readthedocs.io/en/latest/ - sqls: - 11: | - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - read_count, - write_count, - read_bytes, - write_bytes - from - get_psutil_disk_io_total() - init_sql: |- - CREATE EXTENSION IF NOT EXISTS plpython3u; - - CREATE OR REPLACE FUNCTION get_psutil_disk_io_total( - OUT read_count float8, OUT write_count float8, OUT read_bytes float8, OUT write_bytes float8 - ) - LANGUAGE plpython3u - AS $FUNCTION$ - from psutil import disk_io_counters - dc = disk_io_counters(perdisk=False) - if dc: - return dc.read_count, dc.write_count, dc.read_bytes, dc.write_bytes - else: - return None, None, None, None - $FUNCTION$; - - GRANT EXECUTE ON FUNCTION get_psutil_disk_io_total() TO pgwatch; - COMMENT ON FUNCTION get_psutil_disk_io_total() IS 'created for pgwatch'; - is_instance_level: true + description: > + This metric requires the "psutil" Python package to be installed on the PostgreSQL server. + It provides total disk I/O statistics using the "psutil" library. + "psutil" is known to behave differently depending on the used version and operating system, so if getting + errors please adjust to your needs. 
"psutil" documentation here: https://psutil.readthedocs.io/en/latest/ + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + read_count, + write_count, + read_bytes, + write_bytes + from + get_psutil_disk_io_total() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_psutil_disk_io_total( + OUT read_count float8, OUT write_count float8, OUT read_bytes float8, OUT write_bytes float8 + ) + LANGUAGE plpython3u + AS $FUNCTION$ + from psutil import disk_io_counters + dc = disk_io_counters(perdisk=False) + if dc: + return dc.read_count, dc.write_count, dc.read_bytes, dc.write_bytes + else: + return None, None, None, None + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_psutil_disk_io_total() TO pgwatch; + COMMENT ON FUNCTION get_psutil_disk_io_total() IS 'created for pgwatch'; + is_instance_level: true psutil_mem: - description: > - This metric requires the "psutil" Python package to be installed on the PostgreSQL server. - It provides memory usage statistics using the "psutil" library. - "psutil" is known to behave differently depending on the used version and operating system, so if getting - errors please adjust to your needs. 
"psutil" documentation here: https://psutil.readthedocs.io/en/latest/ - sqls: - 11: | - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - total, used, free, buff_cache, available, percent, - swap_total, swap_used, swap_free, swap_percent - from - get_psutil_mem() - init_sql: |- - CREATE EXTENSION IF NOT EXISTS plpython3u; /* "plpython3u" might need changing to "plpythonu" (Python 2 everywhere for new OS-es */ - - CREATE OR REPLACE FUNCTION get_psutil_mem( - OUT total float8, OUT used float8, OUT free float8, OUT buff_cache float8, OUT available float8, OUT percent float8, - OUT swap_total float8, OUT swap_used float8, OUT swap_free float8, OUT swap_percent float8 - ) - LANGUAGE plpython3u - AS $FUNCTION$ - from psutil import virtual_memory, swap_memory - vm = virtual_memory() - sw = swap_memory() - return vm.total, vm.used, vm.free, vm.buffers + vm.cached, vm.available, vm.percent, sw.total, sw.used, sw.free, sw.percent - $FUNCTION$; - - GRANT EXECUTE ON FUNCTION get_psutil_mem() TO pgwatch; - COMMENT ON FUNCTION get_psutil_mem() IS 'created for pgwatch'; - gauges: - - '*' - is_instance_level: true + description: > + This metric requires the "psutil" Python package to be installed on the PostgreSQL server. + It provides memory usage statistics using the "psutil" library. + "psutil" is known to behave differently depending on the used version and operating system, so if getting + errors please adjust to your needs. 
"psutil" documentation here: https://psutil.readthedocs.io/en/latest/ + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + total, used, free, buff_cache, available, percent, + swap_total, swap_used, swap_free, swap_percent + from + get_psutil_mem() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; /* "plpython3u" might need changing to "plpythonu" (Python 2 everywhere for new OS-es */ + + CREATE OR REPLACE FUNCTION get_psutil_mem( + OUT total float8, OUT used float8, OUT free float8, OUT buff_cache float8, OUT available float8, OUT percent float8, + OUT swap_total float8, OUT swap_used float8, OUT swap_free float8, OUT swap_percent float8 + ) + LANGUAGE plpython3u + AS $FUNCTION$ + from psutil import virtual_memory, swap_memory + vm = virtual_memory() + sw = swap_memory() + return vm.total, vm.used, vm.free, vm.buffers + vm.cached, vm.available, vm.percent, sw.total, sw.used, sw.free, sw.percent + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_psutil_mem() TO pgwatch; + COMMENT ON FUNCTION get_psutil_mem() IS 'created for pgwatch'; + gauges: + - '*' + is_instance_level: true reco_add_index: - description: > - Retrieves recommendations for creating indexes based on the `pg_qualstats_index_advisor()` function. - It provides insights into potential index creation opportunities to improve query performance. - This metric helps administrators optimize database performance by suggesting index creation. - sqls: - 11: |- - select /* pgwatch_generated */ - epoch_ns, - tag_reco_topic, - tag_object_name, - recommendation, - case when exists (select * from pg_inherits - where inhrelid = regclass(tag_object_name) - ) then 'Partitioned table, create the index on parent' else extra_info - end as extra_info - FROM ( - SELECT (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - 'create_index'::text as tag_reco_topic, - (regexp_matches(v::text, E'ON (.*?) 
'))[1] as tag_object_name, - v::text as recommendation, - '' as extra_info - FROM json_array_elements( - pg_qualstats_index_advisor() -> 'indexes') v - ) x - ORDER BY tag_object_name - node_status: primary - is_private: true + description: > + Retrieves recommendations for creating indexes based on the `pg_qualstats_index_advisor()` function. + It provides insights into potential index creation opportunities to improve query performance. + This metric helps administrators optimize database performance by suggesting index creation. + sqls: + 11: |- + select /* pgwatch_generated */ + epoch_ns, + tag_reco_topic, + tag_object_name, + recommendation, + case when exists (select * from pg_inherits + where inhrelid = regclass(tag_object_name) + ) then 'Partitioned table, create the index on parent' else extra_info + end as extra_info + FROM ( + SELECT (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'create_index'::text as tag_reco_topic, + (regexp_matches(v::text, E'ON (.*?) '))[1] as tag_object_name, + v::text as recommendation, + '' as extra_info + FROM json_array_elements( + pg_qualstats_index_advisor() -> 'indexes') v + ) x + ORDER BY tag_object_name + node_status: primary + is_private: true reco_default_public_schema: - description: > - Retrieves recommendations for revoking the CREATE privilege on the public schema from PUBLIC. - This metric helps enhance security by ensuring that only authorized users can create new objects in the public schema. 
- sqls: - 11: | - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - 'default_public_schema_privs'::text as tag_reco_topic, - nspname::text as tag_object_name, - 'REVOKE CREATE ON SCHEMA public FROM PUBLIC;'::text as recommendation, - 'only authorized users should be allowed to create new objects'::text as extra_info - from - pg_namespace - where - nspname = 'public' - and nspacl::text ~ E'[,\\{]+=U?C/' - node_status: primary - reco_disabled_triggers: - description: > - Retrieves recommendations for reviewing and potentially dropping disabled triggers in the PostgreSQL database. - It provides insights into triggers that are currently disabled, helping administrators identify and manage unused or unnecessary triggers. - This metric helps maintain database performance and reduce clutter by suggesting the removal of unused triggers. - sqls: - 11: | - /* "temporarily" disabled triggers might be forgotten about... */ - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - 'disabled_triggers'::text as tag_reco_topic, - quote_ident(nspname)||'.'||quote_ident(relname) as tag_object_name, - 'review usage of trigger and consider dropping it if not needed anymore'::text as recommendation, - ''::text as extra_info - from - pg_trigger t - join - pg_class c on c.oid = t.tgrelid - join - pg_namespace n on n.oid = c.relnamespace - where - tgenabled = 'D' - node_status: primary + description: > + Retrieves recommendations for revoking the CREATE privilege on the public schema from PUBLIC. + This metric helps enhance security by ensuring that only authorized users can create new objects in the public schema. 
+ sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'default_public_schema_privs'::text as tag_reco_topic, + nspname::text as tag_object_name, + 'REVOKE CREATE ON SCHEMA public FROM PUBLIC;'::text as recommendation, + 'only authorized users should be allowed to create new objects'::text as extra_info + from + pg_namespace + where + nspname = 'public' + and nspacl::text ~ E'[,\\{]+=U?C/' + node_status: primary + reco_disabled_triggers: + description: > + Retrieves recommendations for reviewing and potentially dropping disabled triggers in the PostgreSQL database. + It provides insights into triggers that are currently disabled, helping administrators identify and manage unused or unnecessary triggers. + This metric helps maintain database performance and reduce clutter by suggesting the removal of unused triggers. + sqls: + 11: | + /* "temporarily" disabled triggers might be forgotten about... */ + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'disabled_triggers'::text as tag_reco_topic, + quote_ident(nspname)||'.'||quote_ident(relname) as tag_object_name, + 'review usage of trigger and consider dropping it if not needed anymore'::text as recommendation, + ''::text as extra_info + from + pg_trigger t + join + pg_class c on c.oid = t.tgrelid + join + pg_namespace n on n.oid = c.relnamespace + where + tgenabled = 'D' + node_status: primary reco_drop_index: - description: > - Retrieves recommendations for dropping unused or invalid indexes in the PostgreSQL database. - It provides insights into indexes that have not been scanned and are consuming a significant portion of the database size. - This metric helps administrators optimize database performance by suggesting the removal of unnecessary indexes. 
- sqls: - 11: | - /* assumes the pg_qualstats extension */ - with q_database_size as ( - select pg_database_size(current_database()) as database_size_b - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - 'drop_index'::text as tag_reco_topic, - quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_object_name, - ('DROP INDEX ' || quote_ident(schemaname)||'.'||quote_ident(indexrelname) || ';')::text as recommendation, - 'Make sure to also check replica pg_stat_user_indexes.idx_scan count if using them for queries'::text as extra_info - from - pg_stat_user_indexes - join - pg_index using (indexrelid) - join - q_database_size on true - where - idx_scan = 0 - and ((pg_relation_size(indexrelid)::numeric / database_size_b) > 0.005 /* 0.5% DB size threshold */ - or indisvalid) - and not indisprimary - and not indisreplident - and not schemaname like '_timescaledb%' - node_status: primary + description: > + Retrieves recommendations for dropping unused or invalid indexes in the PostgreSQL database. + It provides insights into indexes that have not been scanned and are consuming a significant portion of the database size. + This metric helps administrators optimize database performance by suggesting the removal of unnecessary indexes. 
+ sqls: + 11: | + /* never-scanned indexes that are either large (> 0.5% of DB size) or invalid; core catalogs only */ + with q_database_size as ( + select pg_database_size(current_database()) as database_size_b + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'drop_index'::text as tag_reco_topic, + quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_object_name, + ('DROP INDEX ' || quote_ident(schemaname)||'.'||quote_ident(indexrelname) || ';')::text as recommendation, + 'Make sure to also check replica pg_stat_user_indexes.idx_scan count if using them for queries'::text as extra_info + from + pg_stat_user_indexes + join + pg_index using (indexrelid) + join + q_database_size on true + where + idx_scan = 0 + and ((pg_relation_size(indexrelid)::numeric / database_size_b) > 0.005 /* 0.5% DB size threshold */ + or not indisvalid) + and not indisprimary + and not indisreplident + and not schemaname like '_timescaledb%' + node_status: primary reco_nested_views: - description: > - Retrieves recommendations for overly nested views in the PostgreSQL database. - It identifies views that depend on other views and have a nesting depth greater than 3. - This metric helps administrators optimize query performance by suggesting the reduction of view nesting. 
- sqls: - 11: |- - WITH RECURSIVE views AS ( - -- get the directly depending views - SELECT v.oid::regclass AS view, - format('%s.%s', quote_ident(n.nspname), quote_ident(v.relname)) as full_name, - 1 AS level - FROM pg_depend AS d - JOIN pg_rewrite AS r - ON r.oid = d.objid - JOIN pg_class AS v - ON v.oid = r.ev_class - JOIN pg_namespace AS n - ON n.oid = v.relnamespace - WHERE v.relkind = 'v' - AND NOT n.nspname = ANY(array['information_schema', E'pg\\_%']) - AND NOT v.relname LIKE E'pg\\_%' - AND d.classid = 'pg_rewrite'::regclass - AND d.refclassid = 'pg_class'::regclass - AND d.deptype = 'n' - UNION ALL - -- add the views that depend on these - SELECT v.oid::regclass, - format('%s.%s', quote_ident(n.nspname), quote_ident(v.relname)) as full_name, - views.level + 1 - FROM views - JOIN pg_depend AS d - ON d.refobjid = views.view - JOIN pg_rewrite AS r - ON r.oid = d.objid - JOIN pg_class AS v - ON v.oid = r.ev_class - JOIN pg_namespace AS n - ON n.oid = v.relnamespace - WHERE v.relkind = 'v' - AND NOT n.nspname = ANY(array['information_schema', E'pg\\_%']) - AND d.classid = 'pg_rewrite'::regclass - AND d.refclassid = 'pg_class'::regclass - AND d.deptype = 'n' - AND v.oid <> views.view -- avoid loop - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - 'overly_nested_views'::text AS tag_reco_topic, - full_name::text as tag_object_name, - 'overly nested views can affect performance'::text recommendation, - 'nesting_depth: ' || coalesce(max(level)::text, '-') AS extra_info - FROM views - GROUP BY 1, 2, 3 - HAVING max(level) > 3 - ORDER BY max(level) DESC, full_name::text - node_status: primary + description: > + Retrieves recommendations for overly nested views in the PostgreSQL database. + It identifies views that depend on other views and have a nesting depth greater than 3. + This metric helps administrators optimize query performance by suggesting the reduction of view nesting. 
+ sqls: + 11: |- + WITH RECURSIVE views AS ( + -- get the directly depending views + SELECT v.oid::regclass AS view, + format('%s.%s', quote_ident(n.nspname), quote_ident(v.relname)) as full_name, + 1 AS level + FROM pg_depend AS d + JOIN pg_rewrite AS r + ON r.oid = d.objid + JOIN pg_class AS v + ON v.oid = r.ev_class + JOIN pg_namespace AS n + ON n.oid = v.relnamespace + WHERE v.relkind = 'v' + AND NOT n.nspname LIKE ANY(array[E'information\\_schema', E'pg\\_%']) /* pattern match, not equality */ + AND NOT v.relname LIKE E'pg\\_%' + AND d.classid = 'pg_rewrite'::regclass + AND d.refclassid = 'pg_class'::regclass + AND d.deptype = 'n' + UNION ALL + -- add the views that depend on these + SELECT v.oid::regclass, + format('%s.%s', quote_ident(n.nspname), quote_ident(v.relname)) as full_name, + views.level + 1 + FROM views + JOIN pg_depend AS d + ON d.refobjid = views.view + JOIN pg_rewrite AS r + ON r.oid = d.objid + JOIN pg_class AS v + ON v.oid = r.ev_class + JOIN pg_namespace AS n + ON n.oid = v.relnamespace + WHERE v.relkind = 'v' + AND NOT n.nspname LIKE ANY(array[E'information\\_schema', E'pg\\_%']) /* pattern match, not equality */ + AND d.classid = 'pg_rewrite'::regclass + AND d.refclassid = 'pg_class'::regclass + AND d.deptype = 'n' + AND v.oid <> views.view -- avoid loop + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'overly_nested_views'::text AS tag_reco_topic, + full_name::text as tag_object_name, + 'overly nested views can affect performance'::text recommendation, + 'nesting_depth: ' || coalesce(max(level)::text, '-') AS extra_info + FROM views + GROUP BY 1, 2, 3 + HAVING max(level) > 3 + ORDER BY max(level) DESC, full_name::text + node_status: primary reco_partial_index_candidates: - description: > - Retrieves recommendations for creating partial indexes on columns with a high fraction of NULL values. - It identifies single-column indexes that could potentially be declared as partial indexes, leaving out NULL values. 
- This metric helps optimize index usage and improve query performance by suggesting the creation of partial indexes. - sqls: - 11: | - select distinct /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - 'partial_index_candidates'::text as tag_reco_topic, - quote_ident(ni.nspname)||'.'||quote_ident(ci.relname) as tag_object_name, - ('index ' || quote_ident(ni.nspname)||'.'||quote_ident(ci.relname) || ' on ' || quote_ident(s.schemaname) || '.' || quote_ident(s.tablename) || ' column ' || quote_ident(s.attname) || ' could possibly be declared partial leaving out NULL-s')::text as recommendation, - 'NULL fraction: ' || round((null_frac * 100)::numeric, 1) || '%, rowcount estimate: ' || (c.reltuples)::int8 || ', current definition: ' || pg_get_indexdef(i.indexrelid) as extra_info - from - pg_stats s - join pg_attribute a using (attname) - join pg_index i on i.indkey[0] = a.attnum and i.indrelid = a.attrelid - join pg_class c on c.oid = i.indrelid - join pg_class ci on ci.oid = i.indexrelid - join pg_namespace ni on ni.oid = ci.relnamespace - where - not indisprimary - and not indisunique - and indisready - and indisvalid - and i.indnatts = 1 /* simple 1 column indexes */ - and null_frac > 0.5 /* 50% empty */ - and not pg_get_indexdef(i.indexrelid) like '% WHERE %' - and c.reltuples >= 1e5 /* ignore smaller tables */ - and not exists ( /* leave out sub-partitions */ - select * from pg_inherits where inhrelid = c.oid - ) + description: > + Retrieves recommendations for creating partial indexes on columns with a high fraction of NULL values. + It identifies single-column indexes that could potentially be declared as partial indexes, leaving out NULL values. + This metric helps optimize index usage and improve query performance by suggesting the creation of partial indexes. 
+ sqls: + 11: | + select distinct /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'partial_index_candidates'::text as tag_reco_topic, + quote_ident(ni.nspname)||'.'||quote_ident(ci.relname) as tag_object_name, + ('index ' || quote_ident(ni.nspname)||'.'||quote_ident(ci.relname) || ' on ' || quote_ident(s.schemaname) || '.' || quote_ident(s.tablename) || ' column ' || quote_ident(s.attname) || ' could possibly be declared partial leaving out NULL-s')::text as recommendation, + 'NULL fraction: ' || round((null_frac * 100)::numeric, 1) || '%, rowcount estimate: ' || (c.reltuples)::int8 || ', current definition: ' || pg_get_indexdef(i.indexrelid) as extra_info + from + pg_stats s + join pg_attribute a using (attname) + join pg_index i on i.indkey[0] = a.attnum and i.indrelid = a.attrelid + join pg_class c on c.oid = i.indrelid and c.relname = s.tablename and c.relnamespace = (select oid from pg_namespace where nspname = s.schemaname) /* the stats row must belong to the indexed table, not to any table sharing the column name */ + join pg_class ci on ci.oid = i.indexrelid + join pg_namespace ni on ni.oid = ci.relnamespace + where + not indisprimary + and not indisunique + and indisready + and indisvalid + and i.indnatts = 1 /* simple 1 column indexes */ + and null_frac > 0.5 /* 50% empty */ + and not pg_get_indexdef(i.indexrelid) like '% WHERE %' + and c.reltuples >= 1e5 /* ignore smaller tables */ + and not exists ( /* leave out sub-partitions */ + select * from pg_inherits where inhrelid = c.oid + ) reco_sprocs_wo_search_path: - description: > - Retrieves recommendations for stored procedures that do not have a fixed `search_path` set. - It identifies stored procedures that could potentially be abused by malicious users if used objects are not fully qualified. - This metric helps enhance security by suggesting the setting of a fixed search_path for stored procedures.
- sqls: - 11: |- - with q_sprocs as ( - select /* pgwatch_generated */ - format('%s.%s', quote_ident(nspname), quote_ident(proname)) as sproc_name, - 'alter function ' || proname || '(' || pg_get_function_arguments(p.oid) || ') set search_path = X;' as fix_sql - from - pg_proc p - join pg_namespace n on n.oid = p.pronamespace - where prosecdef and not 'search_path' = ANY(coalesce(proconfig, '{}'::text[])) - and not pg_catalog.obj_description(p.oid, 'pg_proc') ~ 'pgwatch' - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - 'sprocs_wo_search_path'::text as tag_reco_topic, - sproc_name::text as tag_object_name, - fix_sql::text as recommendation, - 'functions without fixed search_path can be potentially abused by malicious users if used objects are not fully qualified'::text as extra_info - from - q_sprocs - order by - tag_object_name, extra_info - node_status: primary + description: > + Retrieves recommendations for stored procedures that do not have a fixed `search_path` set. + It identifies stored procedures that could potentially be abused by malicious users if used objects are not fully qualified. + This metric helps enhance security by suggesting the setting of a fixed search_path for stored procedures. 
+ sqls: + 11: |- + with q_sprocs as ( + select /* pgwatch_generated */ + format('%s.%s', quote_ident(nspname), quote_ident(proname)) as sproc_name, + 'alter function ' || format('%s.%s', quote_ident(nspname), quote_ident(proname)) || '(' || pg_get_function_identity_arguments(p.oid) || ') set search_path = X;' as fix_sql /* identity arguments omit DEFAULT clauses, which ALTER FUNCTION does not accept */ + from + pg_proc p + join pg_namespace n on n.oid = p.pronamespace + where prosecdef and not exists (select 1 from unnest(coalesce(proconfig, '{}'::text[])) as cfg(entry) where cfg.entry ~ '^search_path=') /* proconfig entries are stored as name=value */ + and coalesce(pg_catalog.obj_description(p.oid, 'pg_proc'), '') !~ 'pgwatch' /* functions without a comment have a NULL description and must still be reported */ + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'sprocs_wo_search_path'::text as tag_reco_topic, + sproc_name::text as tag_object_name, + fix_sql::text as recommendation, + 'functions without fixed search_path can be potentially abused by malicious users if used objects are not fully qualified'::text as extra_info + from + q_sprocs + order by + tag_object_name, extra_info + node_status: primary reco_superusers: - description: > - Retrieves recommendations for reviewing the number of superusers in the PostgreSQL database. - It identifies if there are too many superusers, which can pose a security risk. - This metric helps maintain database security by suggesting a review of superuser accounts. - sqls: - 11: | - with q_su as ( - select count(*) from pg_roles where rolcanlogin and rolsuper - ), - q_total as ( - select count(*) from pg_roles where rolcanlogin - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - 'superuser_count'::text as tag_reco_topic, - '-'::text as tag_object_name, - 'too many superusers detected - review recommended'::text as recommendation, - format('%s active superusers, %s total active users', q_su.count, q_total.count) as extra_info - from - q_su, q_total - where - q_su.count >= 10 - node_status: primary + description: > + Retrieves recommendations for reviewing the number of superusers in the PostgreSQL database.
+ It identifies if there are too many superusers, which can pose a security risk. + This metric helps maintain database security by suggesting a review of superuser accounts. + sqls: + 11: | + with q_su as ( + select count(*) from pg_roles where rolcanlogin and rolsuper + ), + q_total as ( + select count(*) from pg_roles where rolcanlogin + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'superuser_count'::text as tag_reco_topic, + '-'::text as tag_object_name, + 'too many superusers detected - review recommended'::text as recommendation, + format('%s active superusers, %s total active users', q_su.count, q_total.count) as extra_info + from + q_su, q_total + where + q_su.count >= 10 + node_status: primary recommendations: - description: > - When enabled, this metric will find all other metrics starting with `reco_*` and execute those queries. - The metric targets performance, security, and other "best practices" violations. - Users can add new `reco_*` queries freely. - init_sql: CREATE EXTENSION IF NOT EXISTS pg_qualstats; - sqls: - 11: /* dummy placeholder - special handling in code to collect other metrics named reco_* */ + description: > + When enabled, this metric will find all other metrics starting with `reco_*` and execute those queries. + The metric targets performance, security, and other "best practices" violations. + Users can add new `reco_*` queries freely. + init_sql: CREATE EXTENSION IF NOT EXISTS pg_qualstats; + sqls: + 11: /* dummy placeholder - special handling in code to collect other metrics named reco_* */ replication: - description: > - This metric collects replication statistics from the `pg_stat_replication` view. - It provides insights into the status of replication connections, including lag times and states. - This metric is useful for monitoring replication health and performance. 
- sqls: - 11: | - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - application_name as tag_application_name, - usename AS tag_usename, - concat(coalesce(client_addr::text, client_hostname), '_', client_port::text) as tag_client_info, - coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, sent_lsn)::int8, 0) as sent_lag_b, - coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, write_lsn)::int8, 0) as write_lag_b, - coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, flush_lsn)::int8, 0) as flush_lag_b, - coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, replay_lsn)::int8, 0) as replay_lag_b, - (extract(epoch from write_lag) * 1000)::int8 as write_lag_ms, - (extract(epoch from flush_lag) * 1000)::int8 as flush_lag_ms, - (extract(epoch from replay_lag) * 1000)::int8 as replay_lag_ms, - state, - sync_state, - case when sync_state in ('sync', 'quorum') then 1 else 0 end as is_sync_int, - case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int - from - pg_stat_replication - where - coalesce(application_name, '') not in ('pg_basebackup', 'pg_rewind'); - gauges: - - '*' - is_instance_level: true + description: > + This metric collects replication statistics from the `pg_stat_replication` view. + It provides insights into the status of replication connections, including lag times and states. + This metric is useful for monitoring replication health and performance. 
+ sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + application_name as tag_application_name, + usename AS tag_usename, + concat(coalesce(client_addr::text, client_hostname), '_', client_port::text) as tag_client_info, + coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, sent_lsn)::int8, 0) as sent_lag_b, + coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, write_lsn)::int8, 0) as write_lag_b, + coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, flush_lsn)::int8, 0) as flush_lag_b, + coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, replay_lsn)::int8, 0) as replay_lag_b, + (extract(epoch from write_lag) * 1000)::int8 as write_lag_ms, + (extract(epoch from flush_lag) * 1000)::int8 as flush_lag_ms, + (extract(epoch from replay_lag) * 1000)::int8 as replay_lag_ms, + state, + sync_state, + case when sync_state in ('sync', 'quorum') then 1 else 0 end as is_sync_int, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int + from + pg_stat_replication + where + coalesce(application_name, '') not in ('pg_basebackup', 'pg_rewind'); + gauges: + - '*' + is_instance_level: true replication_slot_stats: - description: > - This metric collects statistics from the `pg_stat_replication_slots` view. - It provides insights into the status of replication slots, including transaction counts and byte usage. - This metric is useful for monitoring replication slot health and performance. 
- sqls: - 14: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - slot_name::text as tag_slot_name, - spill_txns, - spill_count, - spill_bytes, - stream_txns, - stream_count, - stream_bytes, - total_txns, - total_bytes - from - pg_stat_replication_slots + description: > + This metric collects statistics from the `pg_stat_replication_slots` view. + It provides insights into the status of replication slots, including transaction counts and byte usage. + This metric is useful for monitoring replication slot health and performance. + sqls: + 14: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + slot_name::text as tag_slot_name, + spill_txns, + spill_count, + spill_bytes, + stream_txns, + stream_count, + stream_bytes, + total_txns, + total_bytes + from + pg_stat_replication_slots replication_slots: - description: > - This metric collects information about replication slots from the `pg_replication_slots` view. - It provides insights into the status of replication slots, including their activity and lag times. - This metric is useful for monitoring replication slot health and performance. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - slot_name::text as tag_slot_name, - coalesce(plugin, 'physical')::text as tag_plugin, - active, - case when active then 0 else 1 end as non_active_int, - pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::int8 as restart_lsn_lag_b, - greatest(age(xmin), age(catalog_xmin))::int8 as xmin_age_tx - from - pg_replication_slots - node_status: primary - gauges: - - '*' - is_instance_level: true + description: > + This metric collects information about replication slots from the `pg_replication_slots` view. + It provides insights into the status of replication slots, including their activity and lag times. + This metric is useful for monitoring replication slot health and performance. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + slot_name::text as tag_slot_name, + coalesce(plugin, 'physical')::text as tag_plugin, + active, + case when active then 0 else 1 end as non_active_int, + pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::int8 as restart_lsn_lag_b, + greatest(age(xmin), age(catalog_xmin))::int8 as xmin_age_tx + from + pg_replication_slots + node_status: primary + gauges: + - '*' + is_instance_level: true sequence_health: - description: > - This metric collects health statistics for sequences in the PostgreSQL database. - It provides insights into the usage and status of sequences, including maximum usage percentages and counts of sequences that are heavily used. - This metric is useful for monitoring sequence health and performance. - sqls: - 11: |- - with q_seq_data as ( - select * from pg_sequences - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - (select round(100.0 * coalesce(max(last_value::numeric / max_value), 0), 2)::float from q_seq_data where not cycle) as max_used_pct, - (select count(*) from q_seq_data where not cycle and last_value::numeric / max_value > 0.5) as p50_used_seq_count, - (select count(*) from q_seq_data where not cycle and last_value::numeric / max_value > 0.75) as p75_used_seq_count + description: > + This metric collects health statistics for sequences in the PostgreSQL database. + It provides insights into the usage and status of sequences, including maximum usage percentages and counts of sequences that are heavily used. + This metric is useful for monitoring sequence health and performance. 
+ sqls: + 11: |- + with q_seq_data as ( + select * from pg_sequences + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + (select round(100.0 * coalesce(max(last_value::numeric / max_value), 0), 2)::float from q_seq_data where not cycle) as max_used_pct, + (select count(*) from q_seq_data where not cycle and last_value::numeric / max_value > 0.5) as p50_used_seq_count, + (select count(*) from q_seq_data where not cycle and last_value::numeric / max_value > 0.75) as p75_used_seq_count server_log_event_counts: - description: > - This metric enables the Postgres server log "tailing" for errors. It can't be used for remote setups, though, - unless the DB logs are somehow mounted or copied over, as real file access is needed! - sqls: - 11: |- - /* - Dummy placeholder - special handling in gatherer code for log parsing - */ + description: > + This metric enables the Postgres server log "tailing" for errors. It can't be used for remote setups, though, + unless the DB logs are somehow mounted or copied over, as real file access is needed! + sqls: + 11: |- + /* + Dummy placeholder - special handling in gatherer code for log parsing + */ settings: - description: > - This metric collects various PostgreSQL server settings and configurations. - It provides insights into the server's configuration, including version, memory settings, and other important parameters. - This metric is useful for monitoring server settings and ensuring optimal performance. 
- sqls: - 11: | - with qs as ( - select name, setting from pg_settings - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - current_setting('server_version') as server_version, - current_setting('server_version_num')::int8 as server_version_num, - (regexp_matches(regexp_replace(current_setting('server_version'), '(beta|devel).*', '', 'g'), E'\\d+'))[1]::float8 as major_version, - current_setting('block_size')::int as block_size, - current_setting('max_connections')::int as max_connections, - current_setting('hot_standby') as hot_standby, - (select setting from qs where name = 'hot_standby_feedback') as hot_standby_feedback, - current_setting('fsync') as fsync, - current_setting('full_page_writes') as full_page_writes, - current_setting('synchronous_commit') as synchronous_commit, - (select setting from qs where name = 'wal_compression') as wal_compression, - (select setting from qs where name = 'wal_log_hints') as wal_log_hints, - (select setting from qs where name = 'synchronous_standby_names') as synchronous_standby_names, - current_setting('shared_buffers') as shared_buffers, - current_setting('work_mem') as work_mem, - current_setting('maintenance_work_mem') as maintenance_work_mem, - current_setting('effective_cache_size') as effective_cache_size, - (select setting::int8 from qs where name = 'default_statistics_target') as default_statistics_target, - (select setting::float8 from qs where name = 'random_page_cost') as random_page_cost, - pg_size_pretty(((select setting::int8 from qs where name = 'min_wal_size') * 1024^2)::int8) as min_wal_size, - pg_size_pretty(((select setting::int8 from qs where name = 'max_wal_size') * 1024^2)::int8) as max_wal_size, - (select setting from qs where name = 'checkpoint_segments') as checkpoint_segments, - current_setting('checkpoint_timeout') as checkpoint_timeout, - current_setting('checkpoint_completion_target') as checkpoint_completion_target, - (select setting::int8 from qs where 
name = 'max_worker_processes') as max_worker_processes, - (select setting::int8 from qs where name = 'max_parallel_workers') as max_parallel_workers, - (select setting::int8 from qs where name = 'max_parallel_workers_per_gather') as max_parallel_workers_per_gather, - (select case when setting = 'on' then 1 else 0 end from qs where name = 'jit') as jit, - (select case when setting = 'on' then 1 else 0 end from qs where name = 'ssl') as ssl, - current_setting('statement_timeout') as statement_timeout, - current_setting('deadlock_timeout') as deadlock_timeout, - (select setting from qs where name = 'data_checksums') as data_checksums, - (select setting::int8 from qs where name = 'max_connections') as max_connections, - (select setting::int8 from qs where name = 'max_wal_senders') as max_wal_senders, - (select setting::int8 from qs where name = 'max_replication_slots') as max_replication_slots, - (select setting::int8 from qs where name = 'max_prepared_transactions') as max_prepared_transactions, - (select setting::int8 from qs where name = 'lock_timeout') || ' (ms)' as lock_timeout, - (select setting from qs where name = 'archive_mode') as archive_mode, - (select setting from qs where name = 'archive_command') as archive_command, - current_setting('archive_timeout') as archive_timeout, - (select setting from qs where name = 'shared_preload_libraries') as shared_preload_libraries, - (select setting from qs where name = 'listen_addresses') as listen_addresses, - (select setting from qs where name = 'ssl') as ssl, - (select setting from qs where name = 'autovacuum') as autovacuum, - (select setting::int8 from qs where name = 'autovacuum_max_workers') as autovacuum_max_workers, - (select setting::float8 from qs where name = 'autovacuum_vacuum_scale_factor') as autovacuum_vacuum_scale_factor, - (select setting::float8 from qs where name = 'autovacuum_vacuum_threshold') as autovacuum_vacuum_threshold, - (select setting::float8 from qs where name = 
'autovacuum_analyze_scale_factor') as autovacuum_analyze_scale_factor, - (select setting::float8 from qs where name = 'autovacuum_analyze_threshold') as autovacuum_analyze_scale_factor + description: > + This metric collects various PostgreSQL server settings and configurations. + It provides insights into the server's configuration, including version, memory settings, and other important parameters. + This metric is useful for monitoring server settings and ensuring optimal performance. + sqls: + 11: | + with qs as ( + select name, setting from pg_settings + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + current_setting('server_version') as server_version, + current_setting('server_version_num')::int8 as server_version_num, + (regexp_matches(regexp_replace(current_setting('server_version'), '(beta|devel).*', '', 'g'), E'\\d+'))[1]::float8 as major_version, + current_setting('block_size')::int as block_size, + current_setting('max_connections')::int as max_connections, + current_setting('hot_standby') as hot_standby, + (select setting from qs where name = 'hot_standby_feedback') as hot_standby_feedback, + current_setting('fsync') as fsync, + current_setting('full_page_writes') as full_page_writes, + current_setting('synchronous_commit') as synchronous_commit, + (select setting from qs where name = 'wal_compression') as wal_compression, + (select setting from qs where name = 'wal_log_hints') as wal_log_hints, + (select setting from qs where name = 'synchronous_standby_names') as synchronous_standby_names, + current_setting('shared_buffers') as shared_buffers, + current_setting('work_mem') as work_mem, + current_setting('maintenance_work_mem') as maintenance_work_mem, + current_setting('effective_cache_size') as effective_cache_size, + (select setting::int8 from qs where name = 'default_statistics_target') as default_statistics_target, + (select setting::float8 from qs where name = 'random_page_cost') as random_page_cost, 
+ pg_size_pretty(((select setting::int8 from qs where name = 'min_wal_size') * 1024^2)::int8) as min_wal_size, + pg_size_pretty(((select setting::int8 from qs where name = 'max_wal_size') * 1024^2)::int8) as max_wal_size, + (select setting from qs where name = 'checkpoint_segments') as checkpoint_segments, + current_setting('checkpoint_timeout') as checkpoint_timeout, + current_setting('checkpoint_completion_target') as checkpoint_completion_target, + (select setting::int8 from qs where name = 'max_worker_processes') as max_worker_processes, + (select setting::int8 from qs where name = 'max_parallel_workers') as max_parallel_workers, + (select setting::int8 from qs where name = 'max_parallel_workers_per_gather') as max_parallel_workers_per_gather, + (select case when setting = 'on' then 1 else 0 end from qs where name = 'jit') as jit, + (select case when setting = 'on' then 1 else 0 end from qs where name = 'ssl') as ssl, + current_setting('statement_timeout') as statement_timeout, + current_setting('deadlock_timeout') as deadlock_timeout, + (select setting from qs where name = 'data_checksums') as data_checksums, + (select setting::int8 from qs where name = 'max_connections') as max_connections, + (select setting::int8 from qs where name = 'max_wal_senders') as max_wal_senders, + (select setting::int8 from qs where name = 'max_replication_slots') as max_replication_slots, + (select setting::int8 from qs where name = 'max_prepared_transactions') as max_prepared_transactions, + (select setting::int8 from qs where name = 'lock_timeout') || ' (ms)' as lock_timeout, + (select setting from qs where name = 'archive_mode') as archive_mode, + (select setting from qs where name = 'archive_command') as archive_command, + current_setting('archive_timeout') as archive_timeout, + (select setting from qs where name = 'shared_preload_libraries') as shared_preload_libraries, + (select setting from qs where name = 'listen_addresses') as listen_addresses, + (select setting from qs 
where name = 'ssl') as ssl, + (select setting from qs where name = 'autovacuum') as autovacuum, + (select setting::int8 from qs where name = 'autovacuum_max_workers') as autovacuum_max_workers, + (select setting::float8 from qs where name = 'autovacuum_vacuum_scale_factor') as autovacuum_vacuum_scale_factor, + (select setting::float8 from qs where name = 'autovacuum_vacuum_threshold') as autovacuum_vacuum_threshold, + (select setting::float8 from qs where name = 'autovacuum_analyze_scale_factor') as autovacuum_analyze_scale_factor, + (select setting::float8 from qs where name = 'autovacuum_analyze_threshold') as autovacuum_analyze_threshold /* alias mirrors the setting name so the column does not collide with autovacuum_analyze_scale_factor */ show_plans_realtime: - description: > - This metric collects real-time query plans from the `pg_show_plans` extension. - It provides insights into the execution plans of currently running queries, helping to identify performance issues and optimize query execution. - This metric is useful for monitoring query performance and understanding how queries are executed in real-time. - sqls: - 11: | - /* assumes pg_show_plans extension */ - select /* pgwatch_generated */ - max((extract(epoch from now()) * 1e9)::int8) as epoch_ns, - max(extract(epoch from now() - query_start))::int as max_s, - avg(extract(epoch from now() - query_start))::int as avg_s, - count(*), - array_to_string(array_agg(distinct usename order by usename), ',') as "users", - max(md5(plan)) as tag_hash, /* needed for influx */ - plan, - max(query) as query - from - pg_show_plans p - join - pg_stat_activity a - using (pid) - where - p.pid != pg_backend_pid() - and datname = current_database() - and now() - query_start > '1s'::interval - and backend_type = 'client backend' - group by - plan - order by - max_s desc - limit - 10 + description: > + This metric collects real-time query plans from the `pg_show_plans` extension.
+ It provides insights into the execution plans of currently running queries, helping to identify performance issues and optimize query execution. + This metric is useful for monitoring query performance and understanding how queries are executed in real-time. + sqls: + 11: | + /* assumes pg_show_plans extension */ + select /* pgwatch_generated */ + max((extract(epoch from now()) * 1e9)::int8) as epoch_ns, + max(extract(epoch from now() - query_start))::int as max_s, + avg(extract(epoch from now() - query_start))::int as avg_s, + count(*), + array_to_string(array_agg(distinct usename order by usename), ',') as "users", + max(md5(plan)) as tag_hash, /* needed for influx */ + plan, + max(query) as query + from + pg_show_plans p + join + pg_stat_activity a + using (pid) + where + p.pid != pg_backend_pid() + and datname = current_database() + and now() - query_start > '1s'::interval + and backend_type = 'client backend' + group by + plan + order by + max_s desc + limit + 10 smart_health_per_disk: - description: > - This metric collects SMART health status for all disk devices using the `smartmontools` utility. - It provides insights into the health of disk devices, including their SMART status and return codes. - This metric is useful for monitoring disk health and identifying potential issues with disk devices. - This helper is always meant to be tested and adjusted to make sure all disk are detected. 
- Most likely smartctl privileges must be escalated to give postgres access: `sudo chmod u+s /usr/local/sbin/smartctl` - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - device as tag_device, - retcode - from - get_smart_health_per_device() - init_sql: |- - CREATE EXTENSION IF NOT EXISTS plpython3u; - - CREATE OR REPLACE FUNCTION get_smart_health_per_device(OUT device text, OUT retcode int) RETURNS SETOF record AS - $$ - import subprocess - ret_list = [] - - #disk_detect_cmd='smartctl --scan | cut -d " " -f3 | grep mega' # for Lenovo ServerRAID M1210 - disk_detect_cmd='lsblk -io KNAME,TYPE | grep '' disk'' | cut -d " " -f1 | sort' - p = subprocess.run(disk_detect_cmd, stdout=subprocess.PIPE, encoding='utf-8', shell=True) - if p.returncode != 0: - return ret_list - disks = p.stdout.splitlines() - - for disk in disks: - # health_cmd = 'smartctl -d $disk -a -q silent /dev/sda' % disk # for Lenovo ServerRAID M1210 members - health_cmd = 'smartctl -a -q silent /dev/%s' % disk - p = subprocess.run(health_cmd, stdout=subprocess.PIPE, encoding='utf-8', shell=True) - ret_list.append((disk, p.returncode)) - + description: > + This metric collects SMART health status for all disk devices using the `smartmontools` utility. + It provides insights into the health of disk devices, including their SMART status and return codes. + This metric is useful for monitoring disk health and identifying potential issues with disk devices. + This helper is always meant to be tested and adjusted to make sure all disk are detected. 
+ Most likely smartctl privileges must be escalated to give postgres access: `sudo chmod u+s /usr/local/sbin/smartctl` + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + device as tag_device, + retcode + from + get_smart_health_per_device() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_smart_health_per_device(OUT device text, OUT retcode int) RETURNS SETOF record AS + $$ + import subprocess + ret_list = [] + + #disk_detect_cmd='smartctl --scan | cut -d " " -f3 | grep mega' # for Lenovo ServerRAID M1210 + disk_detect_cmd='lsblk -io KNAME,TYPE | grep " disk" | cut -d " " -f1 | sort' # double quotes keep the leading space in the grep pattern so only the TYPE column matches + p = subprocess.run(disk_detect_cmd, stdout=subprocess.PIPE, encoding='utf-8', shell=True) + if p.returncode != 0: return ret_list + disks = p.stdout.splitlines() + + for disk in disks: + # health_cmd = 'smartctl -d $disk -a -q silent /dev/sda' % disk # for Lenovo ServerRAID M1210 members + health_cmd = 'smartctl -a -q silent /dev/%s' % disk + p = subprocess.run(health_cmd, stdout=subprocess.PIPE, encoding='utf-8', shell=True) + ret_list.append((disk, p.returncode)) + + return ret_list + + $$ LANGUAGE plpython3u VOLATILE; + + GRANT EXECUTE ON FUNCTION get_smart_health_per_device() TO pgwatch; + + COMMENT ON FUNCTION get_smart_health_per_device() is 'created for pgwatch'; sproc_hashes: - description: > - This metric collects hashes of all stored procedures in the database. - It provides a way to track changes in stored procedures over time by comparing their hashes. - This metric is useful for monitoring stored procedure integrity and detecting changes.
- sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - p.oid::text as tag_oid, - quote_ident(nspname)||'.'||quote_ident(proname) as tag_sproc, - md5(prosrc) - from - pg_proc p - join - pg_namespace n on n.oid = pronamespace - where - not nspname like any(array[E'pg\\_%', 'information_schema']) + description: > + This metric collects hashes of all stored procedures in the database. + It provides a way to track changes in stored procedures over time by comparing their hashes. + This metric is useful for monitoring stored procedure integrity and detecting changes. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + p.oid::text as tag_oid, + quote_ident(nspname)||'.'||quote_ident(proname) as tag_sproc, + md5(prosrc) + from + pg_proc p + join + pg_namespace n on n.oid = pronamespace + where + not nspname like any(array[E'pg\\_%', 'information_schema']) sproc_stats: - description: > - This metric collects statistics about user-defined functions (stored procedures) in the database. - It provides insights into function usage, including call counts and execution times. - This metric is useful for monitoring function performance and identifying potential bottlenecks. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - schemaname::text AS tag_schema, - funcname::text AS tag_function_name, - quote_ident(schemaname)||'.'||quote_ident(funcname) as tag_function_full_name, - p.oid::text as tag_oid, -- for overloaded funcs - calls as sp_calls, - self_time, - total_time - FROM - pg_stat_user_functions f - JOIN - pg_proc p ON p.oid = f.funcid - ORDER BY - total_time DESC - LIMIT - 300 + description: > + This metric collects statistics about user-defined functions (stored procedures) in the database. + It provides insights into function usage, including call counts and execution times. 
+ This metric is useful for monitoring function performance and identifying potential bottlenecks. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + schemaname::text AS tag_schema, + funcname::text AS tag_function_name, + quote_ident(schemaname)||'.'||quote_ident(funcname) as tag_function_full_name, + p.oid::text as tag_oid, -- for overloaded funcs + calls as sp_calls, + self_time, + total_time + FROM + pg_stat_user_functions f + JOIN + pg_proc p ON p.oid = f.funcid + ORDER BY + total_time DESC + LIMIT + 300 stat_activity: - description: > - This metric collects statistics about currently active queries in the database. - It provides insights into the state of active queries, including their duration and blocking status. - This metric is useful for monitoring query performance and identifying long-running or blocked queries. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - s.query as query, - count(*) as count - from pg_stat_activity s - where s.datname = current_database() - and s.state = 'active' - and s.backend_type = 'client backend' - and s.pid != pg_backend_pid() - and now() - s.query_start > '100ms'::interval - group by s.query + description: > + This metric collects statistics about currently active queries in the database. + It reports, per query text, how many client-backend sessions have been actively running that query for more than 100 ms. + This metric is useful for monitoring query performance and identifying long-running or blocked queries. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + s.query as query, + count(*) as count + from pg_stat_activity s + where s.datname = current_database() + and s.state = 'active' + and s.backend_type = 'client backend' + and s.pid != pg_backend_pid() + and now() - s.query_start > '100ms'::interval + group by s.query stat_activity_realtime: - description: > - This metric collects real-time statistics about currently active queries in the database. - It provides insights into the state of active queries, including their duration and blocking status. - This metric is useful for monitoring query performance and identifying long-running or blocked queries in real-time. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - pid as tag_pid, - usename::text AS user, - application_name AS appname, - coalesce(client_addr::text, 'local') AS ip, - extract(epoch FROM (now() - query_start))::int AS duration_s, - (coalesce(wait_event_type, '') IN ('LWLockNamed', 'Lock', 'BufferPin'))::int AS waiting, - array_to_string(pg_blocking_pids(pid), ',') as blocking_pids, - ltrim(regexp_replace(query, E'[ \\t\\n\\r]+' , ' ', 'g'))::varchar(300) AS query - FROM - pg_stat_activity - WHERE - state != 'idle' - AND backend_type IN ('client backend', 'autovacuum worker') - AND pid != pg_backend_pid() - AND datname = current_database() - AND now() - query_start > '500ms'::interval - ORDER BY - now() - query_start DESC - LIMIT 25 + description: > + This metric collects real-time statistics about currently active queries in the database. + It provides insights into the state of active queries, including their duration and blocking status. + This metric is useful for monitoring query performance and identifying long-running or blocked queries in real-time. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + pid as tag_pid, + usename::text AS user, + application_name AS appname, + coalesce(client_addr::text, 'local') AS ip, + extract(epoch FROM (now() - query_start))::int AS duration_s, + (coalesce(wait_event_type, '') IN ('LWLock', 'Lock', 'BufferPin'))::int AS waiting, -- PG 10+ merged LWLockNamed/LWLockTranche into LWLock + array_to_string(pg_blocking_pids(pid), ',') as blocking_pids, + ltrim(regexp_replace(query, E'[ \\t\\n\\r]+' , ' ', 'g'))::varchar(300) AS query + FROM + pg_stat_activity + WHERE + state != 'idle' + AND backend_type IN ('client backend', 'autovacuum worker') + AND pid != pg_backend_pid() + AND datname = current_database() + AND now() - query_start > '500ms'::interval + ORDER BY + now() - query_start DESC + LIMIT 25 stat_io: - description: > - This metric collects I/O statistics from the `pg_stat_io` view. - It provides insights into read and write operations, including the number of reads, writes, and their associated times. - This metric is useful for monitoring I/O performance and identifying potential bottlenecks in disk operations. 
- sqls: - 16: |- - SELECT /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - coalesce(backend_type, 'total') as tag_backend_type, - sum(coalesce(reads, 0))::int8 as reads, - (sum(coalesce(reads, 0) * op_bytes) / 1e6)::int8 as read_bytes_mb, - sum(coalesce(read_time, 0))::int8 as read_time_ms, - sum(coalesce(writes, 0))::int8 as writes, - (sum(coalesce(writes, 0) * op_bytes) / 1e6)::int8 as write_bytes_mb, - sum(coalesce(write_time, 0))::int8 as write_time_ms, - sum(coalesce(writebacks, 0))::int8 as writebacks, - (sum(coalesce(writebacks, 0) * op_bytes) / 1e6)::int8 as writeback_bytes_mb, - sum(coalesce(writeback_time, 0))::int8 as writeback_time_ms, - sum(coalesce(fsyncs, 0))::int8 fsyncs, - sum(coalesce(fsync_time, 0))::int8 fsync_time_ms, - max(extract(epoch from now() - stats_reset)::int) as stats_reset_s - FROM - pg_stat_io - GROUP BY - ROLLUP (backend_type) - is_instance_level: true + description: > + This metric collects I/O statistics from the `pg_stat_io` view. + It provides insights into read and write operations, including the number of reads, writes, and their associated times. + This metric is useful for monitoring I/O performance and identifying potential bottlenecks in disk operations. 
+ sqls: + 16: |- + SELECT /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + coalesce(backend_type, 'total') as tag_backend_type, + sum(coalesce(reads, 0))::int8 as reads, + (sum(coalesce(reads, 0) * op_bytes) / 1e6)::int8 as read_bytes_mb, + sum(coalesce(read_time, 0))::int8 as read_time_ms, + sum(coalesce(writes, 0))::int8 as writes, + (sum(coalesce(writes, 0) * op_bytes) / 1e6)::int8 as write_bytes_mb, + sum(coalesce(write_time, 0))::int8 as write_time_ms, + sum(coalesce(writebacks, 0))::int8 as writebacks, + (sum(coalesce(writebacks, 0) * op_bytes) / 1e6)::int8 as writeback_bytes_mb, + sum(coalesce(writeback_time, 0))::int8 as writeback_time_ms, + sum(coalesce(fsyncs, 0))::int8 fsyncs, + sum(coalesce(fsync_time, 0))::int8 fsync_time_ms, + max(extract(epoch from now() - stats_reset)::int) as stats_reset_s + FROM + pg_stat_io + GROUP BY + ROLLUP (backend_type) + is_instance_level: true stat_ssl: - description: > - This metric collects SSL connection statistics from the `pg_stat_ssl` view. - It provides insights into the number of SSL connections, including those that are encrypted and those that are not. - This metric is useful for monitoring SSL usage and ensuring secure connections in the PostgreSQL database. - sqls: - 11: | - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - count(*) as total, - count(*) FILTER (WHERE ssl) as "on", - count(*) FILTER (WHERE NOT ssl) as "off" - FROM - pg_stat_ssl AS s, - pg_stat_activity AS a - WHERE - a.pid = s.pid - AND a.datname = current_database() - AND a.pid <> pg_backend_pid() - AND NOT (a.client_addr = '127.0.0.1' OR client_port = -1) - gauges: - - '*' + description: > + This metric collects SSL connection statistics from the `pg_stat_ssl` view. + It provides insights into the number of SSL connections, including those that are encrypted and those that are not. 
+ This metric is useful for monitoring SSL usage and ensuring secure connections in the PostgreSQL database. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + count(*) as total, + count(*) FILTER (WHERE ssl) as "on", + count(*) FILTER (WHERE NOT ssl) as "off" + FROM + pg_stat_ssl AS s, + pg_stat_activity AS a + WHERE + a.pid = s.pid + AND a.datname = current_database() + AND a.pid <> pg_backend_pid() + AND NOT (a.client_addr = '127.0.0.1' OR client_port = -1) + gauges: + - '*' stat_statements: - description: > - This metric collects statistics from the `pg_stat_statements` extension. - It provides insights into query performance, including execution times, block reads/writes, and user information. - This metric is useful for monitoring query performance and identifying slow or resource-intensive queries. - init_sql: CREATE EXTENSION IF NOT EXISTS pg_stat_statements; - sqls: - 11: |- - WITH q_data AS ( - SELECT - coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, - /* - if security conscious about exposing query texts replace the below expression with a dash ('-') OR - use the stat_statements_no_query_text metric instead, created specifically for this use case. 
- */ - array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, - sum(s.calls)::int8 AS calls, - round(sum(s.total_time)::numeric, 3)::double precision AS total_time, - sum(shared_blks_hit)::int8 AS shared_blks_hit, - sum(shared_blks_read)::int8 AS shared_blks_read, - sum(shared_blks_written)::int8 AS shared_blks_written, - sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, - sum(temp_blks_read)::int8 AS temp_blks_read, - sum(temp_blks_written)::int8 AS temp_blks_written, - round(sum(blk_read_time)::numeric, 3)::double precision AS blk_read_time, - round(sum(blk_write_time)::numeric, 3)::double precision AS blk_write_time, - max(query::varchar(8000)) AS query - FROM - pg_stat_statements s - WHERE - calls > 5 - AND total_time > 5 - AND dbid = ( - SELECT - oid - FROM - pg_database - WHERE - datname = current_database()) - AND NOT upper(s.query::varchar(50)) - LIKE ANY (ARRAY['DEALLOCATE%', - 'SET %', - 'RESET %', - 'BEGIN%', - 'BEGIN;', - 'COMMIT%', - 'END%', - 'ROLLBACK%', - 'SHOW%']) - GROUP BY - queryid - ) - SELECT (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, - b.tag_queryid, - b.users, - b.calls, - b.total_time, - b.shared_blks_hit, - b.shared_blks_read, - b.shared_blks_written, - b.shared_blks_dirtied, - b.temp_blks_read, - b.temp_blks_written, - b.blk_read_time, - b.blk_write_time, - ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) tag_query - FROM ( - SELECT - * - FROM ( - SELECT - * - FROM - q_data - WHERE - total_time > 0 - ORDER BY - total_time DESC - LIMIT 100) a - UNION - select /* pgwatch_generated */ - * - FROM ( - SELECT - * - FROM - q_data - ORDER BY - calls DESC - LIMIT 100) a - UNION - select /* pgwatch_generated */ - * - FROM ( - SELECT - * - FROM - q_data - WHERE - shared_blks_read > 0 - ORDER BY - shared_blks_read DESC - LIMIT 100) a - UNION - select /* pgwatch_generated */ - * - FROM ( - SELECT - * - FROM - q_data - WHERE - shared_blks_written > 0 - ORDER BY - shared_blks_written DESC - 
LIMIT 100) a - UNION - select /* pgwatch_generated */ - * - FROM ( - SELECT - * - FROM - q_data - WHERE - temp_blks_read > 0 - ORDER BY - temp_blks_read DESC - LIMIT 100) a - UNION - select /* pgwatch_generated */ - * - FROM ( - SELECT - * - FROM - q_data - WHERE - temp_blks_written > 0 - ORDER BY - temp_blks_written DESC - LIMIT 100) a) b - 13: |- - WITH q_data AS ( - SELECT - coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, - /* - if security conscious about exposing query texts replace the below expression with a dash ('-') OR - use the stat_statements_no_query_text metric instead, created specifically for this use case. - */ - array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, - sum(s.calls)::int8 AS calls, - round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, - sum(shared_blks_hit)::int8 AS shared_blks_hit, - sum(shared_blks_read)::int8 AS shared_blks_read, - sum(shared_blks_written)::int8 AS shared_blks_written, - sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, - sum(temp_blks_read)::int8 AS temp_blks_read, - sum(temp_blks_written)::int8 AS temp_blks_written, - round(sum(blk_read_time)::numeric, 3)::double precision AS blk_read_time, - round(sum(blk_write_time)::numeric, 3)::double precision AS blk_write_time, - sum(wal_fpi)::int8 AS wal_fpi, - sum(wal_bytes)::int8 AS wal_bytes, - round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, - max(query::varchar(8000)) AS query - FROM - pg_stat_statements s - WHERE - calls > 5 - AND total_exec_time > 5 - AND dbid = ( - SELECT - oid - FROM - pg_database - WHERE - datname = current_database()) - AND NOT upper(s.query::varchar(50)) - LIKE ANY (ARRAY['DEALLOCATE%', - 'SET %', - 'RESET %', - 'BEGIN%', - 'BEGIN;', - 'COMMIT%', - 'END%', - 'ROLLBACK%', - 'SHOW%']) - GROUP BY - queryid - ) - select /* pgwatch_generated */ - (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, - b.tag_queryid, - b.users, - 
b.calls, - b.total_time, - b.shared_blks_hit, - b.shared_blks_read, - b.shared_blks_written, - b.shared_blks_dirtied, - b.temp_blks_read, - b.temp_blks_written, - b.blk_read_time, - b.blk_write_time, - b.wal_fpi, - b.wal_bytes, - b.total_plan_time, - ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query - FROM ( - SELECT - * - FROM ( - SELECT - * - FROM - q_data - WHERE - total_time > 0 - ORDER BY - total_time DESC - LIMIT 100) a - UNION - select /* pgwatch_generated */ - * - FROM ( - SELECT - * - FROM - q_data - ORDER BY - calls DESC - LIMIT 100) a - UNION - select /* pgwatch_generated */ - * - FROM ( - SELECT - * - FROM - q_data - WHERE - shared_blks_read > 0 - ORDER BY - shared_blks_read DESC - LIMIT 100) a - UNION - select /* pgwatch_generated */ - * - FROM ( - SELECT - * - FROM - q_data - WHERE - shared_blks_written > 0 - ORDER BY - shared_blks_written DESC - LIMIT 100) a - UNION - select /* pgwatch_generated */ - * - FROM ( - SELECT - * - FROM - q_data - WHERE - temp_blks_read > 0 - ORDER BY - temp_blks_read DESC - LIMIT 100) a - UNION - select /* pgwatch_generated */ - * - FROM ( - SELECT - * - FROM - q_data - WHERE - temp_blks_written > 0 - ORDER BY - temp_blks_written DESC - LIMIT 100) a) b - 15: |- - WITH /* pgwatch_generated */ q_data AS ( - SELECT - queryid::text AS tag_queryid, - /* - if security conscious about exposing query texts replace the below expression with a dash ('-') OR - use the stat_statements_no_query_text metric instead, created specifically for this use case. 
- */ - array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, - sum(s.calls)::int8 AS calls, - round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, - sum(shared_blks_hit)::int8 AS shared_blks_hit, - sum(shared_blks_read)::int8 AS shared_blks_read, - sum(shared_blks_written)::int8 AS shared_blks_written, - sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, - sum(temp_blks_read)::int8 AS temp_blks_read, - sum(temp_blks_written)::int8 AS temp_blks_written, - round(sum(blk_read_time)::numeric, 3)::double precision AS blk_read_time, - round(sum(blk_write_time)::numeric, 3)::double precision AS blk_write_time, - round(sum(temp_blk_read_time)::numeric, 3)::double precision AS temp_blk_read_time, - round(sum(temp_blk_write_time)::numeric, 3)::double precision AS temp_blk_write_time, - sum(wal_fpi)::int8 AS wal_fpi, - sum(wal_bytes)::int8 AS wal_bytes, - round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, - max(query::varchar(8000)) AS query - FROM - pg_stat_statements s - WHERE - calls > 5 - AND total_exec_time > 5 - AND dbid = ( - SELECT - oid - FROM - pg_database - WHERE - datname = current_database()) - AND NOT upper(s.query::varchar(50)) - LIKE ANY (ARRAY['DEALLOCATE%', - 'SET %', - 'RESET %', - 'BEGIN%', - 'BEGIN;', - 'COMMIT%', - 'END%', - 'ROLLBACK%', - 'SHOW%']) - GROUP BY - queryid - ) - SELECT - (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, - b.tag_queryid, - b.users, - b.calls, - b.total_time, - b.shared_blks_hit, - b.shared_blks_read, - b.shared_blks_written, - b.shared_blks_dirtied, - b.temp_blks_read, - b.temp_blks_written, - b.blk_read_time, - b.blk_write_time, - b.temp_blk_read_time, - b.temp_blk_write_time, - b.wal_fpi, - b.wal_bytes, - b.total_plan_time, - ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query - FROM ( - SELECT - * - FROM ( - SELECT - * - FROM - q_data - WHERE - total_time > 0 - ORDER BY - total_time DESC - LIMIT 100) a - UNION 
- SELECT - * - FROM ( - SELECT - * - FROM - q_data - ORDER BY - calls DESC - LIMIT 100) a - UNION - SELECT - * - FROM ( - SELECT - * - FROM - q_data - WHERE - shared_blks_read > 0 - ORDER BY - shared_blks_read DESC - LIMIT 100) a - UNION - SELECT - * - FROM ( - SELECT - * - FROM - q_data - WHERE - shared_blks_written > 0 - ORDER BY - shared_blks_written DESC - LIMIT 100) a - UNION - SELECT - * - FROM ( - SELECT - * - FROM - q_data - WHERE - temp_blks_read > 0 - ORDER BY - temp_blks_read DESC - LIMIT 100) a - UNION - SELECT - * - FROM ( - SELECT - * - FROM - q_data - WHERE - temp_blks_written > 0 - ORDER BY - temp_blks_written DESC - LIMIT 100) a) b - 17: |- - WITH /* pgwatch_generated */ q_data AS ( + description: > + This metric collects statistics from the `pg_stat_statements` extension. + It provides insights into query performance, including execution times, block reads/writes, and user information. + This metric is useful for monitoring query performance and identifying slow or resource-intensive queries. + init_sql: CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + sqls: + 11: |- + WITH q_data AS ( + SELECT + coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, + /* + if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. 
+ */ + array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, + sum(s.calls)::int8 AS calls, + round(sum(s.total_time)::numeric, 3)::double precision AS total_time, + sum(shared_blks_hit)::int8 AS shared_blks_hit, + sum(shared_blks_read)::int8 AS shared_blks_read, + sum(shared_blks_written)::int8 AS shared_blks_written, + sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, + sum(temp_blks_read)::int8 AS temp_blks_read, + sum(temp_blks_written)::int8 AS temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision AS blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision AS blk_write_time, + max(query::varchar(8000)) AS query + FROM + pg_stat_statements s + WHERE + calls > 5 + AND total_time > 5 + AND dbid = ( + SELECT + oid + FROM + pg_database + WHERE + datname = current_database()) + AND NOT upper(s.query::varchar(50)) + LIKE ANY (ARRAY['DEALLOCATE%', + 'SET %', + 'RESET %', + 'BEGIN%', + 'BEGIN;', + 'COMMIT%', + 'END%', + 'ROLLBACK%', + 'SHOW%']) + GROUP BY + queryid + ) + SELECT (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + b.tag_queryid, + b.users, + b.calls, + b.total_time, + b.shared_blks_hit, + b.shared_blks_read, + b.shared_blks_written, + b.shared_blks_dirtied, + b.temp_blks_read, + b.temp_blks_written, + b.blk_read_time, + b.blk_write_time, + ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) tag_query + FROM ( + SELECT + * + FROM ( SELECT - queryid::text AS tag_queryid, - /* - NB! if security conscious about exposing query texts replace the below expression with a dash ('-') OR - use the stat_statements_no_query_text metric instead, created specifically for this use case. 
- */ - array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, - sum(s.calls)::int8 AS calls, - round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, - sum(shared_blks_hit)::int8 AS shared_blks_hit, - sum(shared_blks_read)::int8 AS shared_blks_read, - sum(shared_blks_written)::int8 AS shared_blks_written, - sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, - sum(temp_blks_read)::int8 AS temp_blks_read, - sum(temp_blks_written)::int8 AS temp_blks_written, - round((sum(shared_blk_read_time) + sum(local_blk_read_time))::numeric, 3)::double precision AS blk_read_time, - round((sum(shared_blk_write_time) + sum(local_blk_write_time))::numeric, 3)::double precision AS blk_write_time, - round(sum(temp_blk_read_time)::numeric, 3)::double precision AS temp_blk_read_time, - round(sum(temp_blk_write_time)::numeric, 3)::double precision AS temp_blk_write_time, - sum(wal_fpi)::int8 AS wal_fpi, - sum(wal_bytes)::int8 AS wal_bytes, - round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, - max(query::varchar(8000)) AS query + * FROM - pg_stat_statements s + q_data WHERE - calls > 5 - AND total_exec_time > 5 - AND dbid = ( - SELECT - oid - FROM - pg_database - WHERE - datname = current_database()) - AND NOT upper(s.query::varchar(50)) - LIKE ANY (ARRAY['DEALLOCATE%', - 'SET %', - 'RESET %', - 'BEGIN%', - 'BEGIN;', - 'COMMIT%', - 'END%', - 'ROLLBACK%', - 'SHOW%']) - GROUP BY - queryid - ) - SELECT - (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, - b.tag_queryid, - b.users, - b.calls, - b.total_time, - b.shared_blks_hit, - b.shared_blks_read, - b.shared_blks_written, - b.shared_blks_dirtied, - b.temp_blks_read, - b.temp_blks_written, - b.blk_read_time, - b.blk_write_time, - b.temp_blk_read_time, - b.temp_blk_write_time, - b.wal_fpi, - b.wal_bytes, - b.total_plan_time, - ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query - FROM ( - SELECT - * - FROM ( - SELECT - * - FROM - 
q_data - WHERE - total_time > 0 - ORDER BY - total_time DESC - LIMIT 100) a - UNION - SELECT - * - FROM ( - SELECT - * - FROM - q_data - ORDER BY - calls DESC - LIMIT 100) a - UNION - SELECT - * - FROM ( - SELECT - * - FROM - q_data - WHERE - shared_blks_read > 0 - ORDER BY - shared_blks_read DESC - LIMIT 100) a - UNION - SELECT - * - FROM ( - SELECT - * - FROM - q_data - WHERE - shared_blks_written > 0 - ORDER BY - shared_blks_written DESC - LIMIT 100) a - UNION - SELECT - * - FROM ( - SELECT - * - FROM - q_data - WHERE - temp_blks_read > 0 - ORDER BY - temp_blks_read DESC - LIMIT 100) a - UNION - SELECT - * - FROM ( + total_time > 0 + ORDER BY + total_time DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + ORDER BY + calls DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_read > 0 + ORDER BY + shared_blks_read DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_written > 0 + ORDER BY + shared_blks_written DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_read > 0 + ORDER BY + temp_blks_read DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_written > 0 + ORDER BY + temp_blks_written DESC + LIMIT 100) a) b + 13: |- + WITH q_data AS ( + SELECT + coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, + /* + if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. 
+ */ + array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, + sum(s.calls)::int8 AS calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, + sum(shared_blks_hit)::int8 AS shared_blks_hit, + sum(shared_blks_read)::int8 AS shared_blks_read, + sum(shared_blks_written)::int8 AS shared_blks_written, + sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, + sum(temp_blks_read)::int8 AS temp_blks_read, + sum(temp_blks_written)::int8 AS temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision AS blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision AS blk_write_time, + sum(wal_fpi)::int8 AS wal_fpi, + sum(wal_bytes)::int8 AS wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, + max(query::varchar(8000)) AS query + FROM + pg_stat_statements s + WHERE + calls > 5 + AND total_exec_time > 5 + AND dbid = ( + SELECT + oid + FROM + pg_database + WHERE + datname = current_database()) + AND NOT upper(s.query::varchar(50)) + LIKE ANY (ARRAY['DEALLOCATE%', + 'SET %', + 'RESET %', + 'BEGIN%', + 'BEGIN;', + 'COMMIT%', + 'END%', + 'ROLLBACK%', + 'SHOW%']) + GROUP BY + queryid + ) + select /* pgwatch_generated */ + (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + b.tag_queryid, + b.users, + b.calls, + b.total_time, + b.shared_blks_hit, + b.shared_blks_read, + b.shared_blks_written, + b.shared_blks_dirtied, + b.temp_blks_read, + b.temp_blks_written, + b.blk_read_time, + b.blk_write_time, + b.wal_fpi, + b.wal_bytes, + b.total_plan_time, + ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query + FROM ( + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + total_time > 0 + ORDER BY + total_time DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + ORDER BY + calls DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + 
WHERE + shared_blks_read > 0 + ORDER BY + shared_blks_read DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_written > 0 + ORDER BY + shared_blks_written DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_read > 0 + ORDER BY + temp_blks_read DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_written > 0 + ORDER BY + temp_blks_written DESC + LIMIT 100) a) b + 15: |- + WITH /* pgwatch_generated */ q_data AS ( + SELECT + queryid::text AS tag_queryid, + /* + if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. + */ + array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, + sum(s.calls)::int8 AS calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, + sum(shared_blks_hit)::int8 AS shared_blks_hit, + sum(shared_blks_read)::int8 AS shared_blks_read, + sum(shared_blks_written)::int8 AS shared_blks_written, + sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, + sum(temp_blks_read)::int8 AS temp_blks_read, + sum(temp_blks_written)::int8 AS temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision AS blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision AS blk_write_time, + round(sum(temp_blk_read_time)::numeric, 3)::double precision AS temp_blk_read_time, + round(sum(temp_blk_write_time)::numeric, 3)::double precision AS temp_blk_write_time, + sum(wal_fpi)::int8 AS wal_fpi, + sum(wal_bytes)::int8 AS wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, + max(query::varchar(8000)) AS query + FROM + pg_stat_statements s + WHERE + calls > 5 + AND total_exec_time > 5 + AND dbid = ( 
+ SELECT + oid + FROM + pg_database + WHERE + datname = current_database()) + AND NOT upper(s.query::varchar(50)) + LIKE ANY (ARRAY['DEALLOCATE%', + 'SET %', + 'RESET %', + 'BEGIN%', + 'BEGIN;', + 'COMMIT%', + 'END%', + 'ROLLBACK%', + 'SHOW%']) + GROUP BY + queryid + ) + SELECT + (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + b.tag_queryid, + b.users, + b.calls, + b.total_time, + b.shared_blks_hit, + b.shared_blks_read, + b.shared_blks_written, + b.shared_blks_dirtied, + b.temp_blks_read, + b.temp_blks_written, + b.blk_read_time, + b.blk_write_time, + b.temp_blk_read_time, + b.temp_blk_write_time, + b.wal_fpi, + b.wal_bytes, + b.total_plan_time, + ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query + FROM ( + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + total_time > 0 + ORDER BY + total_time DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + ORDER BY + calls DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_read > 0 + ORDER BY + shared_blks_read DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_written > 0 + ORDER BY + shared_blks_written DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_read > 0 + ORDER BY + temp_blks_read DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_written > 0 + ORDER BY + temp_blks_written DESC + LIMIT 100) a) b + 17: |- + WITH /* pgwatch_generated */ q_data AS ( + SELECT + queryid::text AS tag_queryid, + /* + NB! if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. 
+ */ + array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, + sum(s.calls)::int8 AS calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, + sum(shared_blks_hit)::int8 AS shared_blks_hit, + sum(shared_blks_read)::int8 AS shared_blks_read, + sum(shared_blks_written)::int8 AS shared_blks_written, + sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, + sum(temp_blks_read)::int8 AS temp_blks_read, + sum(temp_blks_written)::int8 AS temp_blks_written, + round((sum(shared_blk_read_time) + sum(local_blk_read_time))::numeric, 3)::double precision AS blk_read_time, + round((sum(shared_blk_write_time) + sum(local_blk_write_time))::numeric, 3)::double precision AS blk_write_time, + round(sum(temp_blk_read_time)::numeric, 3)::double precision AS temp_blk_read_time, + round(sum(temp_blk_write_time)::numeric, 3)::double precision AS temp_blk_write_time, + sum(wal_fpi)::int8 AS wal_fpi, + sum(wal_bytes)::int8 AS wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, + max(query::varchar(8000)) AS query + FROM + pg_stat_statements s + WHERE + calls > 5 + AND total_exec_time > 5 + AND dbid = ( SELECT - * + oid FROM - q_data + pg_database WHERE - temp_blks_written > 0 - ORDER BY - temp_blks_written DESC - LIMIT 100) a) b; + datname = current_database()) + AND NOT upper(s.query::varchar(50)) + LIKE ANY (ARRAY['DEALLOCATE%', + 'SET %', + 'RESET %', + 'BEGIN%', + 'BEGIN;', + 'COMMIT%', + 'END%', + 'ROLLBACK%', + 'SHOW%']) + GROUP BY + queryid + ) + SELECT + (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + b.tag_queryid, + b.users, + b.calls, + b.total_time, + b.shared_blks_hit, + b.shared_blks_read, + b.shared_blks_written, + b.shared_blks_dirtied, + b.temp_blks_read, + b.temp_blks_written, + b.blk_read_time, + b.blk_write_time, + b.temp_blk_read_time, + b.temp_blk_write_time, + b.wal_fpi, + b.wal_bytes, + b.total_plan_time, + ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' 
', 'g')) AS tag_query + FROM ( + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + total_time > 0 + ORDER BY + total_time DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + ORDER BY + calls DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_read > 0 + ORDER BY + shared_blks_read DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_written > 0 + ORDER BY + shared_blks_written DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_read > 0 + ORDER BY + temp_blks_read DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_written > 0 + ORDER BY + temp_blks_written DESC + LIMIT 100) a) b; stat_statements_calls: - description: > - This metric collects statistics from the `pg_stat_statements` extension, focusing on the number of calls and total execution time. - It provides insights into query performance, including execution times and call counts. - This metric is useful for monitoring query performance and identifying slow or resource-intensive queries. 
- init_sql: CREATE EXTENSION IF NOT EXISTS pg_stat_statements; - sqls: - 11: | - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - coalesce(sum(calls), 0)::int8 as calls, - coalesce(round(sum(total_time)::numeric, 3), 0)::float8 as total_time - from - pg_stat_statements - where - dbid = (select oid from pg_database where datname = current_database()) - 13: | - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - coalesce(sum(calls), 0)::int8 as calls, - coalesce(round(sum(total_exec_time)::numeric, 3), 0)::float8 as total_time, - round(sum(total_plan_time)::numeric, 3)::double precision as total_plan_time - from - pg_stat_statements - where - dbid = (select oid from pg_database where datname = current_database()) + description: > + This metric collects statistics from the `pg_stat_statements` extension, focusing on the number of calls and total execution time. + It provides insights into query performance, including execution times and call counts. + This metric is useful for monitoring query performance and identifying slow or resource-intensive queries. 
+ init_sql: CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + coalesce(sum(calls), 0)::int8 as calls, + coalesce(round(sum(total_time)::numeric, 3), 0)::float8 as total_time + from + pg_stat_statements + where + dbid = (select oid from pg_database where datname = current_database()) + 13: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + coalesce(sum(calls), 0)::int8 as calls, + coalesce(round(sum(total_exec_time)::numeric, 3), 0)::float8 as total_time, + coalesce(round(sum(total_plan_time)::numeric, 3), 0)::float8 as total_plan_time -- 0 (not NULL) when no statements match, like calls/total_time + from + pg_stat_statements + where + dbid = (select oid from pg_database where datname = current_database()) stat_statements_no_query_text: + description: > + This metric collects statistics from the `pg_stat_statements` extension without including the query text. + It provides insights into query performance, including execution times, block reads/writes, and user information, + while omitting the actual query text for security or privacy reasons. + This metric is useful for monitoring query performance without exposing sensitive query details. 
- init_sql: CREATE EXTENSION IF NOT EXISTS pg_stat_statements; - sqls: - 11: |- - with q_data as ( - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - '-'::text as tag_query, - coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, - array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, - sum(s.calls)::int8 as calls, - round(sum(s.total_time)::numeric, 3)::double precision as total_time, - sum(shared_blks_hit)::int8 as shared_blks_hit, - sum(shared_blks_read)::int8 as shared_blks_read, - sum(shared_blks_written)::int8 as shared_blks_written, - sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, - sum(temp_blks_read)::int8 as temp_blks_read, - sum(temp_blks_written)::int8 as temp_blks_written, - round(sum(blk_read_time)::numeric, 3)::double precision as blk_read_time, - round(sum(blk_write_time)::numeric, 3)::double precision as blk_write_time - from - pg_stat_statements s - where - calls > 5 - and total_time > 0 - and dbid = (select oid from pg_database where datname = current_database()) - and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', - 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) - group by - queryid - ) - select * from ( - select - * - from - q_data - where - total_time > 0 - order by - total_time desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - order by - calls desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - shared_blks_read > 0 - order by - shared_blks_read desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - shared_blks_written > 0 - order by - shared_blks_written desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - temp_blks_read > 0 - order by - temp_blks_read desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - temp_blks_written > 0 - order by - 
temp_blks_written desc - limit 100 - ) a - 13: |- - with q_data as ( - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - '-' as tag_query, - coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, - array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, - sum(s.calls)::int8 as calls, - round(sum(s.total_exec_time)::numeric, 3)::double precision as total_time, - sum(shared_blks_hit)::int8 as shared_blks_hit, - sum(shared_blks_read)::int8 as shared_blks_read, - sum(shared_blks_written)::int8 as shared_blks_written, - sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, - sum(temp_blks_read)::int8 as temp_blks_read, - sum(temp_blks_written)::int8 as temp_blks_written, - round(sum(blk_read_time)::numeric, 3)::double precision as blk_read_time, - round(sum(blk_write_time)::numeric, 3)::double precision as blk_write_time, - sum(wal_fpi)::int8 as wal_fpi, - sum(wal_bytes)::int8 as wal_bytes, - round(sum(s.total_plan_time)::numeric, 3)::double precision as total_plan_time - from - pg_stat_statements s - where - calls > 5 - and total_exec_time > 0 - and dbid = (select oid from pg_database where datname = current_database()) - and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', - 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) - group by - queryid - ) - select * from ( - select - * - from - q_data - where - total_time > 0 - order by - total_time desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - order by - calls desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - shared_blks_read > 0 - order by - shared_blks_read desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - shared_blks_written > 0 - order by - shared_blks_written desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - temp_blks_read > 0 - order by - 
temp_blks_read desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - temp_blks_written > 0 - order by - temp_blks_written desc - limit 100 - ) a - 15: |- - with /* pgwatch_generated */ q_data as ( - select - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - '-' as tag_query, - queryid::text as tag_queryid, - array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, - sum(s.calls)::int8 as calls, - round(sum(s.total_exec_time)::numeric, 3)::double precision as total_time, - sum(shared_blks_hit)::int8 as shared_blks_hit, - sum(shared_blks_read)::int8 as shared_blks_read, - sum(shared_blks_written)::int8 as shared_blks_written, - sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, - sum(temp_blks_read)::int8 as temp_blks_read, - sum(temp_blks_written)::int8 as temp_blks_written, - round(sum(blk_read_time)::numeric, 3)::double precision as blk_read_time, - round(sum(blk_write_time)::numeric, 3)::double precision as blk_write_time, - round(sum(temp_blk_read_time)::numeric, 3)::double precision as temp_blk_read_time, - round(sum(temp_blk_write_time)::numeric, 3)::double precision as temp_blk_write_time, - sum(wal_fpi) as wal_fpi, - sum(wal_bytes) as wal_bytes, - round(sum(s.total_plan_time)::numeric, 3)::double precision as total_plan_time - from - pg_stat_statements s - where - calls > 5 - and total_exec_time > 0 - and dbid = (select oid from pg_database where datname = current_database()) - and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', - 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) - group by - queryid - ) - select * from ( - select - * - from - q_data - where - total_time > 0 - order by - total_time desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - order by - calls desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - shared_blks_read > 0 - order by - shared_blks_read desc - limit 100 - ) 
a - union - select * from ( - select - * - from - q_data - where - shared_blks_written > 0 - order by - shared_blks_written desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - temp_blks_read > 0 - order by - temp_blks_read desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - temp_blks_written > 0 - order by - temp_blks_written desc - limit 100 - ) a - 17: |- - with /* pgwatch_generated */ q_data as ( - select - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - '-' as tag_query, - queryid::text as tag_queryid, - array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, - sum(s.calls)::int8 as calls, - round(sum(s.total_exec_time)::numeric, 3)::double precision as total_time, - sum(shared_blks_hit)::int8 as shared_blks_hit, - sum(shared_blks_read)::int8 as shared_blks_read, - sum(shared_blks_written)::int8 as shared_blks_written, - sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, - sum(temp_blks_read)::int8 as temp_blks_read, - sum(temp_blks_written)::int8 as temp_blks_written, - round((sum(shared_blk_read_time) + sum(local_blk_read_time))::numeric, 3)::double precision AS blk_read_time, - round((sum(shared_blk_write_time) + sum(local_blk_write_time))::numeric, 3)::double precision AS blk_write_time, - round(sum(temp_blk_read_time)::numeric, 3)::double precision as temp_blk_read_time, - round(sum(temp_blk_write_time)::numeric, 3)::double precision as temp_blk_write_time, - sum(wal_fpi)::int8 as wal_fpi, - sum(wal_bytes)::int8 as wal_bytes, - round(sum(s.total_plan_time)::numeric, 3)::double precision as total_plan_time - from - pg_stat_statements s - where - calls > 5 - and total_exec_time > 0 - and dbid = (select oid from pg_database where datname = current_database()) - and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', - 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) - group by - queryid - ) - select * from ( - 
select - * - from - q_data - where - total_time > 0 - order by - total_time desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - order by - calls desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - shared_blks_read > 0 - order by - shared_blks_read desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - shared_blks_written > 0 - order by - shared_blks_written desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - temp_blks_read > 0 - order by - temp_blks_read desc - limit 100 - ) a - union - select * from ( - select - * - from - q_data - where - temp_blks_written > 0 - order by - temp_blks_written desc - limit 100 - ) a; - metric_storage_name: stat_statements + description: > + This metric collects statistics from the `pg_stat_statements` extension without including the query text. + It provides insights into query performance, including execution times, block reads/writes, and user information, + while omitting the actual query text for security or privacy reasons. + This metric is useful for monitoring query performance without exposing sensitive query details. 
+ init_sql: CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + sqls: + 11: |- + with q_data as ( + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + '-'::text as tag_query, + coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, + array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, + sum(s.calls)::int8 as calls, + round(sum(s.total_time)::numeric, 3)::double precision as total_time, + sum(shared_blks_hit)::int8 as shared_blks_hit, + sum(shared_blks_read)::int8 as shared_blks_read, + sum(shared_blks_written)::int8 as shared_blks_written, + sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, + sum(temp_blks_read)::int8 as temp_blks_read, + sum(temp_blks_written)::int8 as temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision as blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision as blk_write_time + from + pg_stat_statements s + where + calls > 5 + and total_time > 0 + and dbid = (select oid from pg_database where datname = current_database()) + and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', + 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) + group by + queryid + ) + select * from ( + select + * + from + q_data + where + total_time > 0 + order by + total_time desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + order by + calls desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_read > 0 + order by + shared_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_written > 0 + order by + shared_blks_written desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_read > 0 + order by + temp_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_written > 0 + order by + 
temp_blks_written desc + limit 100 + ) a + 13: |- + with q_data as ( + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + '-' as tag_query, + coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, + array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, + sum(s.calls)::int8 as calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision as total_time, + sum(shared_blks_hit)::int8 as shared_blks_hit, + sum(shared_blks_read)::int8 as shared_blks_read, + sum(shared_blks_written)::int8 as shared_blks_written, + sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, + sum(temp_blks_read)::int8 as temp_blks_read, + sum(temp_blks_written)::int8 as temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision as blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision as blk_write_time, + sum(wal_fpi)::int8 as wal_fpi, + sum(wal_bytes)::int8 as wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision as total_plan_time + from + pg_stat_statements s + where + calls > 5 + and total_exec_time > 0 + and dbid = (select oid from pg_database where datname = current_database()) + and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', + 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) + group by + queryid + ) + select * from ( + select + * + from + q_data + where + total_time > 0 + order by + total_time desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + order by + calls desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_read > 0 + order by + shared_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_written > 0 + order by + shared_blks_written desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_read > 0 + order by + 
temp_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_written > 0 + order by + temp_blks_written desc + limit 100 + ) a + 15: |- + with /* pgwatch_generated */ q_data as ( + select + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + '-' as tag_query, + queryid::text as tag_queryid, + array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, + sum(s.calls)::int8 as calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision as total_time, + sum(shared_blks_hit)::int8 as shared_blks_hit, + sum(shared_blks_read)::int8 as shared_blks_read, + sum(shared_blks_written)::int8 as shared_blks_written, + sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, + sum(temp_blks_read)::int8 as temp_blks_read, + sum(temp_blks_written)::int8 as temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision as blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision as blk_write_time, + round(sum(temp_blk_read_time)::numeric, 3)::double precision as temp_blk_read_time, + round(sum(temp_blk_write_time)::numeric, 3)::double precision as temp_blk_write_time, + sum(wal_fpi) as wal_fpi, + sum(wal_bytes) as wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision as total_plan_time + from + pg_stat_statements s + where + calls > 5 + and total_exec_time > 0 + and dbid = (select oid from pg_database where datname = current_database()) + and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', + 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) + group by + queryid + ) + select * from ( + select + * + from + q_data + where + total_time > 0 + order by + total_time desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + order by + calls desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_read > 0 + order by + shared_blks_read desc + limit 100 + ) 
a + union + select * from ( + select + * + from + q_data + where + shared_blks_written > 0 + order by + shared_blks_written desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_read > 0 + order by + temp_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_written > 0 + order by + temp_blks_written desc + limit 100 + ) a + 17: |- + with /* pgwatch_generated */ q_data as ( + select + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + '-' as tag_query, + queryid::text as tag_queryid, + array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, + sum(s.calls)::int8 as calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision as total_time, + sum(shared_blks_hit)::int8 as shared_blks_hit, + sum(shared_blks_read)::int8 as shared_blks_read, + sum(shared_blks_written)::int8 as shared_blks_written, + sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, + sum(temp_blks_read)::int8 as temp_blks_read, + sum(temp_blks_written)::int8 as temp_blks_written, + round((sum(shared_blk_read_time) + sum(local_blk_read_time))::numeric, 3)::double precision AS blk_read_time, + round((sum(shared_blk_write_time) + sum(local_blk_write_time))::numeric, 3)::double precision AS blk_write_time, + round(sum(temp_blk_read_time)::numeric, 3)::double precision as temp_blk_read_time, + round(sum(temp_blk_write_time)::numeric, 3)::double precision as temp_blk_write_time, + sum(wal_fpi)::int8 as wal_fpi, + sum(wal_bytes)::int8 as wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision as total_plan_time + from + pg_stat_statements s + where + calls > 5 + and total_exec_time > 0 + and dbid = (select oid from pg_database where datname = current_database()) + and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', + 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) + group by + queryid + ) + select * from ( + 
select + * + from + q_data + where + total_time > 0 + order by + total_time desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + order by + calls desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_read > 0 + order by + shared_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_written > 0 + order by + shared_blks_written desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_read > 0 + order by + temp_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_written > 0 + order by + temp_blks_written desc + limit 100 + ) a; + metric_storage_name: stat_statements subscription_stats: - description: > - This metric collects statistics from the `pg_stat_subscription_stats` view, which provides information about the status of logical replication subscriptions. - It includes details such as the number of apply and sync errors, which can help in monitoring the health of logical replication. - sqls: - 15: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - subname::text as tag_subname, - apply_error_count, - sync_error_count - from - pg_stat_subscription_stats + description: > + This metric collects statistics from the `pg_stat_subscription_stats` view, which provides information about the status of logical replication subscriptions. + It includes details such as the number of apply and sync errors, which can help in monitoring the health of logical replication. 
+ sqls: + 15: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + subname::text as tag_subname, + apply_error_count, + sync_error_count + from + pg_stat_subscription_stats table_bloat_approx_stattuple: - description: > - This metric collects approximate table bloat statistics using the `pgstattuple_approx` function. - It provides insights into the amount of free space and dead tuples in tables, which can help in identifying bloat issues. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - quote_ident(n.nspname)||'.'||quote_ident(c.relname) as tag_full_table_name, - approx_free_percent, - approx_free_space as approx_free_space_b, - approx_tuple_count, - dead_tuple_percent, - dead_tuple_len as dead_tuple_len_b - from + description: > + This metric collects approximate table bloat statistics using the `pgstattuple_approx` function. + It provides insights into the amount of free space and dead tuples in tables, which can help in identifying bloat issues. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + quote_ident(n.nspname)||'.'||quote_ident(c.relname) as tag_full_table_name, + approx_free_percent, + approx_free_space as approx_free_space_b, + approx_tuple_count, + dead_tuple_percent, + dead_tuple_len as dead_tuple_len_b + from + pg_class c + join lateral pgstattuple_approx(c.oid) st on (c.oid not in (select relation from pg_locks where mode = 'AccessExclusiveLock')) -- skip locked tables, + join pg_namespace n on n.oid = c.relnamespace + where + relkind in ('r', 'm') + and c.relpages >= 128 -- tables > 1mb + and not n.nspname like any (array[E'pg\\_%', 'information_schema']) + node_status: primary + gauges: + - '*' + table_bloat_approx_summary: + description: > + This metric provides a summary of approximate table bloat statistics, including the total bloat size and percentage for the current database. 
+ It aggregates data from multiple tables to give an overview of bloat across the database. + sqls: + 11: |- + /* accessing pgstattuple_approx directly requires superuser or pg_stat_scan_tables/pg_monitor builtin roles or + execute grant on pgstattuple_approx(regclass) + */ + with table_bloat_approx as ( + select + avg(approx_free_percent)::double precision as approx_free_percent, + sum(approx_free_space)::double precision as approx_free_space, + avg(dead_tuple_percent)::double precision as dead_tuple_percent, + sum(dead_tuple_len)::double precision as dead_tuple_len + from pg_class c - join lateral pgstattuple_approx(c.oid) st on (c.oid not in (select relation from pg_locks where mode = 'AccessExclusiveLock')) -- skip locked tables, - join pg_namespace n on n.oid = c.relnamespace - where + join + pg_namespace n on n.oid = c.relnamespace + join lateral pgstattuple_approx(c.oid) on (c.oid not in (select relation from pg_locks where mode = 'AccessExclusiveLock')) -- skip locked tables + where relkind in ('r', 'm') - and c.relpages >= 128 -- tables > 1mb - and not n.nspname like any (array[E'pg\\_%', 'information_schema']) - node_status: primary - gauges: - - '*' - table_bloat_approx_summary: - description: > - This metric provides a summary of approximate table bloat statistics, including the total bloat size and percentage for the current database. - It aggregates data from multiple tables to give an overview of bloat across the database. 
- sqls: - 11: |- - /* accessing pgstattuple_approx directly requires superuser or pg_stat_scan_tables/pg_monitor builtin roles or - execute grant on pgstattuple_approx(regclass) - */ - with table_bloat_approx as ( - select - avg(approx_free_percent)::double precision as approx_free_percent, - sum(approx_free_space)::double precision as approx_free_space, - avg(dead_tuple_percent)::double precision as dead_tuple_percent, - sum(dead_tuple_len)::double precision as dead_tuple_len - from - pg_class c - join - pg_namespace n on n.oid = c.relnamespace - join lateral pgstattuple_approx(c.oid) on (c.oid not in (select relation from pg_locks where mode = 'AccessExclusiveLock')) -- skip locked tables - where - relkind in ('r', 'm') - and c.relpages >= 128 -- tables >1mb - and not n.nspname != 'information_schema' - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - approx_free_percent, - approx_free_space as approx_free_space_b, - dead_tuple_percent, - dead_tuple_len as dead_tuple_len_b - from - table_bloat_approx - where - approx_free_space > 0 - gauges: - - '*' + and c.relpages >= 128 -- tables >1mb + and n.nspname != 'information_schema' -- exclude information_schema (the former "not ... !=" selected only that schema) + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + approx_free_percent, + approx_free_space as approx_free_space_b, + dead_tuple_percent, + dead_tuple_len as dead_tuple_len_b + from + table_bloat_approx + where + approx_free_space > 0 + gauges: + - '*' table_bloat_approx_summary_sql: - description: > - This metric provides a summary of approximate table bloat statistics, including the total bloat size and percentage for the current database. - It aggregates data from multiple tables to give an overview of bloat across the database. 
- sqls: - 11: | - WITH q_bloat AS ( - SELECT - quote_ident(schemaname)||'.'||quote_ident(tblname) as full_table_name, - bloat_ratio as approx_bloat_percent, - bloat_size as approx_bloat_bytes, - fillfactor - FROM ( - - /* WARNING: executed with a non-superuser role, the query inspect only tables you are granted to read. - * This query is compatible with PostgreSQL 9.0 and more - */ - SELECT current_database(), - schemaname, - tblname, - bs * tblpages AS real_size, - (tblpages - est_tblpages) * bs AS extra_size, - CASE - WHEN tblpages - est_tblpages > 0 - THEN 100 * (tblpages - est_tblpages) / tblpages::float - ELSE 0 - END AS extra_ratio, - fillfactor, - CASE - WHEN tblpages - est_tblpages_ff > 0 - THEN (tblpages - est_tblpages_ff) * bs - ELSE 0 - END AS bloat_size, - CASE - WHEN tblpages - est_tblpages_ff > 0 - THEN 100 * (tblpages - est_tblpages_ff) / tblpages::float - ELSE 0 - END AS bloat_ratio, - is_na - -- , (pst).free_percent + (pst).dead_tuple_percent AS real_frag - FROM ( - SELECT ceil(reltuples / ((bs - page_hdr) / tpl_size)) + ceil(toasttuples / 4) AS est_tblpages, - ceil(reltuples / ((bs - page_hdr) * fillfactor / (tpl_size * 100))) + - ceil(toasttuples / 4) AS est_tblpages_ff, - tblpages, - fillfactor, - bs, - tblid, - schemaname, - tblname, - heappages, - toastpages, - is_na - -- , stattuple.pgstattuple(tblid) AS pst - FROM ( - SELECT (4 + tpl_hdr_size + tpl_data_size + (2 * ma) - - CASE WHEN tpl_hdr_size % ma = 0 THEN ma ELSE tpl_hdr_size % ma END - - CASE - WHEN ceil(tpl_data_size)::int % ma = 0 THEN ma - ELSE ceil(tpl_data_size)::int % ma END - ) AS tpl_size, - bs - page_hdr AS size_per_block, - (heappages + toastpages) AS tblpages, - heappages, - toastpages, - reltuples, - toasttuples, - bs, - page_hdr, - tblid, - schemaname, - tblname, - fillfactor, - is_na - FROM ( - SELECT tbl.oid AS tblid, - ns.nspname AS schemaname, - tbl.relname AS tblname, - tbl.reltuples, - tbl.relpages AS heappages, - coalesce(toast.relpages, 0) AS toastpages, - 
coalesce(toast.reltuples, 0) AS toasttuples, - coalesce(substring( - array_to_string(tbl.reloptions, ' ') - FROM 'fillfactor=([0-9]+)')::smallint, - 100) AS fillfactor, - current_setting('block_size')::numeric AS bs, - CASE - WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' - THEN 8 - ELSE 4 END AS ma, - 24 AS page_hdr, - 23 + CASE - WHEN MAX(coalesce(null_frac, 0)) > 0 THEN (7 + count(*)) / 8 - ELSE 0::int END - + - CASE WHEN tbl.relhasoids THEN 4 ELSE 0 END AS tpl_hdr_size, - sum((1 - coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS tpl_data_size, - bool_or(att.atttypid = 'pg_catalog.name'::regtype) - OR count(att.attname) <> count(s.attname) AS is_na - FROM pg_attribute AS att - JOIN pg_class AS tbl ON att.attrelid = tbl.oid - JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace - LEFT JOIN pg_stats AS s ON s.schemaname = ns.nspname - AND s.tablename = tbl.relname AND s.inherited = false AND - s.attname = att.attname - LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid - WHERE att.attnum > 0 - AND NOT att.attisdropped - AND tbl.relkind IN ('r', 'm') - AND ns.nspname != 'information_schema' - GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, tbl.relhasoids - ORDER BY 2, 3 - ) AS s - ) AS s2 - ) AS s3 - -- WHERE NOT is_na - ) s4 - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - (select sum(approx_bloat_bytes) from q_bloat) as approx_table_bloat_b, - ((select sum(approx_bloat_bytes) from q_bloat) * 100 / pg_database_size(current_database()))::int8 as approx_bloat_percentage - 12: | - WITH q_bloat AS ( - SELECT quote_ident(schemaname) || '.' || quote_ident(tblname) as full_table_name, - bloat_ratio as approx_bloat_percent, - bloat_size as approx_bloat_bytes, - fillfactor - FROM ( - - /* WARNING: executed with a non-superuser role, the query inspect only tables you are granted to read. 
- * This query is compatible with PostgreSQL 9.0 and more - */ - SELECT current_database(), - schemaname, - tblname, - bs * tblpages AS real_size, - (tblpages - est_tblpages) * bs AS extra_size, - CASE - WHEN tblpages > 0 AND tblpages - est_tblpages > 0 - THEN 100 * (tblpages - est_tblpages) / tblpages::float - ELSE 0 - END AS extra_ratio, - fillfactor, - CASE - WHEN tblpages - est_tblpages_ff > 0 - THEN (tblpages - est_tblpages_ff) * bs - ELSE 0 - END AS bloat_size, - CASE - WHEN tblpages > 0 AND tblpages - est_tblpages_ff > 0 - THEN 100 * (tblpages - est_tblpages_ff) / tblpages::float - ELSE 0 - END AS bloat_ratio, - is_na - -- , (pst).free_percent + (pst).dead_tuple_percent AS real_frag - FROM ( - SELECT ceil(reltuples / ((bs - page_hdr) / tpl_size)) + ceil(toasttuples / 4) AS est_tblpages, - ceil(reltuples / ((bs - page_hdr) * fillfactor / (tpl_size * 100))) + - ceil(toasttuples / 4) AS est_tblpages_ff, - tblpages, - fillfactor, - bs, - tblid, - schemaname, - tblname, - heappages, - toastpages, - is_na - -- , stattuple.pgstattuple(tblid) AS pst - FROM ( - SELECT (4 + tpl_hdr_size + tpl_data_size + (2 * ma) - - CASE WHEN tpl_hdr_size % ma = 0 THEN ma ELSE tpl_hdr_size % ma END - - CASE - WHEN ceil(tpl_data_size)::int % ma = 0 THEN ma - ELSE ceil(tpl_data_size)::int % ma END - ) AS tpl_size, - bs - page_hdr AS size_per_block, - (heappages + toastpages) AS tblpages, - heappages, - toastpages, - reltuples, - toasttuples, - bs, - page_hdr, - tblid, - schemaname, - tblname, - fillfactor, - is_na - FROM ( - SELECT tbl.oid AS tblid, - ns.nspname AS schemaname, - tbl.relname AS tblname, - tbl.reltuples, - tbl.relpages AS heappages, - coalesce(toast.relpages, 0) AS toastpages, - coalesce(toast.reltuples, 0) AS toasttuples, - coalesce(substring( - array_to_string(tbl.reloptions, ' ') - FROM 'fillfactor=([0-9]+)')::smallint, - 100) AS fillfactor, - current_setting('block_size')::numeric AS bs, - CASE - WHEN version() ~ 'mingw32' OR version() ~ 
'64-bit|x86_64|ppc64|ia64|amd64' - THEN 8 - ELSE 4 END AS ma, - 24 AS page_hdr, - 23 + CASE - WHEN MAX(coalesce(null_frac, 0)) > 0 THEN (7 + count(*)) / 8 - ELSE 0::int END - + - 0 AS tpl_hdr_size, - sum((1 - coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS tpl_data_size, - bool_or(att.atttypid = 'pg_catalog.name'::regtype) - OR - count(att.attname) <> count(s.attname) AS is_na - FROM pg_attribute AS att - JOIN pg_class AS tbl ON att.attrelid = tbl.oid - JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace - LEFT JOIN pg_stats AS s ON s.schemaname = ns.nspname - AND s.tablename = tbl.relname AND s.inherited = false AND - s.attname = att.attname - LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid - WHERE att.attnum > 0 - AND NOT att.attisdropped - AND tbl.relkind IN ('r', 'm') - AND ns.nspname != 'information_schema' - GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 - ORDER BY 2, 3 - ) AS s - ) AS s2 - ) AS s3 - -- WHERE NOT is_na - ) s4 - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - (select sum(approx_bloat_bytes) from q_bloat) as approx_table_bloat_b, - ((select sum(approx_bloat_bytes) from q_bloat) * 100 / pg_database_size(current_database()))::int8 as approx_bloat_percentage - gauges: - - '*' + description: > + This metric provides a summary of approximate table bloat statistics, including the total bloat size and percentage for the current database. + It aggregates data from multiple tables to give an overview of bloat across the database. + sqls: + 11: | + WITH q_bloat AS ( + SELECT + quote_ident(schemaname)||'.'||quote_ident(tblname) as full_table_name, + bloat_ratio as approx_bloat_percent, + bloat_size as approx_bloat_bytes, + fillfactor + FROM ( + + /* WARNING: executed with a non-superuser role, the query inspect only tables you are granted to read. 
+ * This query is compatible with PostgreSQL 9.0 and more + */ + SELECT current_database(), + schemaname, + tblname, + bs * tblpages AS real_size, + (tblpages - est_tblpages) * bs AS extra_size, + CASE + WHEN tblpages - est_tblpages > 0 + THEN 100 * (tblpages - est_tblpages) / tblpages::float + ELSE 0 + END AS extra_ratio, + fillfactor, + CASE + WHEN tblpages - est_tblpages_ff > 0 + THEN (tblpages - est_tblpages_ff) * bs + ELSE 0 + END AS bloat_size, + CASE + WHEN tblpages - est_tblpages_ff > 0 + THEN 100 * (tblpages - est_tblpages_ff) / tblpages::float + ELSE 0 + END AS bloat_ratio, + is_na + -- , (pst).free_percent + (pst).dead_tuple_percent AS real_frag + FROM ( + SELECT ceil(reltuples / ((bs - page_hdr) / tpl_size)) + ceil(toasttuples / 4) AS est_tblpages, + ceil(reltuples / ((bs - page_hdr) * fillfactor / (tpl_size * 100))) + + ceil(toasttuples / 4) AS est_tblpages_ff, + tblpages, + fillfactor, + bs, + tblid, + schemaname, + tblname, + heappages, + toastpages, + is_na + -- , stattuple.pgstattuple(tblid) AS pst + FROM ( + SELECT (4 + tpl_hdr_size + tpl_data_size + (2 * ma) + - CASE WHEN tpl_hdr_size % ma = 0 THEN ma ELSE tpl_hdr_size % ma END + - CASE + WHEN ceil(tpl_data_size)::int % ma = 0 THEN ma + ELSE ceil(tpl_data_size)::int % ma END + ) AS tpl_size, + bs - page_hdr AS size_per_block, + (heappages + toastpages) AS tblpages, + heappages, + toastpages, + reltuples, + toasttuples, + bs, + page_hdr, + tblid, + schemaname, + tblname, + fillfactor, + is_na + FROM ( + SELECT tbl.oid AS tblid, + ns.nspname AS schemaname, + tbl.relname AS tblname, + tbl.reltuples, + tbl.relpages AS heappages, + coalesce(toast.relpages, 0) AS toastpages, + coalesce(toast.reltuples, 0) AS toasttuples, + coalesce(substring( + array_to_string(tbl.reloptions, ' ') + FROM 'fillfactor=([0-9]+)')::smallint, + 100) AS fillfactor, + current_setting('block_size')::numeric AS bs, + CASE + WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' + THEN 8 + ELSE 4 END AS ma, + 
24 AS page_hdr, + 23 + CASE + WHEN MAX(coalesce(null_frac, 0)) > 0 THEN (7 + count(*)) / 8 + ELSE 0::int END + + + CASE WHEN tbl.relhasoids THEN 4 ELSE 0 END AS tpl_hdr_size, + sum((1 - coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS tpl_data_size, + bool_or(att.atttypid = 'pg_catalog.name'::regtype) + OR count(att.attname) <> count(s.attname) AS is_na + FROM pg_attribute AS att + JOIN pg_class AS tbl ON att.attrelid = tbl.oid + JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace + LEFT JOIN pg_stats AS s ON s.schemaname = ns.nspname + AND s.tablename = tbl.relname AND s.inherited = false AND + s.attname = att.attname + LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid + WHERE att.attnum > 0 + AND NOT att.attisdropped + AND tbl.relkind IN ('r', 'm') + AND ns.nspname != 'information_schema' + GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, tbl.relhasoids + ORDER BY 2, 3 + ) AS s + ) AS s2 + ) AS s3 + -- WHERE NOT is_na + ) s4 + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + (select sum(approx_bloat_bytes) from q_bloat) as approx_table_bloat_b, + ((select sum(approx_bloat_bytes) from q_bloat) * 100 / pg_database_size(current_database()))::int8 as approx_bloat_percentage + 12: | + WITH q_bloat AS ( + SELECT quote_ident(schemaname) || '.' || quote_ident(tblname) as full_table_name, + bloat_ratio as approx_bloat_percent, + bloat_size as approx_bloat_bytes, + fillfactor + FROM ( + + /* WARNING: executed with a non-superuser role, the query inspect only tables you are granted to read. 
+ * This query is compatible with PostgreSQL 9.0 and more + */ + SELECT current_database(), + schemaname, + tblname, + bs * tblpages AS real_size, + (tblpages - est_tblpages) * bs AS extra_size, + CASE + WHEN tblpages > 0 AND tblpages - est_tblpages > 0 + THEN 100 * (tblpages - est_tblpages) / tblpages::float + ELSE 0 + END AS extra_ratio, + fillfactor, + CASE + WHEN tblpages - est_tblpages_ff > 0 + THEN (tblpages - est_tblpages_ff) * bs + ELSE 0 + END AS bloat_size, + CASE + WHEN tblpages > 0 AND tblpages - est_tblpages_ff > 0 + THEN 100 * (tblpages - est_tblpages_ff) / tblpages::float + ELSE 0 + END AS bloat_ratio, + is_na + -- , (pst).free_percent + (pst).dead_tuple_percent AS real_frag + FROM ( + SELECT ceil(reltuples / ((bs - page_hdr) / tpl_size)) + ceil(toasttuples / 4) AS est_tblpages, + ceil(reltuples / ((bs - page_hdr) * fillfactor / (tpl_size * 100))) + + ceil(toasttuples / 4) AS est_tblpages_ff, + tblpages, + fillfactor, + bs, + tblid, + schemaname, + tblname, + heappages, + toastpages, + is_na + -- , stattuple.pgstattuple(tblid) AS pst + FROM ( + SELECT (4 + tpl_hdr_size + tpl_data_size + (2 * ma) + - CASE WHEN tpl_hdr_size % ma = 0 THEN ma ELSE tpl_hdr_size % ma END + - CASE + WHEN ceil(tpl_data_size)::int % ma = 0 THEN ma + ELSE ceil(tpl_data_size)::int % ma END + ) AS tpl_size, + bs - page_hdr AS size_per_block, + (heappages + toastpages) AS tblpages, + heappages, + toastpages, + reltuples, + toasttuples, + bs, + page_hdr, + tblid, + schemaname, + tblname, + fillfactor, + is_na + FROM ( + SELECT tbl.oid AS tblid, + ns.nspname AS schemaname, + tbl.relname AS tblname, + tbl.reltuples, + tbl.relpages AS heappages, + coalesce(toast.relpages, 0) AS toastpages, + coalesce(toast.reltuples, 0) AS toasttuples, + coalesce(substring( + array_to_string(tbl.reloptions, ' ') + FROM 'fillfactor=([0-9]+)')::smallint, + 100) AS fillfactor, + current_setting('block_size')::numeric AS bs, + CASE + WHEN version() ~ 'mingw32' OR version() ~ 
'64-bit|x86_64|ppc64|ia64|amd64' + THEN 8 + ELSE 4 END AS ma, + 24 AS page_hdr, + 23 + CASE + WHEN MAX(coalesce(null_frac, 0)) > 0 THEN (7 + count(*)) / 8 + ELSE 0::int END + + + 0 AS tpl_hdr_size, + sum((1 - coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS tpl_data_size, + bool_or(att.atttypid = 'pg_catalog.name'::regtype) + OR + count(att.attname) <> count(s.attname) AS is_na + FROM pg_attribute AS att + JOIN pg_class AS tbl ON att.attrelid = tbl.oid + JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace + LEFT JOIN pg_stats AS s ON s.schemaname = ns.nspname + AND s.tablename = tbl.relname AND s.inherited = false AND + s.attname = att.attname + LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid + WHERE att.attnum > 0 + AND NOT att.attisdropped + AND tbl.relkind IN ('r', 'm') + AND ns.nspname != 'information_schema' + GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 + ORDER BY 2, 3 + ) AS s + ) AS s2 + ) AS s3 + -- WHERE NOT is_na + ) s4 + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + (select sum(approx_bloat_bytes) from q_bloat) as approx_table_bloat_b, + ((select sum(approx_bloat_bytes) from q_bloat) * 100 / pg_database_size(current_database()))::int8 as approx_bloat_percentage + gauges: + - '*' table_hashes: - description: > - This metric collects hashes of table definitions to detect changes in the schema. - It uses the `pg_catalog.pg_tables` view to gather information about tables and their columns. - The hash is computed based on the table schema, name, and column definitions. 
- sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - quote_ident(table_schema)||'.'||quote_ident(table_name) as tag_table, - md5((array_agg((c.*)::text order by ordinal_position))::text) - from ( - SELECT current_database()::information_schema.sql_identifier AS table_catalog, - nc.nspname::information_schema.sql_identifier AS table_schema, - c.relname::information_schema.sql_identifier AS table_name, - a.attname::information_schema.sql_identifier AS column_name, - a.attnum::information_schema.cardinal_number AS ordinal_position, - pg_get_expr(ad.adbin, ad.adrelid)::information_schema.character_data AS column_default, - CASE - WHEN a.attnotnull OR t.typtype = 'd'::"char" AND t.typnotnull THEN 'NO'::text - ELSE 'YES'::text - END::information_schema.yes_or_no AS is_nullable, - CASE - WHEN t.typtype = 'd'::"char" THEN - CASE - WHEN bt.typelem <> 0::oid AND bt.typlen = '-1'::integer THEN 'ARRAY'::text - WHEN nbt.nspname = 'pg_catalog'::name THEN format_type(t.typbasetype, NULL::integer) - ELSE 'USER-DEFINED'::text - END - ELSE - CASE - WHEN t.typelem <> 0::oid AND t.typlen = '-1'::integer THEN 'ARRAY'::text - WHEN nt.nspname = 'pg_catalog'::name THEN format_type(a.atttypid, NULL::integer) - ELSE 'USER-DEFINED'::text - END - END::information_schema.character_data AS data_type, - information_schema._pg_char_max_length(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS character_maximum_length, - information_schema._pg_char_octet_length(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS character_octet_length, - information_schema._pg_numeric_precision(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS numeric_precision, - 
information_schema._pg_numeric_precision_radix(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS numeric_precision_radix, - information_schema._pg_numeric_scale(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS numeric_scale, - information_schema._pg_datetime_precision(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS datetime_precision, - information_schema._pg_interval_type(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.character_data AS interval_type, - NULL::integer::information_schema.cardinal_number AS interval_precision, - NULL::character varying::information_schema.sql_identifier AS character_set_catalog, - NULL::character varying::information_schema.sql_identifier AS character_set_schema, - NULL::character varying::information_schema.sql_identifier AS character_set_name, - CASE - WHEN nco.nspname IS NOT NULL THEN current_database() - ELSE NULL::name - END::information_schema.sql_identifier AS collation_catalog, - nco.nspname::information_schema.sql_identifier AS collation_schema, - co.collname::information_schema.sql_identifier AS collation_name, - CASE - WHEN t.typtype = 'd'::"char" THEN current_database() - ELSE NULL::name - END::information_schema.sql_identifier AS domain_catalog, - CASE - WHEN t.typtype = 'd'::"char" THEN nt.nspname - ELSE NULL::name - END::information_schema.sql_identifier AS domain_schema, - CASE - WHEN t.typtype = 'd'::"char" THEN t.typname - ELSE NULL::name - END::information_schema.sql_identifier AS domain_name, - current_database()::information_schema.sql_identifier AS udt_catalog, - COALESCE(nbt.nspname, nt.nspname)::information_schema.sql_identifier AS udt_schema, - COALESCE(bt.typname, t.typname)::information_schema.sql_identifier AS 
udt_name, - NULL::character varying::information_schema.sql_identifier AS scope_catalog, - NULL::character varying::information_schema.sql_identifier AS scope_schema, - NULL::character varying::information_schema.sql_identifier AS scope_name, - NULL::integer::information_schema.cardinal_number AS maximum_cardinality, - a.attnum::information_schema.sql_identifier AS dtd_identifier, - 'NO'::character varying::information_schema.yes_or_no AS is_self_referencing, - 'NO'::character varying::information_schema.yes_or_no AS is_identity, - NULL::character varying::information_schema.character_data AS identity_generation, - NULL::character varying::information_schema.character_data AS identity_start, - NULL::character varying::information_schema.character_data AS identity_increment, - NULL::character varying::information_schema.character_data AS identity_maximum, - NULL::character varying::information_schema.character_data AS identity_minimum, - NULL::character varying::information_schema.yes_or_no AS identity_cycle, - 'NEVER'::character varying::information_schema.character_data AS is_generated, - NULL::character varying::information_schema.character_data AS generation_expression, - CASE - WHEN c.relkind = 'r'::"char" OR (c.relkind = ANY (ARRAY['v'::"char", 'f'::"char"])) AND pg_column_is_updatable(c.oid::regclass, a.attnum, false) THEN 'YES'::text - ELSE 'NO'::text - END::information_schema.yes_or_no AS is_updatable - FROM pg_attribute a - LEFT JOIN pg_attrdef ad ON a.attrelid = ad.adrelid AND a.attnum = ad.adnum - JOIN (pg_class c - JOIN pg_namespace nc ON c.relnamespace = nc.oid) ON a.attrelid = c.oid - JOIN (pg_type t - JOIN pg_namespace nt ON t.typnamespace = nt.oid) ON a.atttypid = t.oid - LEFT JOIN (pg_type bt - JOIN pg_namespace nbt ON bt.typnamespace = nbt.oid) ON t.typtype = 'd'::"char" AND t.typbasetype = bt.oid - LEFT JOIN (pg_collation co - JOIN pg_namespace nco ON co.collnamespace = nco.oid) ON a.attcollation = co.oid AND (nco.nspname <> 'pg_catalog'::name OR 
co.collname <> 'default'::name) - WHERE NOT pg_is_other_temp_schema(nc.oid) AND a.attnum > 0 AND NOT a.attisdropped AND (c.relkind = ANY (ARRAY['r'::"char", 'v'::"char", 'f'::"char"])) - - ) c - where - not table_schema like any (array[E'pg\\_%', 'information_schema']) - group by - table_schema, table_name - order by - table_schema, table_name + description: > + This metric collects hashes of table definitions to detect changes in the schema. + It reads column metadata directly from the system catalogs (`pg_attribute`, `pg_class`, `pg_type`, and related tables), mirroring the `information_schema.columns` view. + The hash is computed based on the table schema, name, and column definitions. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + quote_ident(table_schema)||'.'||quote_ident(table_name) as tag_table, + md5((array_agg((c.*)::text order by ordinal_position))::text) + from ( + SELECT current_database()::information_schema.sql_identifier AS table_catalog, + nc.nspname::information_schema.sql_identifier AS table_schema, + c.relname::information_schema.sql_identifier AS table_name, + a.attname::information_schema.sql_identifier AS column_name, + a.attnum::information_schema.cardinal_number AS ordinal_position, + pg_get_expr(ad.adbin, ad.adrelid)::information_schema.character_data AS column_default, + CASE + WHEN a.attnotnull OR t.typtype = 'd'::"char" AND t.typnotnull THEN 'NO'::text + ELSE 'YES'::text + END::information_schema.yes_or_no AS is_nullable, + CASE + WHEN t.typtype = 'd'::"char" THEN + CASE + WHEN bt.typelem <> 0::oid AND bt.typlen = '-1'::integer THEN 'ARRAY'::text + WHEN nbt.nspname = 'pg_catalog'::name THEN format_type(t.typbasetype, NULL::integer) + ELSE 'USER-DEFINED'::text + END + ELSE + CASE + WHEN t.typelem <> 0::oid AND t.typlen = '-1'::integer THEN 'ARRAY'::text + WHEN nt.nspname = 'pg_catalog'::name THEN format_type(a.atttypid, NULL::integer) + ELSE 'USER-DEFINED'::text + END + END::information_schema.character_data AS data_type, +
information_schema._pg_char_max_length(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS character_maximum_length, + information_schema._pg_char_octet_length(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS character_octet_length, + information_schema._pg_numeric_precision(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS numeric_precision, + information_schema._pg_numeric_precision_radix(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS numeric_precision_radix, + information_schema._pg_numeric_scale(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS numeric_scale, + information_schema._pg_datetime_precision(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS datetime_precision, + information_schema._pg_interval_type(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.character_data AS interval_type, + NULL::integer::information_schema.cardinal_number AS interval_precision, + NULL::character varying::information_schema.sql_identifier AS character_set_catalog, + NULL::character varying::information_schema.sql_identifier AS character_set_schema, + NULL::character varying::information_schema.sql_identifier AS character_set_name, + CASE + WHEN nco.nspname IS NOT NULL THEN current_database() + ELSE NULL::name + END::information_schema.sql_identifier AS collation_catalog, + nco.nspname::information_schema.sql_identifier AS collation_schema, + co.collname::information_schema.sql_identifier AS collation_name, + CASE + WHEN t.typtype = 'd'::"char" THEN 
current_database() + ELSE NULL::name + END::information_schema.sql_identifier AS domain_catalog, + CASE + WHEN t.typtype = 'd'::"char" THEN nt.nspname + ELSE NULL::name + END::information_schema.sql_identifier AS domain_schema, + CASE + WHEN t.typtype = 'd'::"char" THEN t.typname + ELSE NULL::name + END::information_schema.sql_identifier AS domain_name, + current_database()::information_schema.sql_identifier AS udt_catalog, + COALESCE(nbt.nspname, nt.nspname)::information_schema.sql_identifier AS udt_schema, + COALESCE(bt.typname, t.typname)::information_schema.sql_identifier AS udt_name, + NULL::character varying::information_schema.sql_identifier AS scope_catalog, + NULL::character varying::information_schema.sql_identifier AS scope_schema, + NULL::character varying::information_schema.sql_identifier AS scope_name, + NULL::integer::information_schema.cardinal_number AS maximum_cardinality, + a.attnum::information_schema.sql_identifier AS dtd_identifier, + 'NO'::character varying::information_schema.yes_or_no AS is_self_referencing, + 'NO'::character varying::information_schema.yes_or_no AS is_identity, + NULL::character varying::information_schema.character_data AS identity_generation, + NULL::character varying::information_schema.character_data AS identity_start, + NULL::character varying::information_schema.character_data AS identity_increment, + NULL::character varying::information_schema.character_data AS identity_maximum, + NULL::character varying::information_schema.character_data AS identity_minimum, + NULL::character varying::information_schema.yes_or_no AS identity_cycle, + 'NEVER'::character varying::information_schema.character_data AS is_generated, + NULL::character varying::information_schema.character_data AS generation_expression, + CASE + WHEN c.relkind = 'r'::"char" OR (c.relkind = ANY (ARRAY['v'::"char", 'f'::"char"])) AND pg_column_is_updatable(c.oid::regclass, a.attnum, false) THEN 'YES'::text + ELSE 'NO'::text + 
END::information_schema.yes_or_no AS is_updatable + FROM pg_attribute a + LEFT JOIN pg_attrdef ad ON a.attrelid = ad.adrelid AND a.attnum = ad.adnum + JOIN (pg_class c + JOIN pg_namespace nc ON c.relnamespace = nc.oid) ON a.attrelid = c.oid + JOIN (pg_type t + JOIN pg_namespace nt ON t.typnamespace = nt.oid) ON a.atttypid = t.oid + LEFT JOIN (pg_type bt + JOIN pg_namespace nbt ON bt.typnamespace = nbt.oid) ON t.typtype = 'd'::"char" AND t.typbasetype = bt.oid + LEFT JOIN (pg_collation co + JOIN pg_namespace nco ON co.collnamespace = nco.oid) ON a.attcollation = co.oid AND (nco.nspname <> 'pg_catalog'::name OR co.collname <> 'default'::name) + WHERE NOT pg_is_other_temp_schema(nc.oid) AND a.attnum > 0 AND NOT a.attisdropped AND (c.relkind = ANY (ARRAY['r'::"char", 'v'::"char", 'f'::"char"])) + + ) c + where + not table_schema like any (array[E'pg\\_%', 'information_schema']) + group by + table_schema, table_name + order by + table_schema, table_name table_io_stats: - description: > - This metric collects I/O statistics for tables, including heap and index block reads and hits. - It provides insights into the performance of table access patterns. 
- sqls: - 11: |- - select * from ( - with recursive - q_root_part as ( - select c.oid, - c.relkind, - n.nspname root_schema, - c.relname root_relname - from pg_class c - join pg_namespace n on n.oid = c.relnamespace - where relkind in ('p', 'r') - and relpersistence != 't' - and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) - and not exists(select * from pg_inherits where inhrelid = c.oid) - and exists(select * from pg_inherits where inhparent = c.oid) - ), - q_parts (relid, relkind, level, root) as ( - select oid, relkind, 1, oid - from q_root_part - union all - select inhrelid, c.relkind, level + 1, q.root - from pg_inherits i - join q_parts q on inhparent = q.relid - join pg_class c on c.oid = i.inhrelid - ), - q_tstats as ( - SELECT (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - relid, - schemaname::text as tag_schema, - relname::text as tag_table_name, - quote_ident(schemaname) || '.' || quote_ident(relname) as tag_table_full_name, - heap_blks_read, - heap_blks_hit, - idx_blks_read, - idx_blks_hit, - toast_blks_read, - toast_blks_hit, - tidx_blks_read, - tidx_blks_hit - FROM pg_statio_user_tables - WHERE NOT schemaname LIKE E'pg\\_temp%' - AND (heap_blks_read > 0 OR heap_blks_hit > 0 OR idx_blks_read > 0 OR idx_blks_hit > 0 OR - tidx_blks_read > 0 OR - tidx_blks_hit > 0) - ) - select epoch_ns, - tag_schema, - tag_table_name, - tag_table_full_name, - 0 as is_part_root, - heap_blks_read, - heap_blks_hit, - idx_blks_read, - idx_blks_hit, - toast_blks_read, - toast_blks_hit, - tidx_blks_read, - tidx_blks_hit - from q_tstats - where not tag_schema like E'\\_timescaledb%' - and not exists (select * from q_root_part where oid = q_tstats.relid) - - union all - - select * - from ( - select epoch_ns, - quote_ident(qr.root_schema) as tag_schema, - quote_ident(qr.root_relname) as tag_table_name, - quote_ident(qr.root_schema) || '.' 
|| quote_ident(qr.root_relname) as tag_table_full_name, - 1 as is_part_root, - sum(heap_blks_read)::int8, - sum(heap_blks_hit)::int8, - sum(idx_blks_read)::int8, - sum(idx_blks_hit)::int8, - sum(toast_blks_read)::int8, - sum(toast_blks_hit)::int8, - sum(tidx_blks_read)::int8, - sum(tidx_blks_hit)::int8 - from q_tstats ts - join q_parts qp on qp.relid = ts.relid - join q_root_part qr on qr.oid = qp.root - group by 1, 2, 3, 4 - ) x - ) y - order by - coalesce(heap_blks_read, 0) + - coalesce(heap_blks_hit, 0) + - coalesce(idx_blks_read, 0) + - coalesce(idx_blks_hit, 0) + - coalesce(toast_blks_read, 0) + - coalesce(toast_blks_hit, 0) + - coalesce(tidx_blks_read, 0) + - coalesce(tidx_blks_hit, 0) - desc limit 300 + description: > + This metric collects I/O statistics for tables, including heap and index block reads and hits. + It provides insights into the performance of table access patterns. + sqls: + 11: |- + select * from ( + with recursive + q_root_part as ( + select c.oid, + c.relkind, + n.nspname root_schema, + c.relname root_relname + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where relkind in ('p', 'r') + and relpersistence != 't' + and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and not exists(select * from pg_inherits where inhrelid = c.oid) + and exists(select * from pg_inherits where inhparent = c.oid) + ), + q_parts (relid, relkind, level, root) as ( + select oid, relkind, 1, oid + from q_root_part + union all + select inhrelid, c.relkind, level + 1, q.root + from pg_inherits i + join q_parts q on inhparent = q.relid + join pg_class c on c.oid = i.inhrelid + ), + q_tstats as ( + SELECT (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + relid, + schemaname::text as tag_schema, + relname::text as tag_table_name, + quote_ident(schemaname) || '.' 
|| quote_ident(relname) as tag_table_full_name, + heap_blks_read, + heap_blks_hit, + idx_blks_read, + idx_blks_hit, + toast_blks_read, + toast_blks_hit, + tidx_blks_read, + tidx_blks_hit + FROM pg_statio_user_tables + WHERE NOT schemaname LIKE E'pg\\_temp%' + AND (heap_blks_read > 0 OR heap_blks_hit > 0 OR idx_blks_read > 0 OR idx_blks_hit > 0 OR + tidx_blks_read > 0 OR + tidx_blks_hit > 0) + ) + select epoch_ns, + tag_schema, + tag_table_name, + tag_table_full_name, + 0 as is_part_root, + heap_blks_read, + heap_blks_hit, + idx_blks_read, + idx_blks_hit, + toast_blks_read, + toast_blks_hit, + tidx_blks_read, + tidx_blks_hit + from q_tstats + where not tag_schema like E'\\_timescaledb%' + and not exists (select * from q_root_part where oid = q_tstats.relid) + + union all + + select * + from ( + select epoch_ns, + quote_ident(qr.root_schema) as tag_schema, + quote_ident(qr.root_relname) as tag_table_name, + quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, + 1 as is_part_root, + sum(heap_blks_read)::int8, + sum(heap_blks_hit)::int8, + sum(idx_blks_read)::int8, + sum(idx_blks_hit)::int8, + sum(toast_blks_read)::int8, + sum(toast_blks_hit)::int8, + sum(tidx_blks_read)::int8, + sum(tidx_blks_hit)::int8 + from q_tstats ts + join q_parts qp on qp.relid = ts.relid + join q_root_part qr on qr.oid = qp.root + group by 1, 2, 3, 4 + ) x + ) y + order by + coalesce(heap_blks_read, 0) + + coalesce(heap_blks_hit, 0) + + coalesce(idx_blks_read, 0) + + coalesce(idx_blks_hit, 0) + + coalesce(toast_blks_read, 0) + + coalesce(toast_blks_hit, 0) + + coalesce(tidx_blks_read, 0) + + coalesce(tidx_blks_hit, 0) + desc limit 300 table_stats: - description: > - This metric collects statistics about user tables, including size, vacuum status, and transaction freeze age. - It provides insights into the health and performance of tables in the database. 
- sqls: - 11: |- - with recursive - q_root_part as ( - select c.oid, - c.relkind, - n.nspname root_schema, - c.relname root_relname - from pg_class c - join pg_namespace n on n.oid = c.relnamespace - where relkind in ('p', 'r') - and relpersistence != 't' - and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) - and not exists(select * from pg_inherits where inhrelid = c.oid) - and exists(select * from pg_inherits where inhparent = c.oid) - ), - q_parts (relid, relkind, level, root) as ( - select oid, relkind, 1, oid - from q_root_part - union all - select inhrelid, c.relkind, level + 1, q.root - from pg_inherits i - join q_parts q on inhparent = q.relid - join pg_class c on c.oid = i.inhrelid - ), - q_tstats as ( - select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - relid, -- not sent to final output - quote_ident(schemaname) as tag_schema, - quote_ident(ut.relname) as tag_table_name, - quote_ident(schemaname) || '.' || quote_ident(ut.relname) as tag_table_full_name, - pg_table_size(relid) as table_size_b, - abs(greatest(ceil(log((pg_table_size(relid) + 1) / 10 ^ 6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
- pg_total_relation_size(relid) as total_relation_size_b, - case when reltoastrelid != 0 then pg_total_relation_size(reltoastrelid) else 0::int8 end as toast_size_b, - (extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum, - (extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze, - case when 'autovacuum_enabled=off' = ANY (c.reloptions) then 1 else 0 end as no_autovacuum, - seq_scan, - seq_tup_read, - coalesce(idx_scan, 0) as idx_scan, - coalesce(idx_tup_fetch, 0) as idx_tup_fetch, - n_tup_ins, - n_tup_upd, - n_tup_del, - n_tup_hot_upd, - n_live_tup, - n_dead_tup, - vacuum_count, - autovacuum_count, - analyze_count, - autoanalyze_count, - case when c.relkind != 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age - from pg_stat_user_tables ut - join - pg_class c on c.oid = ut.relid - where - -- leaving out fully locked tables as pg_relation_size also wants a lock and would wait - not exists(select 1 from pg_locks where relation = relid and mode = 'AccessExclusiveLock') - and c.relpersistence != 't' -- and temp tables - ) - - select /* pgwatch_generated */ - epoch_ns, - tag_schema, - tag_table_name, - tag_table_full_name, - 0 as is_part_root, - table_size_b, - tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
- total_relation_size_b, - toast_size_b, - seconds_since_last_vacuum, - seconds_since_last_analyze, - no_autovacuum, - seq_scan, - seq_tup_read, - idx_scan, - idx_tup_fetch, - n_tup_ins, - n_tup_upd, - n_tup_del, - n_tup_hot_upd, - n_live_tup, - n_dead_tup, - vacuum_count, - autovacuum_count, - analyze_count, - autoanalyze_count, - tx_freeze_age - from q_tstats - where not tag_schema like E'\\_timescaledb%' - and not exists (select * from q_root_part where oid = q_tstats.relid) - - union all - - select * from ( - select - epoch_ns, - quote_ident(qr.root_schema) as tag_schema, - quote_ident(qr.root_relname) as tag_table_name, - quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, - 1 as is_part_root, - sum(table_size_b)::int8 table_size_b, - abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)), - 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. - sum(total_relation_size_b)::int8 total_relation_size_b, - sum(toast_size_b)::int8 toast_size_b, - min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum, - min(seconds_since_last_analyze)::int8 seconds_since_last_analyze, - sum(no_autovacuum)::int8 no_autovacuum, - sum(seq_scan)::int8 seq_scan, - sum(seq_tup_read)::int8 seq_tup_read, - sum(idx_scan)::int8 idx_scan, - sum(idx_tup_fetch)::int8 idx_tup_fetch, - sum(n_tup_ins)::int8 n_tup_ins, - sum(n_tup_upd)::int8 n_tup_upd, - sum(n_tup_del)::int8 n_tup_del, - sum(n_tup_hot_upd)::int8 n_tup_hot_upd, - sum(n_live_tup)::int8 n_live_tup, - sum(n_dead_tup)::int8 n_dead_tup, - sum(vacuum_count)::int8 vacuum_count, - sum(autovacuum_count)::int8 autovacuum_count, - sum(analyze_count)::int8 analyze_count, - sum(autoanalyze_count)::int8 autoanalyze_count, - max(tx_freeze_age)::int8 tx_freeze_age - from - q_tstats ts - join q_parts qp on qp.relid = ts.relid - join q_root_part qr on qr.oid = qp.root - group by - 1, 2, 3, 4 - ) x - order by table_size_b desc nulls last limit 300 - 16: |- - with recursive /* 
pgwatch_generated */ - q_root_part as ( - select c.oid, - c.relkind, - n.nspname root_schema, - c.relname root_relname - from pg_class c - join pg_namespace n on n.oid = c.relnamespace - where relkind in ('p', 'r') - and relpersistence != 't' - and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) - and not exists(select * from pg_inherits where inhrelid = c.oid) - and exists(select * from pg_inherits where inhparent = c.oid) - ), - q_parts (relid, relkind, level, root) as ( - select oid, relkind, 1, oid - from q_root_part - union all - select inhrelid, c.relkind, level + 1, q.root - from pg_inherits i - join q_parts q on inhparent = q.relid - join pg_class c on c.oid = i.inhrelid - ), - q_tstats as ( - select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - relid, -- not sent to final output - quote_ident(schemaname) as tag_schema, - quote_ident(ut.relname) as tag_table_name, - quote_ident(schemaname) || '.' || quote_ident(ut.relname) as tag_table_full_name, - pg_table_size(relid) as table_size_b, - abs(greatest(ceil(log((pg_table_size(relid) + 1) / 10 ^ 6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
- pg_total_relation_size(relid) as total_relation_size_b, - case when c.reltoastrelid != 0 then pg_total_relation_size(c.reltoastrelid) else 0::int8 end as toast_size_b, - (extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum, - (extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze, - case when 'autovacuum_enabled=off' = ANY (c.reloptions) then 1 else 0 end as no_autovacuum, - seq_scan, - seq_tup_read, - coalesce(idx_scan, 0) as idx_scan, - coalesce(idx_tup_fetch, 0) as idx_tup_fetch, - n_tup_ins, - n_tup_upd, - n_tup_del, - n_tup_hot_upd, - n_live_tup, - n_dead_tup, - vacuum_count, - autovacuum_count, - analyze_count, - autoanalyze_count, - case when c.relkind != 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age, - extract(epoch from now() - last_seq_scan)::int8 as last_seq_scan_s - from pg_stat_user_tables ut - join pg_class c on c.oid = ut.relid - left join pg_class t on t.oid = c.reltoastrelid - left join pg_index ti on ti.indrelid = t.oid - left join pg_class tir on tir.oid = ti.indexrelid - where - -- leaving out fully locked tables as pg_relation_size also wants a lock and would wait - not exists (select 1 from pg_locks where relation = relid and mode = 'AccessExclusiveLock') - and c.relpersistence != 't' -- and temp tables - order by case when c.relkind = 'p' then 1e9::int else coalesce(c.relpages, 0) + coalesce(t.relpages, 0) + coalesce(tir.relpages, 0) end desc - limit 1500 /* NB! When changing the bottom final LIMIT also adjust this limit. Should be at least 5x bigger as approx sizes depend a lot on vacuum frequency. - The general idea is to reduce filesystem "stat"-ing on tables that won't make it to final output anyways based on approximate size */ - ) - - select /* pgwatch_generated */ - epoch_ns, - tag_schema, - tag_table_name, - tag_table_full_name, - 0 as is_part_root, - table_size_b, - tag_table_size_cardinality_mb, -- i.e. 
0=<1MB, 1=<10MB, 2=<100MB,.. - total_relation_size_b, - toast_size_b, - seconds_since_last_vacuum, - seconds_since_last_analyze, - no_autovacuum, - seq_scan, - seq_tup_read, - idx_scan, - idx_tup_fetch, - n_tup_ins, - n_tup_upd, - n_tup_del, - n_tup_hot_upd, - n_live_tup, - n_dead_tup, - vacuum_count, - autovacuum_count, - analyze_count, - autoanalyze_count, - tx_freeze_age, - last_seq_scan_s - from q_tstats - where not tag_schema like E'\\_timescaledb%' - and not exists (select * from q_root_part where oid = q_tstats.relid) - - union all - - select * from ( - select - epoch_ns, - quote_ident(qr.root_schema) as tag_schema, - quote_ident(qr.root_relname) as tag_table_name, - quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, - 1 as is_part_root, - sum(table_size_b)::int8 table_size_b, - abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)), - 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. - sum(total_relation_size_b)::int8 total_relation_size_b, - sum(toast_size_b)::int8 toast_size_b, - min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum, - min(seconds_since_last_analyze)::int8 seconds_since_last_analyze, - sum(no_autovacuum)::int8 no_autovacuum, - sum(seq_scan)::int8 seq_scan, - sum(seq_tup_read)::int8 seq_tup_read, - sum(idx_scan)::int8 idx_scan, - sum(idx_tup_fetch)::int8 idx_tup_fetch, - sum(n_tup_ins)::int8 n_tup_ins, - sum(n_tup_upd)::int8 n_tup_upd, - sum(n_tup_del)::int8 n_tup_del, - sum(n_tup_hot_upd)::int8 n_tup_hot_upd, - sum(n_live_tup)::int8 n_live_tup, - sum(n_dead_tup)::int8 n_dead_tup, - sum(vacuum_count)::int8 vacuum_count, - sum(autovacuum_count)::int8 autovacuum_count, - sum(analyze_count)::int8 analyze_count, - sum(autoanalyze_count)::int8 autoanalyze_count, - max(tx_freeze_age)::int8 tx_freeze_age, - min(last_seq_scan_s)::int8 last_seq_scan_s - from - q_tstats ts - join q_parts qp on qp.relid = ts.relid - join q_root_part qr on qr.oid = qp.root - group by - 
1, 2, 3, 4 - ) x - order by table_size_b desc nulls last limit 300 - gauges: - - table_size_b - - total_relation_size_b - - toast_size_b - - seconds_since_last_vacuum - - seconds_since_last_analyze - - n_live_tup - - n_dead_tup - statement_timeout_seconds: 300 - table_stats_approx: - description: > - This metric collects approximate statistics about user tables, including size, vacuum status, and transaction freeze age. - It provides insights into the health and performance of tables in the database. - sqls: - 11: |- - with recursive /* pgwatch_generated */ - q_root_part as ( - select c.oid, + description: > + This metric collects statistics about user tables, including size, vacuum status, and transaction freeze age. + It provides insights into the health and performance of tables in the database. + sqls: + 11: |- + with recursive + q_root_part as ( + select c.oid, c.relkind, n.nspname root_schema, c.relname root_relname - from pg_class c + from pg_class c join pg_namespace n on n.oid = c.relnamespace - where relkind in ('p', 'r') - and relpersistence != 't' - and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) - and not exists(select * from pg_inherits where inhrelid = c.oid) - and exists(select * from pg_inherits where inhparent = c.oid) - ), - q_parts (relid, relkind, level, root) as ( - select oid, relkind, 1, oid - from q_root_part - union all - select inhrelid, c.relkind, level + 1, q.root - from pg_inherits i + where relkind in ('p', 'r') + and relpersistence != 't' + and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and not exists(select * from pg_inherits where inhrelid = c.oid) + and exists(select * from pg_inherits where inhparent = c.oid) + ), + q_parts (relid, relkind, level, root) as ( + select oid, relkind, 1, oid + from q_root_part + union all + select inhrelid, c.relkind, level + 1, q.root + from pg_inherits i join q_parts q on inhparent = q.relid join pg_class c on c.oid 
= i.inhrelid - ), - q_tstats as ( - with q_tbls_by_total_associated_relpages_approx as ( - select * from ( - select - c.oid, - c.relname, - c.relpages, - coalesce((select sum(relpages) from pg_class ci join pg_index i on i.indexrelid = ci.oid where i.indrelid = c.oid), 0) as index_relpages, - coalesce((select coalesce(ct.relpages, 0) + coalesce(cti.relpages, 0) from pg_class ct left join pg_index ti on ti.indrelid = ct.oid left join pg_class cti on cti.oid = ti.indexrelid where ct.oid = c.reltoastrelid), 0) as toast_relpages, - case when 'autovacuum_enabled=off' = ANY(c.reloptions) then 1 else 0 end as no_autovacuum, - case when c.relkind != 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age, - c.relpersistence - from - pg_class c - join pg_namespace n on n.oid = c.relnamespace - where - not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) - and c.relkind = 'r' - and c.relpersistence != 't' - ) x - order by relpages + index_relpages + toast_relpages desc limit 300 - ), q_block_size as ( - select current_setting('block_size')::int8 as bs - ) - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - relid, - quote_ident(schemaname)||'.'||quote_ident(ut.relname) as tag_table_full_name, - bs * relpages as table_size_b, - abs(greatest(ceil(log((bs*relpages+1) / 10^6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
- bs * (relpages + index_relpages + toast_relpages) as total_relation_size_b, - bs * toast_relpages as toast_size_b, - (extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum, - (extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze, - no_autovacuum, - seq_scan, - seq_tup_read, - coalesce(idx_scan, 0) as idx_scan, - coalesce(idx_tup_fetch, 0) as idx_tup_fetch, - n_tup_ins, - n_tup_upd, - n_tup_del, - n_tup_hot_upd, - n_live_tup, - n_dead_tup, - vacuum_count, - autovacuum_count, - analyze_count, - autoanalyze_count, - tx_freeze_age, - relpersistence - from - pg_stat_user_tables ut - join q_tbls_by_total_associated_relpages_approx t on t.oid = ut.relid - join q_block_size on true + ), + q_tstats as ( + select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + relid, -- not sent to final output + quote_ident(schemaname) as tag_schema, + quote_ident(ut.relname) as tag_table_name, + quote_ident(schemaname) || '.' || quote_ident(ut.relname) as tag_table_full_name, + pg_table_size(relid) as table_size_b, + abs(greatest(ceil(log((pg_table_size(relid) + 1) / 10 ^ 6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ pg_total_relation_size(relid) as total_relation_size_b, + case when reltoastrelid != 0 then pg_total_relation_size(reltoastrelid) else 0::int8 end as toast_size_b, + (extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum, + (extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze, + case when 'autovacuum_enabled=off' = ANY (c.reloptions) then 1 else 0 end as no_autovacuum, + seq_scan, + seq_tup_read, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + case when c.relkind != 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age + from pg_stat_user_tables ut + join + pg_class c on c.oid = ut.relid + where + -- leaving out fully locked tables as pg_relation_size also wants a lock and would wait + not exists(select 1 from pg_locks where relation = relid and mode = 'AccessExclusiveLock') + and c.relpersistence != 't' -- and temp tables + ) + + select /* pgwatch_generated */ + epoch_ns, + tag_schema, + tag_table_name, + tag_table_full_name, + 0 as is_part_root, + table_size_b, + tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ total_relation_size_b, + toast_size_b, + seconds_since_last_vacuum, + seconds_since_last_analyze, + no_autovacuum, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + tx_freeze_age + from q_tstats + where not tag_schema like E'\\_timescaledb%' + and not exists (select * from q_root_part where oid = q_tstats.relid) + + union all + + select * from ( + select + epoch_ns, + quote_ident(qr.root_schema) as tag_schema, + quote_ident(qr.root_relname) as tag_table_name, + quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, + 1 as is_part_root, + sum(table_size_b)::int8 table_size_b, + abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)), + 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. + sum(total_relation_size_b)::int8 total_relation_size_b, + sum(toast_size_b)::int8 toast_size_b, + min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum, + min(seconds_since_last_analyze)::int8 seconds_since_last_analyze, + sum(no_autovacuum)::int8 no_autovacuum, + sum(seq_scan)::int8 seq_scan, + sum(seq_tup_read)::int8 seq_tup_read, + sum(idx_scan)::int8 idx_scan, + sum(idx_tup_fetch)::int8 idx_tup_fetch, + sum(n_tup_ins)::int8 n_tup_ins, + sum(n_tup_upd)::int8 n_tup_upd, + sum(n_tup_del)::int8 n_tup_del, + sum(n_tup_hot_upd)::int8 n_tup_hot_upd, + sum(n_live_tup)::int8 n_live_tup, + sum(n_dead_tup)::int8 n_dead_tup, + sum(vacuum_count)::int8 vacuum_count, + sum(autovacuum_count)::int8 autovacuum_count, + sum(analyze_count)::int8 analyze_count, + sum(autoanalyze_count)::int8 autoanalyze_count, + max(tx_freeze_age)::int8 tx_freeze_age + from + q_tstats ts + join q_parts qp on qp.relid = ts.relid + join q_root_part qr on qr.oid = qp.root + group by + 1, 2, 3, 4 + ) x + order by table_size_b desc nulls last limit 300 + 16: |- + with recursive /* 
pgwatch_generated */ + q_root_part as ( + select c.oid, + c.relkind, + n.nspname root_schema, + c.relname root_relname + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where relkind in ('p', 'r') + and relpersistence != 't' + and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and not exists(select * from pg_inherits where inhrelid = c.oid) + and exists(select * from pg_inherits where inhparent = c.oid) + ), + q_parts (relid, relkind, level, root) as ( + select oid, relkind, 1, oid + from q_root_part + union all + select inhrelid, c.relkind, level + 1, q.root + from pg_inherits i + join q_parts q on inhparent = q.relid + join pg_class c on c.oid = i.inhrelid + ), + q_tstats as ( + select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + relid, -- not sent to final output + quote_ident(schemaname) as tag_schema, + quote_ident(ut.relname) as tag_table_name, + quote_ident(schemaname) || '.' || quote_ident(ut.relname) as tag_table_full_name, + pg_table_size(relid) as table_size_b, + abs(greatest(ceil(log((pg_table_size(relid) + 1) / 10 ^ 6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ pg_total_relation_size(relid) as total_relation_size_b, + case when c.reltoastrelid != 0 then pg_total_relation_size(c.reltoastrelid) else 0::int8 end as toast_size_b, + (extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum, + (extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze, + case when 'autovacuum_enabled=off' = ANY (c.reloptions) then 1 else 0 end as no_autovacuum, + seq_scan, + seq_tup_read, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + case when c.relkind != 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age, + extract(epoch from now() - last_seq_scan)::int8 as last_seq_scan_s + from pg_stat_user_tables ut + join pg_class c on c.oid = ut.relid + left join pg_class t on t.oid = c.reltoastrelid + left join pg_index ti on ti.indrelid = t.oid + left join pg_class tir on tir.oid = ti.indexrelid where -- leaving out fully locked tables as pg_relation_size also wants a lock and would wait not exists (select 1 from pg_locks where relation = relid and mode = 'AccessExclusiveLock') - order by relpages desc - ) - select /* pgwatch_generated */ + and c.relpersistence != 't' -- and temp tables + order by case when c.relkind = 'p' then 1e9::int else coalesce(c.relpages, 0) + coalesce(t.relpages, 0) + coalesce(tir.relpages, 0) end desc + limit 1500 /* NB! When changing the bottom final LIMIT also adjust this limit. Should be at least 5x bigger as approx sizes depend a lot on vacuum frequency. 
+ The general idea is to reduce filesystem "stat"-ing on tables that won't make it to final output anyways based on approximate size */ + ) + + select /* pgwatch_generated */ + epoch_ns, + tag_schema, + tag_table_name, + tag_table_full_name, + 0 as is_part_root, + table_size_b, + tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. + total_relation_size_b, + toast_size_b, + seconds_since_last_vacuum, + seconds_since_last_analyze, + no_autovacuum, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + tx_freeze_age, + last_seq_scan_s + from q_tstats + where not tag_schema like E'\\_timescaledb%' + and not exists (select * from q_root_part where oid = q_tstats.relid) + + union all + + select * from ( + select epoch_ns, - tag_table_full_name, - 0 as is_part_root, - table_size_b, - tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. - total_relation_size_b, - toast_size_b, - seconds_since_last_vacuum, - seconds_since_last_analyze, - no_autovacuum, - seq_scan, - seq_tup_read, - idx_scan, - idx_tup_fetch, - n_tup_ins, - n_tup_upd, - n_tup_del, - n_tup_hot_upd, - n_live_tup, - n_dead_tup, - vacuum_count, - autovacuum_count, - analyze_count, - autoanalyze_count, - tx_freeze_age - from q_tstats - where not exists (select * from q_root_part where oid = q_tstats.relid) - union all - select * from ( - select - epoch_ns, - quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, - 1 as is_part_root, - sum(table_size_b)::int8 table_size_b, - abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)), - 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
- sum(total_relation_size_b)::int8 total_relation_size_b, - sum(toast_size_b)::int8 toast_size_b, - min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum, - min(seconds_since_last_analyze)::int8 seconds_since_last_analyze, - sum(no_autovacuum)::int8 no_autovacuum, - sum(seq_scan)::int8 seq_scan, - sum(seq_tup_read)::int8 seq_tup_read, - sum(idx_scan)::int8 idx_scan, - sum(idx_tup_fetch)::int8 idx_tup_fetch, - sum(n_tup_ins)::int8 n_tup_ins, - sum(n_tup_upd)::int8 n_tup_upd, - sum(n_tup_del)::int8 n_tup_del, - sum(n_tup_hot_upd)::int8 n_tup_hot_upd, - sum(n_live_tup)::int8 n_live_tup, - sum(n_dead_tup)::int8 n_dead_tup, - sum(vacuum_count)::int8 vacuum_count, - sum(autovacuum_count)::int8 autovacuum_count, - sum(analyze_count)::int8 analyze_count, - sum(autoanalyze_count)::int8 autoanalyze_count, - max(tx_freeze_age)::int8 tx_freeze_age - from - q_tstats ts - join q_parts qp on qp.relid = ts.relid - join q_root_part qr on qr.oid = qp.root - group by - 1, 2 - ) x; - - gauges: - - table_size_b - - total_relation_size_b - - toast_size_b - - seconds_since_last_vacuum - - seconds_since_last_analyze - - n_live_tup - - n_dead_tup - metric_storage_name: table_stats - unused_indexes: - description: > - This metric collects information about unused indexes in the database. - It helps identify indexes that are not being used and can potentially be dropped to improve performance. 
- sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - * - from ( - select - format('%I.%I', sui.schemaname, sui.indexrelname) as tag_index_full_name, - sui.idx_scan, - coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b, - system_identifier::text as tag_sys_id /* to easily check also all replicas as could be still used there */ - from - pg_stat_user_indexes sui - join pg_index i on i.indexrelid = sui.indexrelid - join pg_control_system() on true - where not sui.schemaname like E'pg\\_temp%' - and idx_scan = 0 - and not (indisprimary or indisunique or indisexclusion) - and not exists (select * from pg_locks where relation = sui.relid and mode = 'AccessExclusiveLock') - ) x - where index_size_b > 100*1024^2 /* list >100MB only */ - order by index_size_b desc - limit 25 - vmstat: - description: > - This metric collects system-level statistics using the `vmstat` command. - It provides insights into memory usage, CPU load, and other system metrics. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - r, b, swpd, free, buff, cache, si, so, bi, bo, "in", cs, us, sy, id, wa, st, cpu_count, load_1m, load_5m, load_15m, total_memory + quote_ident(qr.root_schema) as tag_schema, + quote_ident(qr.root_relname) as tag_table_name, + quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, + 1 as is_part_root, + sum(table_size_b)::int8 table_size_b, + abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)), + 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ sum(total_relation_size_b)::int8 total_relation_size_b, + sum(toast_size_b)::int8 toast_size_b, + min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum, + min(seconds_since_last_analyze)::int8 seconds_since_last_analyze, + sum(no_autovacuum)::int8 no_autovacuum, + sum(seq_scan)::int8 seq_scan, + sum(seq_tup_read)::int8 seq_tup_read, + sum(idx_scan)::int8 idx_scan, + sum(idx_tup_fetch)::int8 idx_tup_fetch, + sum(n_tup_ins)::int8 n_tup_ins, + sum(n_tup_upd)::int8 n_tup_upd, + sum(n_tup_del)::int8 n_tup_del, + sum(n_tup_hot_upd)::int8 n_tup_hot_upd, + sum(n_live_tup)::int8 n_live_tup, + sum(n_dead_tup)::int8 n_dead_tup, + sum(vacuum_count)::int8 vacuum_count, + sum(autovacuum_count)::int8 autovacuum_count, + sum(analyze_count)::int8 analyze_count, + sum(autoanalyze_count)::int8 autoanalyze_count, + max(tx_freeze_age)::int8 tx_freeze_age, + min(last_seq_scan_s)::int8 last_seq_scan_s from - get_vmstat() - init_sql: |- - CREATE EXTENSION IF NOT EXISTS plpython3u; - - CREATE OR REPLACE FUNCTION get_vmstat( - IN delay int default 1, - OUT r int, OUT b int, OUT swpd int8, OUT free int8, OUT buff int8, OUT cache int8, OUT si int8, OUT so int8, OUT bi int8, - OUT bo int8, OUT "in" int, OUT cs int, OUT us int, OUT sy int, OUT id int, OUT wa int, OUT st int, - OUT cpu_count int, OUT load_1m float4, OUT load_5m float4, OUT load_15m float4, OUT total_memory int8 + q_tstats ts + join q_parts qp on qp.relid = ts.relid + join q_root_part qr on qr.oid = qp.root + group by + 1, 2, 3, 4 + ) x + order by table_size_b desc nulls last limit 300 + gauges: + - table_size_b + - total_relation_size_b + - toast_size_b + - seconds_since_last_vacuum + - seconds_since_last_analyze + - n_live_tup + - n_dead_tup + statement_timeout_seconds: 300 + table_stats_approx: + description: > + This metric collects approximate statistics about user tables, including size, vacuum status, and transaction freeze age. + It provides insights into the health and performance of tables in the database. 
+ sqls: + 11: |- + with recursive /* pgwatch_generated */ + q_root_part as ( + select c.oid, + c.relkind, + n.nspname root_schema, + c.relname root_relname + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where relkind in ('p', 'r') + and relpersistence != 't' + and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and not exists(select * from pg_inherits where inhrelid = c.oid) + and exists(select * from pg_inherits where inhparent = c.oid) + ), + q_parts (relid, relkind, level, root) as ( + select oid, relkind, 1, oid + from q_root_part + union all + select inhrelid, c.relkind, level + 1, q.root + from pg_inherits i + join q_parts q on inhparent = q.relid + join pg_class c on c.oid = i.inhrelid + ), + q_tstats as ( + with q_tbls_by_total_associated_relpages_approx as ( + select * from ( + select + c.oid, + c.relname, + c.relpages, + coalesce((select sum(relpages) from pg_class ci join pg_index i on i.indexrelid = ci.oid where i.indrelid = c.oid), 0) as index_relpages, + coalesce((select coalesce(ct.relpages, 0) + coalesce(cti.relpages, 0) from pg_class ct left join pg_index ti on ti.indrelid = ct.oid left join pg_class cti on cti.oid = ti.indexrelid where ct.oid = c.reltoastrelid), 0) as toast_relpages, + case when 'autovacuum_enabled=off' = ANY(c.reloptions) then 1 else 0 end as no_autovacuum, + case when c.relkind != 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age, + c.relpersistence + from + pg_class c + join pg_namespace n on n.oid = c.relnamespace + where + not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and c.relkind = 'r' + and c.relpersistence != 't' + ) x + order by relpages + index_relpages + toast_relpages desc limit 300 + ), q_block_size as ( + select current_setting('block_size')::int8 as bs ) - LANGUAGE plpython3u - AS $FUNCTION$ - from os import cpu_count, popen - unit = 1024 # 'vmstat' default block byte size - - cpu_count = cpu_count() - 
vmstat_lines = popen('vmstat {} 2'.format(delay)).readlines() - vm = [int(x) for x in vmstat_lines[-1].split()] - # plpy.notice(vm) - load_1m, load_5m, load_15m = None, None, None - with open('/proc/loadavg', 'r') as f: - la_line = f.readline() - if la_line: - splits = la_line.split() - if len(splits) == 5: - load_1m, load_5m, load_15m = splits[0], splits[1], splits[2] - - total_memory = None - with open('/proc/meminfo', 'r') as f: - mi_line = f.readline() - splits = mi_line.split() - # plpy.notice(splits) - if len(splits) == 3: - total_memory = int(splits[1]) * 1024 - - return vm[0], vm[1], vm[2] * unit, vm[3] * unit, vm[4] * unit, vm[5] * unit, vm[6] * unit, vm[7] * unit, vm[8] * unit, \ - vm[9] * unit, vm[10], vm[11], vm[12], vm[13], vm[14], vm[15], vm[16], cpu_count, load_1m, load_5m, load_15m, total_memory - $FUNCTION$; - - GRANT EXECUTE ON FUNCTION get_vmstat(int) TO pgwatch; - COMMENT ON FUNCTION get_vmstat(int) IS 'created for pgwatch'; + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + relid, + quote_ident(schemaname)||'.'||quote_ident(ut.relname) as tag_table_full_name, + bs * relpages as table_size_b, + abs(greatest(ceil(log((bs*relpages+1) / 10^6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ bs * (relpages + index_relpages + toast_relpages) as total_relation_size_b, + bs * toast_relpages as toast_size_b, + (extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum, + (extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze, + no_autovacuum, + seq_scan, + seq_tup_read, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + tx_freeze_age, + relpersistence + from + pg_stat_user_tables ut + join q_tbls_by_total_associated_relpages_approx t on t.oid = ut.relid + join q_block_size on true + where + -- leaving out fully locked tables as pg_relation_size also wants a lock and would wait + not exists (select 1 from pg_locks where relation = relid and mode = 'AccessExclusiveLock') + order by relpages desc + ) + select /* pgwatch_generated */ + epoch_ns, + tag_table_full_name, + 0 as is_part_root, + table_size_b, + tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. + total_relation_size_b, + toast_size_b, + seconds_since_last_vacuum, + seconds_since_last_analyze, + no_autovacuum, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + tx_freeze_age + from q_tstats + where not exists (select * from q_root_part where oid = q_tstats.relid) + union all + select * from ( + select + epoch_ns, + quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, + 1 as is_part_root, + sum(table_size_b)::int8 table_size_b, + abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)), + 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ sum(total_relation_size_b)::int8 total_relation_size_b, + sum(toast_size_b)::int8 toast_size_b, + min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum, + min(seconds_since_last_analyze)::int8 seconds_since_last_analyze, + sum(no_autovacuum)::int8 no_autovacuum, + sum(seq_scan)::int8 seq_scan, + sum(seq_tup_read)::int8 seq_tup_read, + sum(idx_scan)::int8 idx_scan, + sum(idx_tup_fetch)::int8 idx_tup_fetch, + sum(n_tup_ins)::int8 n_tup_ins, + sum(n_tup_upd)::int8 n_tup_upd, + sum(n_tup_del)::int8 n_tup_del, + sum(n_tup_hot_upd)::int8 n_tup_hot_upd, + sum(n_live_tup)::int8 n_live_tup, + sum(n_dead_tup)::int8 n_dead_tup, + sum(vacuum_count)::int8 vacuum_count, + sum(autovacuum_count)::int8 autovacuum_count, + sum(analyze_count)::int8 analyze_count, + sum(autoanalyze_count)::int8 autoanalyze_count, + max(tx_freeze_age)::int8 tx_freeze_age + from + q_tstats ts + join q_parts qp on qp.relid = ts.relid + join q_root_part qr on qr.oid = qp.root + group by + 1, 2 + ) x; + + gauges: + - table_size_b + - total_relation_size_b + - toast_size_b + - seconds_since_last_vacuum + - seconds_since_last_analyze + - n_live_tup + - n_dead_tup + metric_storage_name: table_stats + unused_indexes: + description: > + This metric collects information about unused indexes in the database. + It helps identify indexes that are not being used and can potentially be dropped to improve performance. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + * + from ( + select + format('%I.%I', sui.schemaname, sui.indexrelname) as tag_index_full_name, + sui.idx_scan, + coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b, + system_identifier::text as tag_sys_id /* to easily check also all replicas as could be still used there */ + from + pg_stat_user_indexes sui + join pg_index i on i.indexrelid = sui.indexrelid + join pg_control_system() on true + where not sui.schemaname like E'pg\\_temp%' + and idx_scan = 0 + and not (indisprimary or indisunique or indisexclusion) + and not exists (select * from pg_locks where relation = sui.relid and mode = 'AccessExclusiveLock') + ) x + where index_size_b > 100*1024^2 /* list >100MB only */ + order by index_size_b desc + limit 25 + vmstat: + description: > + This metric collects system-level statistics using the `vmstat` command. + It provides insights into memory usage, CPU load, and other system metrics. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + r, b, swpd, free, buff, cache, si, so, bi, bo, "in", cs, us, sy, id, wa, st, cpu_count, load_1m, load_5m, load_15m, total_memory + from + get_vmstat() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_vmstat( + IN delay int default 1, + OUT r int, OUT b int, OUT swpd int8, OUT free int8, OUT buff int8, OUT cache int8, OUT si int8, OUT so int8, OUT bi int8, + OUT bo int8, OUT "in" int, OUT cs int, OUT us int, OUT sy int, OUT id int, OUT wa int, OUT st int, + OUT cpu_count int, OUT load_1m float4, OUT load_5m float4, OUT load_15m float4, OUT total_memory int8 + ) + LANGUAGE plpython3u + AS $FUNCTION$ + from os import cpu_count, popen + unit = 1024 # 'vmstat' default block byte size + + cpu_count = cpu_count() + vmstat_lines = popen('vmstat {} 2'.format(delay)).readlines() + vm = [int(x) for x in vmstat_lines[-1].split()] + # plpy.notice(vm) + load_1m, load_5m, load_15m = None, None, None + with open('/proc/loadavg', 'r') as f: + la_line = f.readline() + if la_line: + splits = la_line.split() + if len(splits) == 5: + load_1m, load_5m, load_15m = splits[0], splits[1], splits[2] + + total_memory = None + with open('/proc/meminfo', 'r') as f: + mi_line = f.readline() + splits = mi_line.split() + # plpy.notice(splits) + if len(splits) == 3: + total_memory = int(splits[1]) * 1024 + + return vm[0], vm[1], vm[2] * unit, vm[3] * unit, vm[4] * unit, vm[5] * unit, vm[6] * unit, vm[7] * unit, vm[8] * unit, \ + vm[9] * unit, vm[10], vm[11], vm[12], vm[13], vm[14], vm[15], vm[16], cpu_count, load_1m, load_5m, load_15m, total_memory + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_vmstat(int) TO pgwatch; + COMMENT ON FUNCTION get_vmstat(int) IS 'created for pgwatch'; wal: - description: > - This metric collects information about the Write-Ahead Logging (WAL) system in PostgreSQL. 
- It provides insights into WAL activity, including the current WAL location, replay lag, and other related metrics. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - case - when pg_is_in_recovery() = false then - pg_wal_lsn_diff(pg_current_wal_lsn(), '0/0')::int8 - else - pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '0/0')::int8 - end as xlog_location_b, - case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, - extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, - system_identifier::text as tag_sys_id, - case - when pg_is_in_recovery() = false then - ('x'||substr(pg_walfile_name(pg_current_wal_lsn()), 1, 8))::bit(32)::int - else - (select min_recovery_end_timeline::int from pg_control_recovery()) - end as timeline - from pg_control_system() - gauges: - - '*' - is_instance_level: true + description: > + This metric collects information about the Write-Ahead Logging (WAL) system in PostgreSQL. + It provides insights into WAL activity, including the current WAL location, replay lag, and other related metrics. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + case + when pg_is_in_recovery() = false then + pg_wal_lsn_diff(pg_current_wal_lsn(), '0/0')::int8 + else + pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '0/0')::int8 + end as xlog_location_b, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + system_identifier::text as tag_sys_id, + case + when pg_is_in_recovery() = false then + ('x'||substr(pg_walfile_name(pg_current_wal_lsn()), 1, 8))::bit(32)::int + else + (select min_recovery_end_timeline::int from pg_control_recovery()) + end as timeline + from pg_control_system() + gauges: + - '*' + is_instance_level: true wal_receiver: - description: > - This metric collects information about the WAL receiver process in PostgreSQL. - It provides insights into the status of the WAL receiver, including replay lag and last replay timestamp. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())::int8 as replay_lag_b, - extract(epoch from (now() - pg_last_xact_replay_timestamp()))::int8 as last_replay_s - node_status: standby - gauges: - - '*' - is_instance_level: true + description: > + This metric collects information about the WAL receiver process in PostgreSQL. + It provides insights into the status of the WAL receiver, including replay lag and last replay timestamp. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())::int8 as replay_lag_b, + extract(epoch from (now() - pg_last_xact_replay_timestamp()))::int8 as last_replay_s + node_status: standby + gauges: + - '*' + is_instance_level: true wal_size: - description: > - This metric collects the size of the Write-Ahead Log (WAL) directory in PostgreSQL. - It provides insights into the total size of WAL files currently stored in the database. - sqls: - 11: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - sum(size)::int8 as wal_size_b - from pg_ls_waldir() - gauges: - - '*' - is_instance_level: true + description: > + This metric collects the size of the Write-Ahead Log (WAL) directory in PostgreSQL. + It provides insights into the total size of WAL files currently stored in the database. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + sum(size)::int8 as wal_size_b + from pg_ls_waldir() + gauges: + - '*' + is_instance_level: true wal_stats: - description: > - This metric collects statistics about the Write-Ahead Logging (WAL) system in PostgreSQL. - It provides insights into WAL activity, including the number of records, full page images, and write/sync times. - sqls: - 14: |- - select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - wal_records, - wal_fpi, - (wal_bytes / 1024)::int8 as wal_bytes_kb, - wal_buffers_full, - wal_write, - wal_sync, - wal_write_time::int8, - wal_sync_time::int8 - from - pg_stat_wal + description: > + This metric collects statistics about the Write-Ahead Logging (WAL) system in PostgreSQL. + It provides insights into WAL activity, including the number of records, full page images, and write/sync times. 
+ sqls: + 14: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + wal_records, + wal_fpi, + (wal_bytes / 1024)::int8 as wal_bytes_kb, + wal_buffers_full, + wal_write, + wal_sync, + wal_write_time::int8, + wal_sync_time::int8 + from + pg_stat_wal + gauges: + - '*' + is_instance_level: true wait_events: - query: | - SELECT datname datname, coalesce (wait_event, 'CPU*') || ' - ' || coalesce(wait_event_type, 'CPU*') as wait_event_type, count(*) as total - FROM <% if @role == "db" -%>pg_stat_activity_all<% else -%>pg_stat_activity<% end -%> - WHERE state = 'active' AND datname = current_database() - GROUP BY datname, coalesce (wait_event, 'CPU*') || ' - ' || coalesce(wait_event_type, 'CPU*') - UNION - SELECT 'server_process', coalesce (wait_event, 'CPU*') || ' - ' || coalesce(wait_event_type, 'CPU*') as wait_event_type, count(*) as total - FROM <% if @role == "db" -%>pg_stat_activity_all<% else -%>pg_stat_activity<% end -%> - WHERE state = 'active' AND datname IS NULL and current_database() = (select datname from pg_database where NOT datname = ANY(ARRAY['postgres', 'template0','template1','repmgr','history']) ORDER by datname limit 1) - GROUP BY datname, coalesce (wait_event, 'CPU*') || ' - ' || coalesce(wait_event_type, 'CPU*') - metrics: - - datname: - usage: "LABEL" - description: "Database name" - - wait_event_type: - usage: "LABEL" - description: "Wait event type" - - total: - usage: "GAUGE" - description: "Total number of processes with specific wait event type" + description: > + This metric retrieves information about wait events for active sessions in the PostgreSQL database. + It tracks the wait event types and counts of processes currently experiencing each wait event type, + providing insights into potential bottlenecks and resource contention issues. 
+ sqls: + 11: |- + SELECT datname as tag_datname, coalesce (wait_event, 'CPU*') || ' - ' || coalesce(wait_event_type, 'CPU*') as tag_wait_event_type, count(*) as total + FROM pg_stat_activity + WHERE state = 'active' + GROUP BY datname, tag_wait_event_type + UNION + SELECT 'server_process' as tag_datname, coalesce (wait_event, 'CPU*') || ' - ' || coalesce(wait_event_type, 'CPU*') as tag_wait_event_type, count(*) as total + FROM pg_stat_activity + WHERE state = 'active' AND datname IS NULL + GROUP BY datname, tag_wait_event_type + gauges: + - total + is_instance_level: true + long_running_transactions: + description: > + This metric retrieves information about long-running transactions in the PostgreSQL database. + It counts the number of transactions that have been running for more than one minute and provides + the age of the oldest transaction, helping administrators identify potential blocking transactions. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + count(*) as transactions, + max(extract(epoch from (clock_timestamp() - xact_start)))::int8 as age_in_seconds + from pg_catalog.pg_stat_activity + where state is distinct from 'idle' + and (now() - xact_start) > '1 minutes'::interval + and query not like 'autovacuum:%' + gauges: + - transactions + - age_in_seconds + is_instance_level: true + database_wraparound: + description: > + This metric retrieves information about transaction ID wraparound in PostgreSQL databases. + It tracks the age of the oldest unfrozen transaction ID and multi-transaction ID for each database, + helping administrators monitor vacuum freeze operations and prevent transaction ID wraparound issues. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + datname::text as tag_datname, + age(d.datfrozenxid)::int8 as age_datfrozenxid, + mxid_age(d.datminmxid)::int8 as age_datminmxid + from pg_catalog.pg_database d + where d.datallowconn + gauges: + - age_datfrozenxid + - age_datminmxid + pg_stat_activity: + sqls: + 11: |- + SELECT /* pgwatch_generated */ + pg_database.datname AS tag_dbname, + tmp2.tag_application_name, + tmp.tag_state, + COALESCE(count,0) as count, + COALESCE(max_tx_duration,0) as max_tx_duration + FROM + ( + VALUES ('active'), + ('idle'), + ('idle in transaction'), + ('idle in transaction (aborted)'), + ('fastpath function call'), + ('disabled') + ) AS tmp(tag_state) + CROSS JOIN pg_database + LEFT JOIN + ( + SELECT datname, + application_name as tag_application_name, + state as tag_state, + count(*) AS count, + MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration + FROM pg_stat_activity GROUP BY datname, tag_application_name, tag_state + ) AS tmp2 + ON tmp.tag_state = tmp2.tag_state AND pg_database.datname = tmp2.datname + gauges: + - count + - max_tx_duration + pg_archiver: + sqls: + 11: | + WITH + current_wal_file AS ( + SELECT CASE WHEN NOT pg_is_in_recovery() THEN pg_walfile_name(pg_current_wal_insert_lsn()) ELSE NULL END pg_walfile_name + ), + current_wal AS ( + SELECT + ('x'||substring(pg_walfile_name,9,8))::bit(32)::int log, + ('x'||substring(pg_walfile_name,17,8))::bit(32)::int seg, + pg_walfile_name + FROM current_wal_file + ), + archive_wal AS( + SELECT + ('x'||substring(last_archived_wal,9,8))::bit(32)::int log, + ('x'||substring(last_archived_wal,17,8))::bit(32)::int seg, + last_archived_wal + FROM pg_stat_archiver + ) + SELECT coalesce(((cw.log - aw.log) * 256) + (cw.seg-aw.seg), -1) as pending_wal_count FROM current_wal cw, archive_wal aw + gauges: + - pending_wal_count + pg_blocked: + sqls: + 11: | + SELECT + count(blocked.transactionid) AS queries, + 
'__transaction__' AS tag_table + FROM pg_catalog.pg_locks blocked + WHERE NOT blocked.granted AND locktype = 'transactionid' + GROUP BY locktype + UNION + SELECT + count(blocked.relation) AS queries, + blocked.relation::regclass::text AS tag_table + FROM pg_catalog.pg_locks blocked + WHERE NOT blocked.granted AND locktype != 'transactionid' + GROUP BY relation + gauges: + - queries + pg_database_wraparound: + sqls: + 11: | + SELECT + datname as tag_datname, + age(d.datfrozenxid) as age_datfrozenxid, + mxid_age(d.datminmxid) as age_datminmxid + FROM + pg_catalog.pg_database d + WHERE + d.datallowconn + gauges: + - age_datfrozenxid + - age_datminmxid + pg_gin_index: + sqls: + 11: | + SELECT + index_name AS tag_index_name, + pending_list_bytes AS pending_list_bytes + FROM + postgres_gin_pending_list_size() + gauges: + - pending_list_bytes + pg_indexes_with_size: + sqls: + 11: SELECT schemaname as tag_schemaname, tablename as tag_relname, indexname as tag_indexrelname, pg_class.relpages + * 8192::bigint as index_size from pg_indexes inner join pg_namespace on pg_indexes.schemaname + = pg_namespace.nspname inner join pg_class on pg_class.relnamespace = pg_namespace.oid + and pg_class.relname = pg_indexes.indexname where pg_indexes.schemaname != 'pg_catalog' and pg_indexes.schemaname != 'pg_toast' + gauges: + - index_size + pg_integer_capacity: + sqls: + 11: | + SELECT 'sent_notifications' as tag_table_name, 'id' as tag_column_name, max(id) as current, (select (2^(numeric_precision-1)-1) as maximum FROM information_schema.columns WHERE (table_name, column_name) = ('sent_notifications', 'id')) FROM sent_notifications + UNION ALL + SELECT 'notes', 'id', max(id), (select (2^(numeric_precision-1)-1) FROM information_schema.columns WHERE (table_name, column_name) = ('notes', 'id')) FROM notes + UNION ALL + SELECT 'system_note_metadata', 'id', max(id), (select (2^(numeric_precision-1)-1) FROM information_schema.columns WHERE (table_name, column_name) = ('system_note_metadata', 
'id')) FROM system_note_metadata + UNION ALL + SELECT 'merge_request_diffs', 'id', max(id), (select (2^(numeric_precision-1)-1) FROM information_schema.columns WHERE (table_name, column_name) = ('merge_request_diffs', 'id')) FROM merge_request_diffs + UNION ALL + SELECT 'merge_request_metrics', 'id', max(id), (select (2^(numeric_precision-1)-1) FROM information_schema.columns WHERE (table_name, column_name) = ('merge_request_metrics', 'id')) FROM merge_request_metrics + UNION ALL + SELECT 'deployments', 'id', max(id), (select (2^(numeric_precision-1)-1) FROM information_schema.columns WHERE (table_name, column_name) = ('deployments', 'id')) FROM deployments + gauges: + - current + - maximum + pg_long_running_transactions: + sqls: + 11: | + SELECT COUNT(*) as transactions, + COALESCE(MAX(EXTRACT(EPOCH FROM (clock_timestamp() - xact_start)))::int8, 0) AS age_in_seconds + FROM pg_catalog.pg_stat_activity + WHERE state is distinct from 'idle' AND (now() - xact_start) > '1 minutes'::interval AND query not like 'autovacuum:%' + gauges: + - '*' + pg_oldest_blocked: + sqls: + 11: | + SELECT coalesce(extract('epoch' from max(clock_timestamp() - state_change)), 0) age_seconds + FROM pg_catalog.pg_stat_activity + WHERE wait_event_type = 'Lock' + AND state='active' + gauges: + - age_seconds + pg_postmaster: + sqls: + 11: SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time() + gauges: + - start_time_seconds + pg_replication: + sqls: + 11: SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH + FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag, CASE WHEN pg_is_in_recovery() + THEN 1 ELSE 0 END as is_replica + gauges: + - lag + - is_replica + pg_replication_slots: + sqls: + 11: | + SELECT slot_name as tag_slot_name, slot_type as tag_slot_type, + case when active then 1.0 else 0.0 end AS active, + age(xmin) AS xmin_age, + age(catalog_xmin) AS catalog_xmin_age, + CASE WHEN pg_is_in_recovery() THEN 
pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END - restart_lsn AS restart_lsn_bytes, + CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END - confirmed_flush_lsn AS confirmed_flush_lsn_bytes + FROM pg_replication_slots + gauges: + - active + - xmin_age + - catalog_xmin_age + - restart_lsn_bytes + - confirmed_flush_lsn_bytes + pg_slow: + sqls: + 11: | + SELECT COUNT(*) AS queries + FROM pg_catalog.pg_stat_activity + WHERE state = 'active' AND (now() - query_start) > '1 seconds'::interval + gauges: + - queries + pg_stat_activity_autovacuum: + sqls: + 11: | + SELECT + SPLIT_PART(query, '.', 2) AS tag_relname, + EXTRACT(EPOCH FROM (clock_timestamp() - xact_start)) AS tag_age_in_seconds + FROM + pg_catalog.pg_stat_activity + WHERE + query like 'autovacuum:%' + AND + EXTRACT(EPOCH FROM (clock_timestamp() - xact_start)) > 300 + gauges: + - age_in_seconds + pg_stat_activity_autovacuum_active: + sqls: + 11: | + SELECT v.phase as tag_phase, + CASE + when a.query ~ '^autovacuum.*to prevent wraparound' then 'wraparound' + when a.query ~* '^vacuum' then 'user' + when a.pid is null then null + ELSE 'regular' + END as tag_mode, + count(1) as workers_count + FROM pg_stat_progress_vacuum v + LEFT JOIN pg_catalog.pg_stat_activity a using (pid) + GROUP BY 1,2 + gauges: + - workers_count + pg_stat_kcache: + sqls: + 11: "WITH ranked_kcache AS (\n SELECT pg_get_userbyid(pg_stat_kcache_raw.userid)\ + \ AS tag_user,\n pg_database.datname,\n pg_stat_kcache_raw.queryid,\n \ + \ pg_stat_kcache_raw.exec_user_time,\n pg_stat_kcache_raw.exec_system_time,\n\ + \ pg_stat_kcache_raw.exec_user_time + pg_stat_kcache_raw.exec_system_time\ + \ AS tag_exec_total_time,\n row_number() over (order by pg_stat_kcache_raw.exec_user_time\ + \ + pg_stat_kcache_raw.exec_system_time desc) as rn\n FROM public.pg_stat_kcache()\ + \ pg_stat_kcache_raw\n INNER JOIN pg_database ON pg_database.oid = pg_stat_kcache_raw.dbid\n\ + ) \nSELECT\n ranked_kcache.tag_user,\n 
datname,\n queryid,\n exec_user_time,\n\ + \ exec_system_time,\n exec_total_time\nFROM ranked_kcache\nWHERE rn <= 500\n\ + UNION ALL\nSELECT\n 'tail_dummy_user' as tag_user,\n current_database() as datname,\n\ + \ -1 as queryid,\n sum(exec_user_time) as exec_user_time,\n sum(exec_system_time)\ + \ as exec_system_time,\n sum(exec_total_time) as exec_total_time\nFROM ranked_kcache\n\ + WHERE rn > 500\n" + gauges: + - exec_user_time + - exec_system_time + - exec_total_time + pg_stat_replication: + sqls: + 11: "SELECT application_name as tag_application_name, \n (pg_current_wal_lsn() - '0/0') % (2^52)::bigint\ + \ as current_wal_lsn, \n (sent_lsn - '0/0') % (2^52)::bigint as sent_lsn, \n\ + \ (write_lsn - '0/0') % (2^52)::bigint as write_lsn, \n (flush_lsn - '0/0')\ + \ % (2^52)::bigint as flush_lsn, \n (replay_lsn - '0/0') % (2^52)::bigint as\ + \ replay_lsn,\n EXTRACT(SECONDS FROM (now() - reply_time)) reply_time_lag \n\ + \ FROM pg_stat_replication\n" + gauges: + - current_wal_lsn + - sent_lsn + - write_lsn + - flush_lsn + - replay_lsn + - reply_time_lag + pg_stat_ssl: + sqls: + 11: | + SELECT pid as tag_pid, bits as bits, + CASE WHEN ssl THEN 1.0 ELSE 0.0 END AS active + FROM pg_stat_ssl + gauges: + - active + - bits + pg_stat_statements: + sqls: + 11: | + WITH ranked_statements AS ( + SELECT + pg_get_userbyid(userid) as user, + pg_database.datname, + pg_stat_statements.queryid , + pg_stat_statements.plans as plans_total, + pg_stat_statements.calls, + pg_stat_statements.total_exec_time as exec_time_total, + pg_stat_statements.total_plan_time as plan_time_total, + pg_stat_statements.rows, + (current_setting('block_size')::int * pg_stat_statements.shared_blks_hit) as shared_bytes_hit_total, + (current_setting('block_size')::int * pg_stat_statements.shared_blks_read) as shared_bytes_read_total, + (current_setting('block_size')::int * pg_stat_statements.shared_blks_dirtied) as shared_bytes_dirtied_total, + (current_setting('block_size')::int * 
pg_stat_statements.shared_blks_written) as shared_bytes_written_total, + pg_stat_statements.blk_read_time as block_read_total, + pg_stat_statements.blk_write_time as block_write_total, + pg_stat_statements.wal_records, + pg_stat_statements.wal_fpi, + pg_stat_statements.wal_bytes, + (current_setting('block_size')::int * pg_stat_statements.temp_blks_read) as temp_bytes_read, + (current_setting('block_size')::int * pg_stat_statements.temp_blks_written) as temp_bytes_written, + row_number() over (order by total_exec_time desc) as rn + FROM pg_stat_statements + JOIN pg_database + ON pg_database.oid = pg_stat_statements.dbid + ) + SELECT + ranked_statements.user as tag_user, + datname as tag_datname, + queryid as tag_queryid, + calls::int8 as calls, + plans_total::int8 as plans_total, + exec_time_total::int8 as exec_time_total, + plan_time_total::int8 as plan_time_total, + rows::int8 as rows, + shared_bytes_hit_total::int8 as shared_bytes_hit_total, + shared_bytes_read_total::int8 as shared_bytes_read_total, + shared_bytes_dirtied_total::int8 as shared_bytes_dirtied_total, + shared_bytes_written_total::int8 as shared_bytes_written_total, + block_read_total::int8 as block_read_total, + block_write_total::int8 as block_write_total, + wal_records::int8 as wal_records, + wal_fpi::int8 as wal_fpi, + wal_bytes::int8 as wal_bytes, + temp_bytes_read::int8 as temp_bytes_read, + temp_bytes_written::int8 as temp_bytes_written + FROM ranked_statements + WHERE rn <= 500 + UNION ALL + SELECT + 'tail_dummy_user' as tag_user, + current_database() as tag_datname, + -1 as tag_qqueryid, + sum(calls)::int8 as calls, + sum(plans_total)::int8 as plans_total, + sum(exec_time_total)::int8 as exec_time_total, + sum(plan_time_total)::int8 as plan_time_total, + sum(rows)::int8 as rows, + sum(shared_bytes_hit_total)::int8 as shared_bytes_hit_total, + sum(shared_bytes_read_total)::int8 as shared_bytes_read_total, + sum(shared_bytes_dirtied_total)::int8 as shared_bytes_dirtied_total, + 
sum(shared_bytes_written_total)::int8 as shared_bytes_written_total, + sum(block_read_total)::int8 as block_read_total, + sum(block_write_total)::int8 as block_write_total, + sum(wal_records)::int8 as wal_records, + sum(wal_fpi)::int8 as wal_fpi, + sum(wal_bytes)::int8 as wal_bytes, + sum(temp_bytes_read)::int8 as temp_bytes_read, + sum(temp_bytes_written)::int8 as temp_bytes_written + FROM ranked_statements + WHERE rn > 500 + gauges: + - calls + - plans_total + - exec_time_total + - plan_time_total + - rows + - shared_bytes_hit_total + - shared_bytes_read_total + - shared_bytes_dirtied_total + - shared_bytes_written_total + - block_read_total + - block_write_total + - wal_records + - wal_fpi + - wal_bytes + - temp_bytes_read + - temp_bytes_written + pg_stat_user_indexes: + sqls: + 11: SELECT schemaname as tag_schemaname, relname as tag_relname, indexrelname as tag_indexrelname, idx_scan, idx_tup_read, idx_tup_fetch + FROM pg_stat_user_indexes + gauges: + - idx_scan + - idx_tup_read + - idx_tup_fetch + pg_stat_user_tables: + sqls: + 11: | + SELECT + current_database() as tag_datname, + schemaname as tag_schemaname, + relname as tag_relname, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + GREATEST(last_autovacuum, last_vacuum, '1970-01-01Z') as last_vacuum, + GREATEST(last_autoanalyze, last_analyze, '1970-01-01Z') as last_analyze, + (vacuum_count + autovacuum_count) as vacuum_count, + (analyze_count + autoanalyze_count) as analyze_count + FROM + pg_stat_user_tables + gauges: + - seq_scan + - seq_tup_read + - idx_scan + - idx_tup_fetch + - n_tup_ins + - n_tup_upd + - n_tup_del + - n_tup_hot_upd + - n_live_tup + - n_dead_tup + - last_vacuum + - last_analyze + - vacuum_count + - analyze_count + pg_stat_wal_receiver: + sqls: + 11: | + SELECT case status when 'stopped' then 0 when 'starting' then 1 when 'streaming' then 2 when 'waiting' then 3 when 'restarting' then 4 when 
'stopping' then 5 else -1 end as tag_status, + (receive_start_lsn- '0/0') % (2^52)::bigint as receive_start_lsn, + receive_start_tli, + (flushed_lsn- '0/0') % (2^52)::bigint as flushed_lsn, + received_tli, + extract(epoch from last_msg_send_time) as last_msg_send_time, + extract(epoch from last_msg_receipt_time) as last_msg_receipt_time, + (latest_end_lsn - '0/0') % (2^52)::bigint as latest_end_lsn, + extract(epoch from latest_end_time) as latest_end_time, + substring(slot_name from 'repmgr_slot_([0-9]*)') as upstream_node, + trim(both '''' from substring(conninfo from 'host=([^ ]*)')) as tag_upstream_host, + slot_name + FROM pg_catalog.pg_stat_wal_receiver + gauges: + - status + - receive_start_lsn + - receive_start_tli + - flushed_lsn + - received_tli + - last_msg_send_time + - last_msg_receipt_time + - latest_end_lsn + - latest_end_time + - upstream_node + pg_statio_user_indexes: + sqls: + 11: SELECT schemaname as tag_schemaname, relname as tag_relname, indexrelname as tag_indexrelname, idx_blks_read, idx_blks_hit FROM + pg_statio_user_indexes + gauges: + - idx_blks_read + - idx_blks_hit + pg_statio_user_tables: + sqls: + 11: SELECT schemaname as tag_schemaname, relname as tag_relname, heap_blks_read, heap_blks_hit, idx_blks_read, + idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit + FROM pg_statio_user_tables + gauges: + - heap_blks_read + - heap_blks_hit + - idx_blks_read + - idx_blks_hit + - toast_blks_read + - toast_blks_hit + - tidx_blks_read + - tidx_blks_hit + pg_stuck_idle_in_transaction: + sqls: + 11: | + SELECT COUNT(*) AS queries + FROM pg_catalog.pg_stat_activity + WHERE state = 'idle in transaction' AND (now() - query_start) > '10 minutes'::interval + gauges: + - queries + pg_total_relation_size: + sqls: + 11: | + SELECT relnamespace::regnamespace as tag_schemaname, + relname as tag_relname, + pg_total_relation_size(oid) bytes + FROM pg_class + WHERE relkind = 'r'; + gauges: + - bytes + pg_txid: + sqls: + 11: | + SELECT + 
CASE WHEN pg_is_in_recovery() THEN 'NaN'::float ELSE txid_current() % (2^52)::bigint END AS current, + CASE WHEN pg_is_in_recovery() THEN 'NaN'::float ELSE txid_snapshot_xmin(txid_current_snapshot()) % (2^52)::bigint END AS xmin, + CASE WHEN pg_is_in_recovery() THEN 'NaN'::float ELSE txid_current() - txid_snapshot_xmin(txid_current_snapshot()) END AS xmin_age + gauges: + - current + - xmin + - xmin_age + pg_wait_sampling: + sqls: + 11: | + SELECT + coalesce(p.event_type, 'CPU*') as tag_wait_type, + coalesce(p.event, 'CPU*') as tag_wait_event, + SUM(p.count) as count, + p.queryid as tag_queryid, + pgsa.backend_type as tag_backend_type + FROM pg_wait_sampling_profile p LEFT OUTER JOIN pg_stat_statements s on p.queryid = s.queryid LEFT OUTER JOIN pg_stat_activity pgsa on pgsa.pid = p.pid GROUP BY p.queryid, p.event_type, p.event, pgsa.backend_type + gauges: + - count + pg_wait_sampling_agg: + sqls: + 11: | + SELECT + coalesce(p.event_type, 'CPU*') as tag_wait_type, + coalesce(p.event, 'CPU*') as tag_wait_event, + SUM(p.count) as count + FROM pg_wait_sampling_profile p GROUP BY p.event_type, p.event + gauges: + - count + pg_xlog_position: + sqls: + 11: | + SELECT CASE + WHEN pg_is_in_recovery() + THEN (pg_last_wal_replay_lsn() - '0/0') % (2^52)::bigint + ELSE (pg_current_wal_lsn() - '0/0') % (2^52)::bigint + END AS bytes + gauges: + - bytes + presets: aiven: - description: aiven database metrics - metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats: 60 - index_col_stats: 86400 - index_stats: 900 - locks: 60 - locks_mode: 60 - recommendations: 43200 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_activity: 60 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 300 - table_stats: 300 - wal: 60 - wal_receiver: 120 + description: aiven database metrics + metrics: + archiver: 60 + backends: 
60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats: 60 + index_col_stats: 86400 + index_stats: 900 + locks: 60 + locks_mode: 60 + recommendations: 43200 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_activity: 60 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 300 + table_stats: 300 + wal: 60 + wal_receiver: 120 aurora: - description: AWS Aurora doesn't expose all Postgres functions and there's no WAL - metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats_aurora: 60 - index_stats: 900 - locks: 60 - locks_mode: 60 - replication: 120 - replication_slots: 120 - settings: 7200 - sproc_stats: 180 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - table_stats: 300 - wal_receiver: 120 + description: AWS Aurora doesn't expose all Postgres functions and there's no WAL + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats_aurora: 60 + index_stats: 900 + locks: 60 + locks_mode: 60 + replication: 120 + replication_slots: 120 + settings: 7200 + sproc_stats: 180 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + table_stats: 300 + wal_receiver: 120 azure: - description: similar to 'exhaustive' with stuff that's not accessible on Azure Database for PostgreSQL removed - metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats: 60 - index_stats: 900 - kpi: 120 - locks: 60 - locks_mode: 60 - replication: 60 - replication_slots: 60 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_ssl: 60 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 
600 - wal: 60 - wal_receiver: 60 - wal_size: 300 + description: similar to 'exhaustive' with stuff that's not accessible on Azure Database for PostgreSQL removed + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats: 60 + index_stats: 900 + kpi: 120 + locks: 60 + locks_mode: 60 + replication: 60 + replication_slots: 60 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_ssl: 60 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + wal: 60 + wal_receiver: 60 + wal_size: 300 basic: - description: only the most important metrics - WAL, DB-level statistics (size, tx and backend counts) - metrics: - instance_up: 60 - db_size: 300 - db_stats: 60 - wal: 60 + description: only the most important metrics - WAL, DB-level statistics (size, tx and backend counts) + metrics: + instance_up: 60 + db_size: 300 + db_stats: 60 + wal: 60 exhaustive: - description: all important metrics for a deeper performance understanding - metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - cpu_load: 60 - db_size: 300 - db_stats: 60 - index_stats: 900 - instance_up: 60 - locks: 60 - locks_mode: 60 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_activity: 30 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - table_stats: 300 - wal: 60 - wal_receiver: 120 - wal_size: 300 + description: all important metrics for a deeper performance understanding + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + cpu_load: 60 + db_size: 300 + db_stats: 60 + index_stats: 900 + instance_up: 60 + locks: 60 + locks_mode: 60 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_activity: 30 + stat_statements: 180 
+ stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + table_stats: 300 + wal: 60 + wal_receiver: 120 + wal_size: 300 full: - description: almost all available metrics for a even deeper performance understanding - metrics: - archiver: 60 - archiver_pending_count: 300 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - cpu_load: 60 - datfrozenxid: 3600 - db_size: 300 - db_stats: 60 - index_stats: 900 - instance_up: 60 - kpi: 120 - locks: 60 - locks_mode: 60 - logical_subscriptions: 120 - postgres_role: 60 - psutil_cpu: 120 - psutil_disk: 120 - psutil_disk_io_total: 120 - psutil_mem: 120 - recommendations: 43200 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - server_log_event_counts: 60 - settings: 7200 - sproc_stats: 180 - stat_activity: 30 - stat_ssl: 120 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - table_stats: 300 - wal: 60 - wal_receiver: 120 - wal_size: 120 + description: almost all available metrics for a even deeper performance understanding + metrics: + archiver: 60 + archiver_pending_count: 300 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + cpu_load: 60 + datfrozenxid: 3600 + db_size: 300 + db_stats: 60 + index_stats: 900 + instance_up: 60 + kpi: 120 + locks: 60 + locks_mode: 60 + logical_subscriptions: 120 + postgres_role: 60 + psutil_cpu: 120 + psutil_disk: 120 + psutil_disk_io_total: 120 + psutil_mem: 120 + recommendations: 43200 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + server_log_event_counts: 60 + settings: 7200 + sproc_stats: 180 + stat_activity: 30 + stat_ssl: 120 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + table_stats: 300 + wal: 60 + wal_receiver: 120 + wal_size: 120 gce: - description: similar to 'exhaustive' with stuff not accessible on GCE managed PostgreSQL engine removed - 
metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats: 60 - index_stats: 900 - locks: 60 - locks_mode: 60 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - table_stats: 300 - wal: 60 - wal_receiver: 120 + description: similar to 'exhaustive' with stuff not accessible on GCE managed PostgreSQL engine removed + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats: 60 + index_stats: 900 + locks: 60 + locks_mode: 60 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + table_stats: 300 + wal: 60 + wal_receiver: 120 minimal: - description: single "Key Performance Indicators" query for fast cluster/db overview - metrics: - instance_up: 60 - kpi: 60 + description: single "Key Performance Indicators" query for fast cluster/db overview + metrics: + instance_up: 60 + kpi: 60 pgbouncer: - description: pgbouncer stats - metrics: - pgbouncer_stats: 60 - pgbouncer_clients: 60 + description: pgbouncer stats + metrics: + pgbouncer_stats: 60 + pgbouncer_clients: 60 pgpool: - description: pgpool stats - metrics: - pgpool_stats: 60 - pgpool_processes: 60 + description: pgpool stats + metrics: + pgpool_stats: 60 + pgpool_processes: 60 prometheus-async: - description: Tuned for the Prometheus async scrapping - metrics: - backends: 30 - bgwriter: 60 - checkpointer: 60 - db_size: 300 - db_stats: 30 - locks_mode: 30 - replication: 120 - replication_slots: 120 - settings: 300 - sproc_stats: 180 - stat_statements_calls: 60 - table_io_stats: 300 - table_stats: 300 - wait_events: 60 - wal: 60 + description: Tuned for the Prometheus 
async scrapping + metrics: + backends: 30 + bgwriter: 60 + checkpointer: 60 + db_size: 300 + db_stats: 30 + locks_mode: 30 + replication: 120 + replication_slots: 120 + settings: 300 + sproc_stats: 180 + stat_statements_calls: 60 + table_io_stats: 300 + table_stats: 300 + wait_events: 60 + wal: 60 rds: - description: similar to 'exhaustive' with stuff that's not accessible on AWS RDS removed - metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats: 60 - index_stats: 900 - locks: 60 - locks_mode: 60 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_activity: 30 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - table_stats: 300 - wal: 60 - wal_receiver: 120 + description: similar to 'exhaustive' with stuff that's not accessible on AWS RDS removed + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats: 60 + index_stats: 900 + locks: 60 + locks_mode: 60 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_activity: 30 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + table_stats: 300 + wal: 60 + wal_receiver: 120 standard: - description: basic level + table, index, stat_statements stats - metrics: - cpu_load: 60 - db_size: 300 - db_stats: 60 - index_stats: 900 - instance_up: 60 - sequence_health: 3600 - sproc_stats: 180 - stat_statements: 180 - table_stats: 300 - wal: 60 + description: basic level + table, index, stat_statements stats + metrics: + cpu_load: 60 + db_size: 300 + db_stats: 60 + index_stats: 900 + instance_up: 60 + sequence_health: 3600 + sproc_stats: 180 + stat_statements: 180 + table_stats: 300 + wal: 60 exhaustive_no_python: - description: like exhaustive, but no PL/Python helpers 
- metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats: 60 - index_stats: 900 - instance_up: 60 - locks: 60 - locks_mode: 60 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_activity: 30 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - table_stats: 300 - wal: 60 - wal_receiver: 120 - wal_size: 300 + description: like exhaustive, but no PL/Python helpers + metrics: + archiver: 60 + backends: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats: 60 + index_stats: 900 + instance_up: 60 + locks: 60 + locks_mode: 60 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_activity: 30 + stat_statements: 180 + stat_statements_calls: 60 + table_bloat_approx_summary_sql: 7200 + table_io_stats: 600 + table_stats: 300 + wal: 60 + wal_receiver: 120 + wal_size: 300 unprivileged: - description: no wrappers + only pg_stat_statements extension expected (developer mode) - metrics: - archiver: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats: 60 - index_stats: 900 - instance_up: 60 - locks: 60 - locks_mode: 60 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_statements_calls: 60 - table_io_stats: 600 - table_stats: 300 - wal: 60 \ No newline at end of file + description: no wrappers + only pg_stat_statements extension expected (developer mode) + metrics: + archiver: 60 + bgwriter: 60 + checkpointer: 60 + change_events: 300 + db_size: 300 + db_stats: 60 + index_stats: 900 + instance_up: 60 + locks: 60 + locks_mode: 60 + replication: 120 + replication_slots: 120 + sequence_health: 3600 + settings: 7200 + sproc_stats: 180 + stat_statements_calls: 60 + table_io_stats: 600 + table_stats: 300 + wal: 60 \ No newline at 
end of file diff --git a/config/pgwatch-prometheus/sources.yml b/config/pgwatch-prometheus/sources.yml index c6cea94..dee971e 100644 --- a/config/pgwatch-prometheus/sources.yml +++ b/config/pgwatch-prometheus/sources.yml @@ -5,7 +5,6 @@ conn_str: postgresql://pgwatch_monitor:monitor_pass@target-db:5432/target_database kind: postgres custom_metrics: - pg_stat_statements_calls: 1 backends: 1 bgwriter: 1 checkpointer: 1 @@ -16,12 +15,40 @@ replication_slots: 1 settings: 1 sproc_stats: 1 - stat_statements: 1 - stat_statements_calls: 1 table_io_stats: 1 table_stats: 1 wait_events: 1 wal: 1 + pg_stat_activity: 1 + pg_replication: 1 + pg_stat_wal_receiver: 1 + pg_archiver: 1 + pg_postmaster: 1 + pg_stat_user_tables: 1 + pg_statio_user_tables: 1 + pg_stat_user_indexes: 1 + pg_statio_user_indexes: 1 + pg_indexes_with_size: 1 + pg_stat_statements: 1 + pg_stat_kcache: 1 + pg_total_relation_size: 1 + pg_blocked: 1 + pg_oldest_blocked: 1 + pg_slow: 1 + pg_long_running_transactions: 1 + pg_stuck_idle_in_transaction: 1 + pg_txid: 1 + pg_database_wraparound: 1 + pg_xlog_position: 1 + pg_replication_slots: 1 + pg_stat_ssl: 1 + pg_integer_capacity: 1 + pg_wait_sampling: 1 + pg_wait_sampling_agg: 1 + pg_stat_activity_autovacuum: 1 + pg_stat_activity_autovacuum_active: 1 + pg_gin_index: 1 + custom_tags: env: demo -- GitLab From 6a1947e27ef26ca86838b48f1bf60031a6c4c2e4 Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Wed, 2 Jul 2025 20:54:06 +0300 Subject: [PATCH 6/7] Fixed sessions graphs --- .../dashboards/DBOverviewDashboard.json | 49 +++---------------- 1 file changed, 6 insertions(+), 43 deletions(-) diff --git a/config/grafana/dashboards/DBOverviewDashboard.json b/config/grafana/dashboards/DBOverviewDashboard.json index d6e7fb3..9351d21 100644 --- a/config/grafana/dashboards/DBOverviewDashboard.json +++ b/config/grafana/dashboards/DBOverviewDashboard.json @@ -53,7 +53,7 @@ "barAlignment": 0, "barWidthFactor": 0.5, "drawStyle": 
"bars", - "fillOpacity": 100, + "fillOpacity": 40, "gradientMode": "none", "hideFrom": { "legend": false, @@ -261,7 +261,7 @@ "refId": "D" } ], - "title": "New panel", + "title": "Active session history", "type": "timeseries" }, { @@ -443,32 +443,6 @@ } } ] - }, - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "active", - "idle", - "idle in transaction" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] } ] }, @@ -687,19 +661,9 @@ ] }, { - "__systemRef": "hideSeriesFrom", "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "active", - "idle", - "idle in transaction" - ], - "prefix": "All except:", - "readOnly": true - } + "id": "byName", + "options": "Max connections" }, "properties": [ { @@ -3986,19 +3950,18 @@ } ], "preload": false, - "refresh": "auto", "schemaVersion": 41, "tags": [], "templating": { "list": [] }, "time": { - "from": "now-5m", + "from": "now-15m", "to": "now" }, "timepicker": {}, "timezone": "browser", "title": "Database Overview", "uid": "f90500a0-a12e-4081-a2f0-07ed96f27915", - "version": 96 + "version": 99 } \ No newline at end of file -- GitLab From 41ed31435be22eaf409f2fad5c5a4ecc55339a75 Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Thu, 3 Jul 2025 15:31:46 +0300 Subject: [PATCH 7/7] Added configuration support through instances.yml --- config/pgwatch-postgres/metrics.yml | 8 +- config/pgwatch-postgres/sources.yml | 11 +- config/pgwatch-prometheus/metrics.yml | 342 ++++---------------------- config/pgwatch-prometheus/sources.yml | 56 +---- docker-compose.yml | 53 ++-- instances.yml | 10 + 6 files changed, 106 insertions(+), 374 deletions(-) create mode 100644 instances.yml diff --git a/config/pgwatch-postgres/metrics.yml 
b/config/pgwatch-postgres/metrics.yml index 20ac8fa..528f809 100644 --- a/config/pgwatch-postgres/metrics.yml +++ b/config/pgwatch-postgres/metrics.yml @@ -10,4 +10,10 @@ metrics: from pg_stat_statements where queryid is not null gauges: - - '*' \ No newline at end of file + - '*' + +presets: + full: + description: "Full metrics for PostgreSQL storage" + metrics: + pgss_queryid_queries: 10 \ No newline at end of file diff --git a/config/pgwatch-postgres/sources.yml b/config/pgwatch-postgres/sources.yml index df05b65..6dfdb19 100644 --- a/config/pgwatch-postgres/sources.yml +++ b/config/pgwatch-postgres/sources.yml @@ -1,14 +1,11 @@ # PGWatch Sources Configuration - PostgreSQL Instance -# This instance stores detailed metrics in PostgreSQL format - -- name: target-database +- unique_name: target-database conn_str: postgresql://pgwatch_monitor:monitor_pass@target-db:5432/target_database - kind: postgres + preset_metrics: full custom_metrics: - pgss_queryid_queries: 30 + is_enabled: true + group: default custom_tags: env: demo cluster: local sink_type: postgresql - is_enabled: true - stmt_timeout: 30 \ No newline at end of file diff --git a/config/pgwatch-prometheus/metrics.yml b/config/pgwatch-prometheus/metrics.yml index bb8d30c..1e8f083 100644 --- a/config/pgwatch-prometheus/metrics.yml +++ b/config/pgwatch-prometheus/metrics.yml @@ -4771,305 +4771,49 @@ metrics: presets: - aiven: - description: aiven database metrics - metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats: 60 - index_col_stats: 86400 - index_stats: 900 - locks: 60 - locks_mode: 60 - recommendations: 43200 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_activity: 60 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 300 - table_stats: 300 - wal: 60 - wal_receiver: 120 - aurora: - description: AWS Aurora doesn't 
expose all Postgres functions and there's no WAL - metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats_aurora: 60 - index_stats: 900 - locks: 60 - locks_mode: 60 - replication: 120 - replication_slots: 120 - settings: 7200 - sproc_stats: 180 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - table_stats: 300 - wal_receiver: 120 - azure: - description: similar to 'exhaustive' with stuff that's not accessible on Azure Database for PostgreSQL removed - metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats: 60 - index_stats: 900 - kpi: 120 - locks: 60 - locks_mode: 60 - replication: 60 - replication_slots: 60 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_ssl: 60 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - wal: 60 - wal_receiver: 60 - wal_size: 300 - basic: - description: only the most important metrics - WAL, DB-level statistics (size, tx and backend counts) - metrics: - instance_up: 60 - db_size: 300 - db_stats: 60 - wal: 60 - exhaustive: - description: all important metrics for a deeper performance understanding - metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - cpu_load: 60 - db_size: 300 - db_stats: 60 - index_stats: 900 - instance_up: 60 - locks: 60 - locks_mode: 60 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_activity: 30 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - table_stats: 300 - wal: 60 - wal_receiver: 120 - wal_size: 300 full: description: almost all available metrics for a even deeper performance understanding metrics: - archiver: 60 - archiver_pending_count: 300 - backends: 60 - bgwriter: 60 - 
checkpointer: 60 - change_events: 300 - cpu_load: 60 - datfrozenxid: 3600 - db_size: 300 - db_stats: 60 - index_stats: 900 - instance_up: 60 - kpi: 120 - locks: 60 - locks_mode: 60 - logical_subscriptions: 120 - postgres_role: 60 - psutil_cpu: 120 - psutil_disk: 120 - psutil_disk_io_total: 120 - psutil_mem: 120 - recommendations: 43200 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - server_log_event_counts: 60 - settings: 7200 - sproc_stats: 180 - stat_activity: 30 - stat_ssl: 120 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - table_stats: 300 - wal: 60 - wal_receiver: 120 - wal_size: 120 - gce: - description: similar to 'exhaustive' with stuff not accessible on GCE managed PostgreSQL engine removed - metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats: 60 - index_stats: 900 - locks: 60 - locks_mode: 60 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - table_stats: 300 - wal: 60 - wal_receiver: 120 - minimal: - description: single "Key Performance Indicators" query for fast cluster/db overview - metrics: - instance_up: 60 - kpi: 60 - pgbouncer: - description: pgbouncer stats - metrics: - pgbouncer_stats: 60 - pgbouncer_clients: 60 - pgpool: - description: pgpool stats - metrics: - pgpool_stats: 60 - pgpool_processes: 60 - prometheus-async: - description: Tuned for the Prometheus async scrapping - metrics: - backends: 30 - bgwriter: 60 - checkpointer: 60 - db_size: 300 - db_stats: 30 - locks_mode: 30 - replication: 120 - replication_slots: 120 - settings: 300 - sproc_stats: 180 - stat_statements_calls: 60 - table_io_stats: 300 - table_stats: 300 - wait_events: 60 - wal: 60 - rds: - description: similar to 'exhaustive' with stuff that's not 
accessible on AWS RDS removed - metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats: 60 - index_stats: 900 - locks: 60 - locks_mode: 60 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_activity: 30 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - table_stats: 300 - wal: 60 - wal_receiver: 120 - standard: - description: basic level + table, index, stat_statements stats - metrics: - cpu_load: 60 - db_size: 300 - db_stats: 60 - index_stats: 900 - instance_up: 60 - sequence_health: 3600 - sproc_stats: 180 - stat_statements: 180 - table_stats: 300 - wal: 60 - exhaustive_no_python: - description: like exhaustive, but no PL/Python helpers - metrics: - archiver: 60 - backends: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats: 60 - index_stats: 900 - instance_up: 60 - locks: 60 - locks_mode: 60 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_activity: 30 - stat_statements: 180 - stat_statements_calls: 60 - table_bloat_approx_summary_sql: 7200 - table_io_stats: 600 - table_stats: 300 - wal: 60 - wal_receiver: 120 - wal_size: 300 - unprivileged: - description: no wrappers + only pg_stat_statements extension expected (developer mode) - metrics: - archiver: 60 - bgwriter: 60 - checkpointer: 60 - change_events: 300 - db_size: 300 - db_stats: 60 - index_stats: 900 - instance_up: 60 - locks: 60 - locks_mode: 60 - replication: 120 - replication_slots: 120 - sequence_health: 3600 - settings: 7200 - sproc_stats: 180 - stat_statements_calls: 60 - table_io_stats: 600 - table_stats: 300 - wal: 60 \ No newline at end of file + backends: 1 + bgwriter: 1 + checkpointer: 1 + db_size: 1 + db_stats: 1 + locks_mode: 1 + replication: 1 + replication_slots: 1 + settings: 1 + sproc_stats: 1 + table_io_stats: 
1 + table_stats: 1 + wait_events: 1 + wal: 1 + pg_stat_activity: 1 + pg_replication: 1 + pg_stat_wal_receiver: 1 + pg_archiver: 1 + pg_postmaster: 1 + pg_stat_user_tables: 1 + pg_statio_user_tables: 1 + pg_stat_user_indexes: 1 + pg_statio_user_indexes: 1 + pg_indexes_with_size: 1 + pg_stat_statements: 1 + pg_stat_kcache: 1 + pg_total_relation_size: 1 + pg_blocked: 1 + pg_oldest_blocked: 1 + pg_slow: 1 + pg_long_running_transactions: 1 + pg_stuck_idle_in_transaction: 1 + pg_txid: 1 + pg_database_wraparound: 1 + pg_xlog_position: 1 + pg_replication_slots: 1 + pg_stat_ssl: 1 + pg_integer_capacity: 1 + pg_wait_sampling: 1 + pg_wait_sampling_agg: 1 + pg_stat_activity_autovacuum: 1 + pg_stat_activity_autovacuum_active: 1 + pg_gin_index: 1 diff --git a/config/pgwatch-prometheus/sources.yml b/config/pgwatch-prometheus/sources.yml index dee971e..998ed5e 100644 --- a/config/pgwatch-prometheus/sources.yml +++ b/config/pgwatch-prometheus/sources.yml @@ -1,58 +1,12 @@ -# PGWatch Sources Configuration - Prometheus Instance -# This instance exposes metrics in Prometheus format +# PGWatch Sources Configuration - Prometheus Instance -- name: target-database +- unique_name: target-database conn_str: postgresql://pgwatch_monitor:monitor_pass@target-db:5432/target_database - kind: postgres + preset_metrics: full custom_metrics: - backends: 1 - bgwriter: 1 - checkpointer: 1 - db_size: 1 - db_stats: 1 - locks_mode: 1 - replication: 1 - replication_slots: 1 - settings: 1 - sproc_stats: 1 - table_io_stats: 1 - table_stats: 1 - wait_events: 1 - wal: 1 - pg_stat_activity: 1 - pg_replication: 1 - pg_stat_wal_receiver: 1 - pg_archiver: 1 - pg_postmaster: 1 - pg_stat_user_tables: 1 - pg_statio_user_tables: 1 - pg_stat_user_indexes: 1 - pg_statio_user_indexes: 1 - pg_indexes_with_size: 1 - pg_stat_statements: 1 - pg_stat_kcache: 1 - pg_total_relation_size: 1 - pg_blocked: 1 - pg_oldest_blocked: 1 - pg_slow: 1 - pg_long_running_transactions: 1 - pg_stuck_idle_in_transaction: 1 - pg_txid: 1 - 
pg_database_wraparound: 1 - pg_xlog_position: 1 - pg_replication_slots: 1 - pg_stat_ssl: 1 - pg_integer_capacity: 1 - pg_wait_sampling: 1 - pg_wait_sampling_agg: 1 - pg_stat_activity_autovacuum: 1 - pg_stat_activity_autovacuum_active: 1 - pg_gin_index: 1 - - + is_enabled: true + group: default custom_tags: env: demo cluster: local sink_type: prometheus - is_enabled: true - stmt_timeout: 15 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 98e7590..1a935fa 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,20 +1,39 @@ version: '3.8' services: - # Target Database - The PostgreSQL database being monitored - target-db: - image: postgres:15 - container_name: target-db - environment: - POSTGRES_DB: target_database - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - command: ["postgres", "-c", "shared_preload_libraries=pg_stat_statements", "-c", "pg_stat_statements.track=all"] - ports: - - "5432:5432" + # Sources Generator - Generates sources.yml files from instances.yml template + sources-generator: + image: alpine:latest + container_name: sources-generator + working_dir: /app volumes: - - target_db_data:/var/lib/postgresql/data - - ./config/target-db/init.sql:/docker-entrypoint-initdb.d/init.sql + - ./instances.yml:/app/instances.yml + - ./config:/app/config + command: > + sh -c " + mkdir -p /app/config/pgwatch-postgres /app/config/pgwatch-prometheus && + echo '# PGWatch Sources Configuration - PostgreSQL Instance' > /app/config/pgwatch-postgres/sources.yml && + sed 's/~sink_type~/postgresql/g' /app/instances.yml >> /app/config/pgwatch-postgres/sources.yml && + echo '# PGWatch Sources Configuration - Prometheus Instance' > /app/config/pgwatch-prometheus/sources.yml && + echo '' >> /app/config/pgwatch-prometheus/sources.yml && + sed 's/~sink_type~/prometheus/g' /app/instances.yml >> /app/config/pgwatch-prometheus/sources.yml && + echo 'Generated sources.yml files for both postgres and prometheus' + " + + #
Target Database - The PostgreSQL database being monitored + # target-db: + # image: postgres:15 + # container_name: target-db + # environment: + # POSTGRES_DB: target_database + # POSTGRES_USER: postgres + # POSTGRES_PASSWORD: postgres + # command: ["postgres", "-c", "shared_preload_libraries=pg_stat_statements", "-c", "pg_stat_statements.track=all"] + # ports: + # - "5432:5432" + # volumes: + # - target_db_data:/var/lib/postgresql/data + # - ./config/target-db/init.sql:/docker-entrypoint-initdb.d/init.sql # Postgres Sink - Storage for metrics in PostgreSQL format sink-postgres: @@ -55,8 +74,9 @@ services: ports: - "8080:8080" depends_on: + - sources-generator - sink-postgres - - target-db + # - target-db volumes: - ./config/pgwatch-postgres/sources.yml:/etc/pgwatch/sources.yml - ./config/pgwatch-postgres/metrics.yml:/etc/pgwatch/metrics.yml @@ -70,8 +90,9 @@ services: - "8089:8089" - "9091:9091" depends_on: + - sources-generator - sink-prometheus - - target-db + # - target-db volumes: - ./config/pgwatch-prometheus/sources.yml:/etc/pgwatch/sources.yml - ./config/pgwatch-prometheus/metrics.yml:/etc/pgwatch/metrics.yml @@ -93,7 +114,7 @@ services: - sink-prometheus volumes: - target_db_data: + # target_db_data: sink_postgres_data: prometheus_data: grafana_data: diff --git a/instances.yml b/instances.yml new file mode 100644 index 0000000..8cdaa9d --- /dev/null +++ b/instances.yml @@ -0,0 +1,10 @@ +- unique_name: target-database + conn_str: postgresql://pgwatch_monitor:monitor_pass@target-db:5432/target_database + preset_metrics: full + custom_metrics: + is_enabled: true + group: default + custom_tags: + env: demo + cluster: local + sink_type: ~sink_type~ -- GitLab