diff --git a/config/grafana/dashboards/DBOverviewDashboard.json b/config/grafana/dashboards/DBOverviewDashboard.json new file mode 100644 index 0000000000000000000000000000000000000000..9351d21467405ab485fdb34dd1525ce165e3e0c2 --- /dev/null +++ b/config/grafana/dashboards/DBOverviewDashboard.json @@ -0,0 +1,3967 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 2, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [], + "title": "ASH", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.5, + "drawStyle": "bars", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*Timeout" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6f450c", + "mode": 
"fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*CPU - .*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*- Lock" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*- LWLock" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*- IO" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*- Client" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 18, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 38, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (wait_event_type) (pgwatch_wait_events_total{datname!=\"server_process\"})", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (wait_event_type) (pgwatch_wait_events_total{datname=\"server_process\", wait_event_type!~\".*- Activity.*\"})", + "hide": false, + "legendFormat": "Postgres - {{wait_event_type}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "exemplar": false, + 
"expr": "sum by (wait_event_type) (pgwatch_wait_events_total{datname=\"server_process\", wait_event_type=~\".*- Activity.*\"})", + "hide": false, + "legendFormat": "Idle Internal - {{wait_event_type}}", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum by (event_type) (irate(pgwatch_wait_events_sampling_total[$__rate_interval]))>0", + "hide": false, + "interval": "20", + "legendFormat": " {{event_type}}", + "range": true, + "refId": "D" + } + ], + "title": "Active session history", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 2, + "panels": [], + "title": "Host stats", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 3, + "panels": [], + "title": "Postgres stats", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + 
{ + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Active" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idle" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#afafaf", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idle-in-transaction" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Waiting" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum by (state) (pgwatch_pg_stat_activity_count) > 0", + "hide": false, + "instant": false, + "legendFormat": "{{state}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_settings_max_connections", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": 
"Max connections", + "range": true, + "refId": "D", + "useBackend": false + } + ], + "title": "Sessions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Active" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idle" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#afafaf", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idle-in-transaction" + }, + "properties": [ + { + "id": 
"color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Waiting" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max connections" + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 39, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum by (state) (pgwatch_pg_stat_activity_count{state!=\"idle\"}) > 0", + "hide": false, + "instant": false, + "legendFormat": "{{state}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_settings_max_connections", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Max connections", + "range": true, + "refId": "D", + "useBackend": false + } + ], + "title": "Non-idle Sessions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { 
+ "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 29 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "Calls", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Calls (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, 
+ "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Rollbacks" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Commits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 29 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "builder", + "expr": "sum(irate(pgwatch_db_stats_xact_rollback[$__rate_interval]))", + "hide": false, + "instant": false, + "interval": "20", + "legendFormat": "Rollbacks", + "range": true, + "refId": "B" + }, + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_db_stats_xact_commit[$__rate_interval]))", + "interval": "20", + "legendFormat": "Commits", + "range": true, + "refId": "A" + } + ], + "title": "Transactions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 37 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "refId": "A" + } + ], + "title": "TODO: Postgres logs", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 37 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + 
"displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_db_stats_xact_commit[$__rate_interval]))/(sum(irate(pgwatch_db_stats_xact_commit[$__rate_interval]))+sum(irate(pgwatch_db_stats_xact_rollback[$__rate_interval]))) * 100", + "interval": "20", + "legendFormat": "Commits", + "range": true, + "refId": "A" + } + ], + "title": "Commit ratio", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "sec/sec" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 45 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": 
"(sum(irate(pgwatch_pg_stat_statements_exec_time_total[$__rate_interval])) + sum(irate(pgwatch_pg_stat_statements_plan_time_total[$__rate_interval])))/1000", + "interval": "20", + "legendFormat": "Total time", + "range": true, + "refId": "A" + } + ], + "title": "Statements total time (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 45 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "(sum(irate(pgwatch_pg_stat_statements_exec_time_total[$__rate_interval])) + sum(irate(pgwatch_pg_stat_statements_plan_time_total[$__rate_interval]))) / sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, 
+ "interval": "20", + "legendFormat": "Time per call", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Statements time per call (pg_stat_statements) aka latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 53 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_rows[$__rate_interval]))", + "interval": "20", + "legendFormat": "No. 
of rows ", + "range": true, + "refId": "A" + } + ], + "title": "Total rows (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 53 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_rows[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "interval": "20", + "legendFormat": "Rows per call", + "range": true, + "refId": "A" + } + ], + "title": "Rows per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "sec/sec" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 61 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_block_read_total[$__rate_interval]))", + "interval": "20", + "legendFormat": "blk_read_time", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_block_write_total[$__rate_interval]))", + "hide": false, + "interval": "20", + "legendFormat": "blk_write_time", + "range": true, + "refId": "B" + } + ], + "title": "blk_read_time vs blk_write_time (s/s) (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", 
+ "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "sec/sec" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 61 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_block_read_total[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "interval": "20", + "legendFormat": "blk_read_time", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_block_write_total[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "hide": false, + "interval": "20", + "legendFormat": "blk_write_time", + "range": true, + "refId": "B" + } + ], + "title": "blk_read_time vs blk_write_time (s/s) per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 69 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_hit_total[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_hit (bytes) (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": 
{ + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 69 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_hit_total[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes per call", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_hit (bytes) per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 77 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_read_total[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_read (bytes) (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 77 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_read_total[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes per call", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_read (bytes) per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + 
"x": 0, + "y": 85 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_written_total[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_written (bytes) (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 85 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + 
"sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_written_total[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes per call", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_written (bytes) per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 93 + }, + "id": 22, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": 
"sum(irate(pgwatch_pg_stat_statements_shared_bytes_dirtied_total[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_dirtied (bytes) (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 93 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_written_total[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": 
"shared bytes per call", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "shared_blks_dirtied (bytes) per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 101 + }, + "id": 24, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_hit_total[$__rate_interval]))/(sum(irate(pgwatch_pg_stat_statements_shared_bytes_hit_total[$__rate_interval])) + sum(irate(pgwatch_pg_stat_statements_shared_bytes_read_total[$__rate_interval]))) * 100", + "interval": "20", + "legendFormat": "shared_blks_read_ratio", + "range": true, + "refId": "A" + } + ], + "title": "shared_blks_read_ratio (pg_stat_statements)", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 109 + }, + "id": 25, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_wal_xlog_location_b[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "WAL bytes (pg_current_wal_lsn)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 109 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_wal_xlog_location_b[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "WAL bytes per call (pg_current_wal_lsn)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": 
false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 117 + }, + "id": 27, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum(irate(pgwatch_pg_stat_statements_wal_fpi[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "WAL fpi", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "WAL fpi (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, +
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 117 + }, + "id": 28, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_wal_fpi[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "shared bytes", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "WAL fpi per call (pg_current_wal_lsn)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 
8, + "w": 12, + "x": 0, + "y": 125 + }, + "id": 29, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_temp_bytes_read[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "Temp bytes read", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_temp_bytes_written[$__rate_interval]))", + "hide": false, + "interval": "20", + "legendFormat": "Temp bytes written", + "range": true, + "refId": "B" + } + ], + "title": "temp_bytes_read vs temp_bytes_written (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + 
"unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 125 + }, + "id": 31, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_temp_bytes_read[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "Temp bytes read", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_temp_bytes_written[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls[$__rate_interval]))", + "hide": false, + "interval": "20", + "legendFormat": "Temp bytes written", + "range": true, + "refId": "B" + } + ], + "title": "temp_bytes_read vs temp_bytes_written per call (pg_stat_statements)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.5, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + 
}, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 133 + }, + "id": 32, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (lockmode) (rate(pgwatch_locks_mode_count[$__rate_interval]) * 60)", + "fullMetaSearch": false, + "groupBy": [ + { + "params": [ + "$interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "includeNullMetadata": true, + "interval": "20", + "intervalFactor": 1, + "legendFormat": "__auto", + "metric": "pg_locks_count", + "policy": "default", + "range": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "step": 5, + "useBackend": false + } + ], + "title": "Locks by mode", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 133 + }, + "id": 33, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "pgwatch_pg_long_running_transactions_age_in_seconds", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "legendFormat": "Longest trasaction's age in seconds", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Longest non-idle transaction age, > 1 min", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + 
"color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 141 + }, + "id": 34, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "pgwatch_pg_database_wraparound_age_datfrozenxid{datname!~\"template1|postgres\"}", + "legendFormat": "{{datname}}", + "range": true, + "refId": "A" + } + ], + "title": "Age of the oldest transaction ID that has not been frozen", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 141 + }, + "id": 35, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + 
"pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "pgwatch_pg_database_wraparound_age_datminmxid{datname!~\"template1|postgres\"}", + "legendFormat": "{{datname}}", + "range": true, + "refId": "A" + } + ], + "title": "Age of the oldest multi-transaction ID", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 149 + }, + "id": 36, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "pgwatch_pg_archiver_pending_wal_count", + "legendFormat": "No of files", + "range": true, + "refId": "A" + } + ], + "title": "TODO: No. 
of pending WAL files to be archived", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 149 + }, + "id": 37, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "pgwatch_pg_stuck_idle_in_transaction_queries", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "No. 
of queries", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "TODO: Number of queries stuck in idle in transaction state (>10min)", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 41, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Database Overview", + "uid": "f90500a0-a12e-4081-a2f0-07ed96f27915", + "version": 99 +} \ No newline at end of file diff --git a/config/grafana/dashboards/dash1.json b/config/grafana/dashboards/dash1.json index ac64f5d490ea72d167e203145e4213e2a92c51b6..4c14323294bd88037ae6e9bc1fa520bd71935ce7 100644 --- a/config/grafana/dashboards/dash1.json +++ b/config/grafana/dashboards/dash1.json @@ -17,10 +17,295 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, + "graphTooltip": 1, "id": 1, "links": [], "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 6, + "options": { + 
"legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "pgwatch_db_stats_blks_hit / (pgwatch_db_stats_blks_hit + pgwatch_db_stats_blks_read) * 100", + "legendFormat": "Buffer Cache Hit Ratio", + "range": true, + "refId": "A" + } + ], + "title": "Buffer Cache Hit", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(pgwatch_stat_statements_shared_blks_dirtied) * 8192", + "legendFormat": "Shared Blocks Dirtied", + "range": 
true, + "refId": "A" + } + ], + "title": "Shared Blocks Dirtied (Bytes)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(pgwatch_stat_statements_shared_blks_written) * 8192", + "legendFormat": "Shared Blocks Written", + "range": true, + "refId": "A" + } + ], + "title": "Shared Blocks Written (Bytes)", + "type": "timeseries" + }, { "datasource": { "type": "datasource", @@ -96,8 +381,8 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 0 + "x": 12, + "y": 8 }, "id": 1, "options": { @@ -121,7 +406,7 @@ "uid": "P7A0D6631BB10B34F" }, "disableTextWrap": false, - "editorMode": "code", + "editorMode": "builder", "expr": 
"pgwatch_pg_stat_statements_calls_calls", "fullMetaSearch": false, "hide": false, @@ -172,10 +457,6 @@ "options": {} }, { - "filter": { - "id": "byRefId", - "options": "/^(?:seriesToRows-B-B-B-B-B-B-B-B-B)$/" - }, "id": "extractFields", "options": { "delimiter": ",", @@ -212,22 +493,466 @@ } ], "type": "timeseries" - } - ], - "preload": false, - "refresh": "auto", - "schemaVersion": 41, - "tags": [], - "templating": { - "list": [] + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 8, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "sum(irate(pgwatch_stat_statements_total_time[$__rate_interval])) / 
sum(irate(pgwatch_stat_statements_calls[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "20s", + "legendFormat": "Runtime", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Average query runtime", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 16, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "E" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "#4e7299", + "mode": "fixed" + } + } + ] + }, + { 
+ "matcher": { + "id": "byName", + "options": "Active" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idle" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#afafaf", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idle-in-transaction" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Waiting" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_idleintransaction", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Idle-in-transaction", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_idle", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Idle", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_active", + "fullMetaSearch": false, + "hide": false, + 
"includeNullMetadata": true, + "instant": false, + "legendFormat": "Active", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_settings_max_connections", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Max connections", + "range": true, + "refId": "D", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_total", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "E", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pgwatch_backends_waiting ", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Waiting", + "range": true, + "refId": "F", + "useBackend": false + } + ], + "title": "Sessions", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "auto", + "schemaVersion": 41, + "tags": [], + "templating": { + "list": [ + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "1m", + "value": "1m" + }, + "name": "agg_interval", + "options": [ + { + "selected": false, + "text": "10s", + "value": "10s" + }, + { + "selected": true, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "10s,1m,5m,1h", + "refresh": 2, + "type": "interval" + } + ] }, "time": { - "from": "now-5m", + "from": "now-30m", "to": "now" }, "timepicker": {}, "timezone": "browser", "title": "PoC", "uid": 
"00eb62a7-4b80-43cd-a890-45336979aa18", - "version": 2 + "version": 26 } \ No newline at end of file diff --git a/config/grafana/provisioning/datasources/datasources.yml b/config/grafana/provisioning/datasources/datasources.yml index 699bbbc00f6118f594d4e646219fec970b1a2b2f..91ca8782f0d45cf532bda087064121453f7621cb 100644 --- a/config/grafana/provisioning/datasources/datasources.yml +++ b/config/grafana/provisioning/datasources/datasources.yml @@ -4,6 +4,7 @@ datasources: - name: PGWatch-PostgreSQL type: postgres access: proxy + uid: P031DD592934B2F1F url: sink-postgres:5432 database: measurements user: pgwatch @@ -12,10 +13,16 @@ datasources: jsonData: sslmode: disable postgresVersion: 1500 - isDefault: true + isDefault: false - name: PGWatch-Prometheus type: prometheus access: proxy + uid: P7A0D6631BB10B34F url: http://sink-prometheus:9090 - isDefault: false \ No newline at end of file + isDefault: true + jsonData: + scrapeInterval: '5s' + queryTimeout: '5s' + timeInterval: '5s' + httpMethod: 'POST' \ No newline at end of file diff --git a/config/pgwatch-postgres/metrics.yml b/config/pgwatch-postgres/metrics.yml index 92a620cf99abde20c29527c3765e728b37eee8d9..528f80918ecf67f7db357865d5be4d65d9965097 100644 --- a/config/pgwatch-postgres/metrics.yml +++ b/config/pgwatch-postgres/metrics.yml @@ -5,10 +5,15 @@ metrics: sqls: 11: |- select /* pgwatch_generated */ - (extract(epoch from now()) * 1e9)::int8 as epoch_ns, queryid, query from pg_stat_statements where queryid is not null gauges: - - '*' \ No newline at end of file + - '*' + +presets: + full: + description: "Full metrics for PostgreSQL storage" + metrics: + pgss_queryid_queries: 10 \ No newline at end of file diff --git a/config/pgwatch-postgres/sources.yml b/config/pgwatch-postgres/sources.yml index df05b652c790adea453eb7802ca877a90e3a0661..6dfdb1946d26198273c454723b2c30bd44508672 100644 --- a/config/pgwatch-postgres/sources.yml +++ b/config/pgwatch-postgres/sources.yml @@ -1,14 +1,11 @@ # PGWatch Sources 
Configuration - PostgreSQL Instance -# This instance stores detailed metrics in PostgreSQL format - -- name: target-database +- unique_name: target-database conn_str: postgresql://pgwatch_monitor:monitor_pass@target-db:5432/target_database - kind: postgres + preset_metrics: full custom_metrics: - pgss_queryid_queries: 30 + is_enabled: true + group: default custom_tags: env: demo cluster: local sink_type: postgresql - is_enabled: true - stmt_timeout: 30 \ No newline at end of file diff --git a/config/pgwatch-prometheus/metrics.yml b/config/pgwatch-prometheus/metrics.yml index 492724b4d0aea94efa8cae75ec3d01bf66976cd3..1e8f083fb63167ec31464d21d769de03d7ad548a 100644 --- a/config/pgwatch-prometheus/metrics.yml +++ b/config/pgwatch-prometheus/metrics.yml @@ -1,21 +1,4819 @@ # Simple PGWatch Metrics for Prometheus - just queryid and calls + +# The following structure is expected for metrics and preset definitions: +# metrics: +# metric_name: +# init_sql: |- +# CREATE EXTENSION IF NOT EXISTS some_extension; +# CREATE OR REPLACE FUNCTION get_some_stat(OUT some_stat int) +# ... +# sqls: +# 11: | +# select /* pgwatch_generated */ +# (extract(epoch from now()) * 1e9)::int8 as epoch_ns, +# ... +# 14: | +# select /* pgwatch_generated */ +# (extract(epoch from now()) * 1e9)::int8 as epoch_ns, +# ... +# gauges: +# - '*' +# is_instance_level: true +# node_status: primary +# statement_timeout_seconds: 300 +# metric_storage_name: db_stats metrics: - pg_stat_statements_calls: - description: "Simple queryid and calls metric" + archiver: + description: > + This metric retrieves key statistics from the PostgreSQL `pg_stat_archiver` view providing insights into the status of WAL file archiving. + It returns the total number of successfully archived files and failed archiving attempts. Additionally, it identifies if the most recent + attempt resulted in a failure and calculates how many seconds have passed since the last failure. 
The metric only considers data if WAL + archiving is enabled in the system, helping administrators monitor and diagnose issues related to the archiving process. sqls: 11: |- select /* pgwatch_generated */ (extract(epoch from now()) * 1e9)::int8 as epoch_ns, - queryid::text as tag_queryid, - calls - from pg_stat_statements - where queryid is not null - order by calls desc - limit 10 + archived_count, + failed_count, + case when coalesce(last_failed_time, '1970-01-01'::timestamptz) > coalesce(last_archived_time, '1970-01-01'::timestamptz) then 1 else 0 end as is_failing_int, + extract(epoch from now() - last_failed_time)::int8 as seconds_since_last_failure + from + pg_stat_archiver + where + current_setting('archive_mode') in ('on', 'always') gauges: - - calls - master_only: false - is_instance_level: false - metric_storage_name: pgss_calls + - is_failing_int + - seconds_since_last_failure + is_instance_level: true + archiver_pending_count: + description: > + This metric retrieves the count of WAL files waiting to be archived by checking the pg_wal/archive_status directory + for files with .ready extension. It helps monitor the archiving backlog and potential issues with WAL archiving. + sqls: + 10: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + count(*) as archiver_pending_count + from + (select pg_ls_dir('pg_wal/archive_status')) a + where + pg_ls_dir ~ '[0-9A-F]{24}.ready' + 9.4: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + count(*) as archiver_pending_count + from + (select pg_ls_dir('pg_xlog/archive_status')) a + where + pg_ls_dir ~ '[0-9A-F]{24}.ready' + gauges: + - archiver_pending_count + is_instance_level: true + backends: + description: > + This metric gathers detailed information from the PostgreSQL pg_stat_activity view, providing an overview of the database's current session + and activity state. 
It tracks the total number of client backends, active sessions, idle sessions, sessions waiting on locks, and background workers. + The metric also calculates statistics on blocked sessions, most extended waiting times, average and longest session durations, transaction times, + and query durations. Additionally, it monitors autovacuum worker activity and provides the age of the oldest transaction (measured by xmin). + This metric helps administrators monitor session states, detect bottlenecks, and ensure the system is within its connection limits, + providing visibility into database performance and contention. + sqls: + 11: | + with sa_snapshot as ( + select * from pg_stat_activity + where pid != pg_backend_pid() + and datname = current_database() + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + (select count(*) from sa_snapshot where backend_type = 'client backend') as total, + (select count(*) from pg_stat_activity where pid != pg_backend_pid()) as instance_total, + current_setting('max_connections')::int as max_connections, + (select count(*) from sa_snapshot where backend_type = 'background worker') as background_workers, + (select count(*) from sa_snapshot where state = 'active' and backend_type = 'client backend') as active, + (select count(*) from sa_snapshot where state = 'idle' and backend_type = 'client backend') as idle, + (select count(*) from sa_snapshot where state = 'idle in transaction' and backend_type = 'client backend') as idleintransaction, + (select count(*) from sa_snapshot where wait_event_type in ('LWLock', 'Lock', 'BufferPin') and backend_type = 'client backend') as waiting, + (select coalesce(sum(case when coalesce(array_length(pg_blocking_pids(pid), 1), 0) >= 1 then 1 else 0 end), 0) from sa_snapshot where backend_type = 'client backend' and state = 'active') as blocked, + (select ceil(extract(epoch from max(now() - query_start)))::int from sa_snapshot where wait_event_type in ('LWLock', 
'Lock', 'BufferPin') and backend_type = 'client backend') as longest_waiting_seconds, + (select round(avg(abs(extract(epoch from now() - query_start)))::numeric, 3)::float from sa_snapshot where wait_event_type in ('LWLock', 'Lock', 'BufferPin') and backend_type = 'client backend') as avg_waiting_seconds, + (select ceil(extract(epoch from (now() - backend_start)))::int from sa_snapshot where backend_type = 'client backend' order by backend_start limit 1) as longest_session_seconds, + (select round(avg(abs(extract(epoch from now() - backend_start)))::numeric, 3)::float from sa_snapshot where backend_type = 'client backend') as avg_session_seconds, + (select ceil(extract(epoch from (now() - xact_start)))::int from sa_snapshot where xact_start is not null and backend_type = 'client backend' order by xact_start limit 1) as longest_tx_seconds, + (select round(avg(abs(extract(epoch from now() - xact_start)))::numeric, 3)::float from sa_snapshot where xact_start is not null and backend_type = 'client backend') as avg_tx_seconds, + (select ceil(extract(epoch from (now() - xact_start)))::int from sa_snapshot where backend_type = 'autovacuum worker' order by xact_start limit 1) as longest_autovacuum_seconds, + (select ceil(extract(epoch from max(now() - query_start)))::int from sa_snapshot where state = 'active' and backend_type = 'client backend') as longest_query_seconds, + (select round(avg(abs(extract(epoch from now() - query_start)))::numeric, 3)::float from sa_snapshot where state = 'active' and backend_type = 'client backend') as avg_query_seconds, + (select max(age(backend_xmin))::int8 from sa_snapshot) as max_xmin_age_tx, + (select count(*) from sa_snapshot where state = 'active' and backend_type = 'autovacuum worker') as av_workers + gauges: + - '*' + backup_age_pgbackrest: + description: > + This metric retrieves the age of the last successful pgBackRest backup in seconds. 
It uses the `pgbackrest --output=json info` command to fetch + the backup information and calculates the age based on the current time and the timestamp of the last backup. The metric returns a retcode of 0 + on success, along with the age in seconds and a message indicating the status. + Expects pgBackRest to be correctly configured on the monitored DB server; the JSON output is parsed with Python's json module, so the "jq" tool is not needed. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + retcode, + backup_age_seconds, + message + from + get_backup_age_pgbackrest() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_backup_age_pgbackrest(OUT retcode int, OUT backup_age_seconds int, OUT message text) AS + $$ + import time + import json + import subprocess + + PGBACKREST_TIMEOUT = 30 + + def error(message, returncode=1): + return returncode, 1000000, 'Not OK. '+message + + pgbackrest_cmd=["pgbackrest", "--output=json", "info"] + + try: + p = subprocess.Popen(pgbackrest_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8') + stdout, stderr = p.communicate(timeout=PGBACKREST_TIMEOUT) + except OSError as e: + return error('Failed to execute pgbackrest: {}'.format(e)) + except subprocess.TimeoutExpired: + p.terminate() + try: + p.wait(0.5) + except subprocess.TimeoutExpired: + p.kill() + return error('pgbackrest failed to respond in {} seconds'.format(PGBACKREST_TIMEOUT)) + + if p.returncode != 0: + return error('Failed on "pgbackrest info" call', returncode=p.returncode) + + try: + data = json.loads(stdout) + backup_age_seconds = int(time.time()) - data[0]['backup'][-1]['timestamp']['stop'] + return 0, backup_age_seconds, 'OK. 
Last backup age in seconds: {}'.format(backup_age_seconds) + except (json.JSONDecodeError, KeyError, IndexError, TypeError): + return error('Failed to parse pgbackrest output') + $$ LANGUAGE plpython3u VOLATILE; + + ALTER FUNCTION get_backup_age_pgbackrest() SET statement_timeout TO '30s'; + + GRANT EXECUTE ON FUNCTION get_backup_age_pgbackrest() TO pgwatch; + + COMMENT ON FUNCTION get_backup_age_pgbackrest() is 'created for pgwatch'; + is_instance_level: true + backup_age_walg: + description: > + Retrieves the age of the last successful WAL-G backup in seconds. It uses the `wal-g backup-list --json` command to fetch + the backup information and calculates the age based on the current time and the timestamp of the last backup. + The metric returns a retcode of 0 on success, along with the age in seconds and a message indicating the status. + Expects .wal-g.json is correctly configured with all necessary credentials and "jq" tool is installed on the DB server. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + retcode, + backup_age_seconds, + message + from + get_backup_age_walg() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_backup_age_walg(OUT retcode int, OUT backup_age_seconds int, OUT message text) AS + $$ + import subprocess + retcode=1 + backup_age_seconds=1000000 + message='' + + # get latest wal-g backup timestamp + walg_last_backup_cmd="""wal-g backup-list --json | jq -r '.[0].time'""" + p = subprocess.run(walg_last_backup_cmd, stdout=subprocess.PIPE, encoding='utf-8', shell=True) + if p.returncode != 0: + # plpy.notice("p.stdout: " + str(p.stderr) + str(p.stderr)) + return p.returncode, backup_age_seconds, 'Not OK. 
Failed on wal-g backup-list call' + + # plpy.notice("last_tz: " + last_tz) + last_tz=p.stdout.rstrip('\n\r') + + # get seconds since last backup from WAL-G timestamp in format '2020-01-22T17:50:51Z' + try: + plan = plpy.prepare("SELECT extract(epoch from now() - $1::timestamptz)::int AS backup_age_seconds;", ["text"]) + rv = plpy.execute(plan, [last_tz]) + except Exception as e: + return retcode, backup_age_seconds, 'Not OK. Failed to convert WAL-G backup timestamp to seconds' + else: + backup_age_seconds = rv[0]["backup_age_seconds"] + return 0, backup_age_seconds, 'OK. Last backup age in seconds: %s' % backup_age_seconds + + $$ LANGUAGE plpython3u VOLATILE; + + /* contacting S3 could be laggy depending on location */ + ALTER FUNCTION get_backup_age_walg() SET statement_timeout TO '30s'; + + GRANT EXECUTE ON FUNCTION get_backup_age_walg() TO pgwatch; + + COMMENT ON FUNCTION get_backup_age_walg() is 'created for pgwatch'; + is_instance_level: true + bgwriter: + description: > + Retrieves key statistics from the PostgreSQL `pg_stat_bgwriter` view, providing insights into the background writer's performance. + It returns the number of timed and requested checkpoints, checkpoint write and sync times, buffer statistics, and the last reset time. + This metric helps administrators monitor the background writer's activity and its impact on database performance. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + checkpoints_timed, + checkpoints_req, + checkpoint_write_time, + checkpoint_sync_time, + buffers_checkpoint, + buffers_clean, + maxwritten_clean, + buffers_backend, + buffers_backend_fsync, + buffers_alloc + from + pg_stat_bgwriter + 17: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + buffers_clean, + maxwritten_clean, + buffers_alloc, + (extract(epoch from now() - stats_reset))::int as last_reset_s + from + pg_stat_bgwriter node_status: primary - statement_timeout_seconds: 5 + is_instance_level: true + buffercache_by_db: + description: > + Retrieves buffer cache statistics grouped by database, providing insights into the size of buffers used by each database. + It calculates the total size of buffers in bytes for each database. + This metric helps administrators monitor buffer usage across different databases in the PostgreSQL instance. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + datname as tag_database, + count(*) * (current_setting('block_size')::int8) as size_b + FROM + pg_buffercache AS b, + pg_database AS d + WHERE + d.oid = b.reldatabase + GROUP BY + datname + gauges: + - '*' + is_instance_level: true + buffercache_by_type: + description: > + Retrieves buffer cache statistics grouped by relation type, providing insights into the size of buffers used + by different relation kinds. It calculates the total size of buffers in bytes for each relation kind + (e.g., Table, Index, Toast, Materialized view). This metric helps administrators monitor buffer usage across + different relation types in the PostgreSQL instance. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + CASE + WHEN relkind = 'r' THEN 'Table' -- TODO all relkinds covered? 
+ WHEN relkind = 'i' THEN 'Index' + WHEN relkind = 't' THEN 'Toast' + WHEN relkind = 'm' THEN 'Materialized view' + ELSE 'Other' + END as tag_relkind, + count(*) * (current_setting('block_size')::int8) size_b + FROM + pg_buffercache AS b, + pg_class AS d + WHERE + b.relfilenode = pg_relation_filenode(d.oid) AND b.reldatabase IN (0, (SELECT oid FROM pg_database WHERE datname = current_database())) + GROUP BY + relkind + gauges: + - '*' + is_instance_level: true + change_events: + description: > + The "change_events" built-in metric tracks DDL & config changes. Internally, it uses some other * + _hashes metrics that are not meant to be used independently. Such metrics should not be removed. + sqls: + 11: "" + checkpointer: + description: > + Retrieves key statistics from the PostgreSQL `pg_stat_checkpointer` view, providing insights into the checkpointer's performance. + It returns the number of timed and requested checkpoints, restart points, write and sync times, and buffer statistics. + This metric helps administrators monitor the checkpointer's activity and its impact on database performance. + sqls: + 11: "; -- covered by bgwriter" + 17: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + num_timed, + num_requested, + restartpoints_timed, + restartpoints_req, + restartpoints_done, + write_time, + sync_time, + buffers_written, + (extract(epoch from now() - stats_reset))::int as last_reset_s + from + pg_stat_checkpointer + configuration_hashes: + description: > + Retrieves configuration settings from the PostgreSQL `pg_settings` view, providing insights into the current configuration of the database. + This metric helps administrators monitor changes applied to the database configuration. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + name as tag_setting, + coalesce(reset_val, '') as value + from + pg_settings + where + name <> 'connection_ID' + cpu_load: + description: > + Retrieves the system load average for the last 1, 5, and 15 minutes using a custom PL/Python function. 
+ This metric provides insights into the CPU load on the PostgreSQL server, helping administrators monitor system performance. + The function uses the `os.getloadavg()` method to fetch the load averages. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + round(load_1min::numeric, 2)::float as load_1min, + round(load_5min::numeric, 2)::float as load_5min, + round(load_15min::numeric, 2)::float as load_15min + from + get_load_average(); + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + CREATE OR REPLACE FUNCTION get_load_average(OUT load_1min float, OUT load_5min float, OUT load_15min float) AS + $$ + from os import getloadavg + la = getloadavg() + return [la[0], la[1], la[2]] + $$ LANGUAGE plpython3u VOLATILE; + GRANT EXECUTE ON FUNCTION get_load_average() TO pgwatch; + COMMENT ON FUNCTION get_load_average() is 'created for pgwatch'; + gauges: + - '*' + is_instance_level: true + database_conflicts: + description: > + Retrieves conflict statistics from the PostgreSQL `pg_stat_database_conflicts` view, providing insights into conflicts that have occurred + in the current database. It returns the number of conflicts related to tablespace, lock, snapshot, buffer pin, and deadlock. + This metric helps administrators monitor and diagnose issues related to database conflicts. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + confl_tablespace, + confl_lock, + confl_snapshot, + confl_bufferpin, + confl_deadlock + FROM + pg_stat_database_conflicts + WHERE + datname = current_database() + node_status: standby + datfrozenxid: + description: > + This metric tracks transaction ID and multixact ID ages to monitor wraparound risk. It retrieves the age + of the oldest datfrozenxid and datminmxid from pg_database for the current database, helping administrators + monitor and prevent transaction ID wraparound which can cause database shutdowns. 
+ sqls: + 9.3: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + age(datfrozenxid) as datfrozenxid_age, + mxid_age(datminmxid) as datminmxid_age + from + pg_database + where + datname = current_database() + gauges: + - datfrozenxid_age + - datminmxid_age + db_size: + description: > + Retrieves the size of the current database and the size of the `pg_catalog` schema, providing insights into the storage usage of the database. + It returns the size in bytes for both the current database and the catalog schema. + This metric helps administrators monitor database size and storage consumption. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + pg_database_size(current_database()) as size_b, + (select sum(pg_total_relation_size(c.oid))::int8 + from pg_class c join pg_namespace n on n.oid = c.relnamespace + where nspname = 'pg_catalog' and relkind = 'r' + ) as catalog_size_b + gauges: + - '*' + statement_timeout_seconds: 300 + db_size_approx: + description: > + Retrieves an approximate size of the current database and the size of the `pg_catalog` schema, providing insights into the storage usage of the database. + It returns the size in bytes for both the current database and the catalog schema. + This metric helps administrators monitor database size and storage consumption. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + current_setting('block_size')::int8 * ( + select sum(relpages) from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where c.relpersistence != 't' + ) as size_b, + current_setting('block_size')::int8 * ( + select sum(c.relpages + coalesce(ct.relpages, 0) + coalesce(cti.relpages, 0)) + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + left join pg_class ct on ct.oid = c.reltoastrelid + left join pg_index ti on ti.indrelid = ct.oid + left join pg_class cti on cti.oid = ti.indexrelid + where nspname = 'pg_catalog' + and (c.relkind = 'r' + or c.relkind = 'i' and not c.relname ~ '^pg_toast') + ) as catalog_size_b + gauges: + - '*' + metric_storage_name: db_size + db_stats: + description: > + Retrieves key statistics from the PostgreSQL `pg_stat_database` view, providing insights into the current database's performance. + It returns the number of backends, transaction commits and rollbacks, buffer reads and hits, tuple statistics, conflicts, temporary files and bytes, + deadlocks, block read and write times, postmaster uptime, backup duration, recovery status, system identifier, and invalid indexes. + This metric helps administrators monitor database activity and performance. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + system_identifier::text as tag_sys_id, + (select count(*) from pg_index i + where not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 'active' + and a.query ~* 'concurrently' + )) as invalid_indexes + from + pg_stat_database, pg_control_system() + where + datname = current_database() + 12: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, + checksum_failures, + extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + system_identifier::text as tag_sys_id, + (select count(*) from pg_index i + where not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 
'active' + and a.query ~* 'concurrently' + )) as invalid_indexes + from + pg_stat_database, pg_control_system() + where + datname = current_database() + 14: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, + checksum_failures, + extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + system_identifier::text as tag_sys_id, + session_time::int8, + active_time::int8, + idle_in_transaction_time::int8, + sessions, + sessions_abandoned, + sessions_fatal, + sessions_killed, + (select count(*) from pg_index i + where not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 'active' + and a.query ~* 'concurrently' + )) as invalid_indexes + from + pg_stat_database, pg_control_system() + where + datname = current_database() + 15: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + checksum_failures, + extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s, + case when pg_is_in_recovery() then 1 else 
0 end as in_recovery_int, + system_identifier::text as tag_sys_id, + session_time::int8, + active_time::int8, + idle_in_transaction_time::int8, + sessions, + sessions_abandoned, + sessions_fatal, + sessions_killed, + (select count(*) from pg_index i + where not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 'active' + and a.query ~* 'concurrently' + )) as invalid_indexes + from + pg_stat_database, pg_control_system() + where + datname = current_database() + gauges: + - numbackends + - postmaster_uptime_s + - backup_duration_s + - checksum_last_failure_s + db_stats_aurora: + description: > + Retrieves key statistics from the PostgreSQL `pg_stat_database` view for Amazon Aurora PostgreSQL, providing insights into the current database's performance. + It returns the number of backends, transaction commits and rollbacks, buffer reads and hits, tuple statistics, conflicts, temporary files and bytes, + deadlocks, block read and write times, postmaster uptime, recovery status, and system identifier. + This metric helps administrators monitor database activity and performance in an Aurora PostgreSQL environment. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + numbackends, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + tup_returned, + tup_fetched, + tup_inserted, + tup_updated, + tup_deleted, + conflicts, + temp_files, + temp_bytes, + deadlocks, + blk_read_time, + blk_write_time, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + system_identifier::text as tag_sys_id + from + pg_stat_database, pg_control_system() + where + datname = current_database() + gauges: + - numbackends + - postmaster_uptime_s + - backup_duration_s + - checksum_last_failure_s + metric_storage_name: db_stats + index_hashes: + description: > + Retrieves the hash of index definitions in the PostgreSQL database, providing a way to track changes in index definitions over time. + This metric helps administrators monitor index changes and ensure consistency in index definitions. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + quote_ident(nspname)||'.'||quote_ident(c.relname) as tag_index, + quote_ident(nspname)||'.'||quote_ident(r.relname) as "table", + i.indisvalid::text as is_valid, + coalesce(md5(pg_get_indexdef(i.indexrelid)), random()::text) as md5 + from + pg_index i + join + pg_class c on c.oid = i.indexrelid + join + pg_class r on r.oid = i.indrelid + join + pg_namespace n on n.oid = c.relnamespace + where + c.relnamespace not in (select oid from pg_namespace where nspname like any(array[E'pg\\_%', 'information_schema'])) + index_stats: + description: > + Retrieves detailed statistics about indexes in the PostgreSQL database, including index size, scan counts, tuple read and fetch counts, + block read and hit counts, and index validity. It also identifies the largest, most scanned, and unused indexes. 
+ This metric helps administrators monitor index performance and identify potential issues with unused or invalid indexes. + sqls: + 11: |- + /* does not return all index stats but biggest, top scanned and biggest unused ones */ + WITH q_locked_rels AS ( + select relation from pg_locks where mode = 'AccessExclusiveLock' + ), + q_index_details AS ( + select + sui.schemaname, + sui.indexrelname, + sui.relname, + sui.indexrelid, + coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b, + sui.idx_scan, + sui.idx_tup_read, + sui.idx_tup_fetch, + io.idx_blks_read, + io.idx_blks_hit, + i.indisvalid, + i.indisprimary, + i.indisunique, + i.indisexclusion + from + pg_stat_user_indexes sui + join pg_statio_user_indexes io on io.indexrelid = sui.indexrelid + join pg_index i on i.indexrelid = sui.indexrelid + where not sui.schemaname like any (array [E'pg\\_temp%', E'\\_timescaledb%']) + and not exists (select * from q_locked_rels where relation = sui.relid or relation = sui.indexrelid) + ), + q_top_indexes AS ( + /* biggest */ + select * + from ( + select indexrelid + from q_index_details + where idx_scan > 1 + order by index_size_b desc + limit 200 + ) x + union + /* most block traffic */ + select * + from ( + select indexrelid + from q_index_details + order by coalesce(idx_blks_read, 0) + coalesce(idx_blks_hit, 0) desc + limit 200 + ) y + union + /* most scans */ + select * + from ( + select indexrelid + from q_index_details + order by idx_scan desc nulls last + limit 200 + ) z + union + /* biggest unused non-constraint */ + select * + from ( + select q.indexrelid + from q_index_details q + where idx_scan = 0 + and not (indisprimary or indisunique or indisexclusion) + order by index_size_b desc + limit 200 + ) z + union + /* all invalid */ + select * + from ( + select q.indexrelid + from q_index_details q + where not indisvalid + ) zz + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + schemaname::text as tag_schema, + 
indexrelname::text as tag_index_name, + quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_index_full_name, + relname::text as tag_table_name, + quote_ident(schemaname)||'.'||quote_ident(relname) as tag_table_full_name, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_read, 0) as idx_tup_read, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + coalesce(index_size_b, 0) as index_size_b, + quote_ident(schemaname)||'.'||quote_ident(indexrelname) as index_full_name_val, + md5(regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE')) as tag_index_def_hash, + regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE') as index_def, + case when not indisvalid then 1 else 0 end as is_invalid_int, + case when indisprimary then 1 else 0 end as is_pk_int, + case when indisunique or indisexclusion then 1 else 0 end as is_uq_or_exc, + system_identifier::text as tag_sys_id + FROM + q_index_details id + JOIN + pg_control_system() ON true + WHERE + indexrelid IN (select indexrelid from q_top_indexes) + ORDER BY + id.schemaname, id.relname, id.indexrelname + 16: |- + /* NB! 
does not return all index stats but biggest, top scanned and biggest unused ones */ + WITH q_locked_rels AS ( /* pgwatch_generated */ + select relation from pg_locks where mode = 'AccessExclusiveLock' + ), + q_index_details AS ( + select + sui.schemaname, + sui.indexrelname, + sui.relname, + sui.indexrelid, + coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b, + sui.idx_scan, + sui.idx_tup_read, + sui.idx_tup_fetch, + io.idx_blks_read, + io.idx_blks_hit, + i.indisvalid, + i.indisprimary, + i.indisunique, + i.indisexclusion, + extract(epoch from now() - last_idx_scan)::int as last_idx_scan_s + from + pg_stat_user_indexes sui + join pg_statio_user_indexes io on io.indexrelid = sui.indexrelid + join pg_index i on i.indexrelid = sui.indexrelid + where not sui.schemaname like any (array [E'pg\\_temp%', E'\\_timescaledb%']) + and not exists (select * from q_locked_rels where relation = sui.relid or relation = sui.indexrelid) + ), + q_top_indexes AS ( + /* biggest */ + select * + from ( + select indexrelid + from q_index_details + where idx_scan > 1 + order by index_size_b desc + limit 200 + ) x + union + /* most block traffic */ + select * + from ( + select indexrelid + from q_index_details + order by coalesce(idx_blks_read, 0) + coalesce(idx_blks_hit, 0) desc + limit 200 + ) y + union + /* most scans */ + select * + from ( + select indexrelid + from q_index_details + order by idx_scan desc nulls last + limit 200 + ) z + union + /* biggest unused non-constraint */ + select * + from ( + select q.indexrelid + from q_index_details q + where idx_scan = 0 + and not (indisprimary or indisunique or indisexclusion) + order by index_size_b desc + limit 200 + ) z + union + /* all invalid */ + select * + from ( + select q.indexrelid + from q_index_details q + where not indisvalid + ) zz + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + schemaname::text as tag_schema, + indexrelname::text as tag_index_name, + 
quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_index_full_name, + relname::text as tag_table_name, + quote_ident(schemaname)||'.'||quote_ident(relname) as tag_table_full_name, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_read, 0) as idx_tup_read, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + coalesce(index_size_b, 0) as index_size_b, + quote_ident(schemaname)||'.'||quote_ident(indexrelname) as index_full_name_val, + md5(regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE')) as tag_index_def_hash, + regexp_replace(regexp_replace(pg_get_indexdef(indexrelid),indexrelname,'X'), '^CREATE UNIQUE','CREATE') as index_def, + case when not indisvalid then 1 else 0 end as is_invalid_int, + case when indisprimary then 1 else 0 end as is_pk_int, + case when indisunique or indisexclusion then 1 else 0 end as is_uq_or_exc, + system_identifier::text as tag_sys_id, + last_idx_scan_s + FROM + q_index_details id + JOIN + pg_control_system() ON true + WHERE + indexrelid IN (select indexrelid from q_top_indexes) + ORDER BY + id.schemaname, id.relname, id.indexrelname + instance_up: + description: > + This metric has some special handling attached to it - it will store a 0 value if the database is not accessible. + Thus it can be used, for example, to calculate a percentage "uptime" indicator. + For standard metrics there will be no data rows stored when the DB is not reachable, but for this one, + there will be a zero stored for the "is_up" column that, under normal operations, would always be 1. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 1::int as is_up + invalid_indexes: + description: > + Retrieves a list of invalid indexes in the PostgreSQL database, providing insights into indexes that are not valid. + It returns the schema-qualified name and the size in bytes of each invalid index (the 100 largest, skipping indexes that are being rebuilt concurrently or are exclusively locked). 
This metric helps administrators identify and address issues with invalid indexes. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + format('%I.%I', n.nspname , ci.relname) as tag_index_full_name, + coalesce(pg_relation_size(indexrelid), 0) as index_size_b + from + pg_index i + join pg_class ci on ci.oid = i.indexrelid + join pg_class cr on cr.oid = i.indrelid + join pg_namespace n on n.oid = ci.relnamespace + where not n.nspname like E'pg\\_temp%' + and not indisvalid + and not exists ( /* leave out ones that are being actively rebuilt */ + select * from pg_locks l + join pg_stat_activity a using (pid) + where l.relation = i.indexrelid + and a.state = 'active' + and a.query ~* 'concurrently' + ) + and not exists (select * from pg_locks where relation = indexrelid and mode = 'AccessExclusiveLock') /* can't get size then */ + order by index_size_b desc + limit 100 + kpi: + description: > + Retrieves key performance indicators (KPIs) from the PostgreSQL `pg_stat_database` view, providing insights into the current database's performance. + It returns the number of backends, active and blocked backends, oldest transaction age, transactions per second (TPS), commit and rollback counts, + buffer read and hit counts, temporary bytes, sequence scans on tables larger than 10MB, tuple statistics, stored procedure calls, + block read and write times, deadlocks, recovery status, and postmaster uptime. + This metric helps administrators monitor database activity and performance. 
+ sqls: + 11: | + WITH q_stat_tables AS ( + SELECT * FROM pg_stat_user_tables t + JOIN pg_class c ON c.oid = t.relid + WHERE NOT schemaname LIKE E'pg\\_temp%' + AND c.relpages > (1e7 / current_setting('block_size')::int) -- >10MB, relpages is counted in block_size pages + ), + q_stat_activity AS ( + SELECT * FROM pg_stat_activity + WHERE datname = current_database() AND pid != pg_backend_pid() + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + case + when pg_is_in_recovery() = false then + pg_wal_lsn_diff(pg_current_wal_lsn(), '0/0')::int8 + else + pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '0/0')::int8 + end as wal_location_b, + numbackends - 1 as numbackends, + (select count(*) from q_stat_activity where state in ('active', 'idle in transaction')) AS active_backends, + (select count(*) from q_stat_activity where wait_event_type in ('LWLock', 'Lock', 'BufferPin')) AS blocked_backends, + (select round(extract(epoch from now()) - extract(epoch from (select xact_start from q_stat_activity + where datid = d.datid and not query like 'autovacuum:%' order by xact_start limit 1))))::int AS kpi_oldest_tx_s, + xact_commit + xact_rollback AS tps, + xact_commit, + xact_rollback, + blks_read, + blks_hit, + temp_bytes, + (select sum(seq_scan) from q_stat_tables)::int8 AS seq_scans_on_tbls_gt_10mb, + tup_inserted, + tup_updated, + tup_deleted, + (select sum(calls) from pg_stat_user_functions where not schemaname like any(array[E'pg\\_%', 'information_schema']))::int8 AS sproc_calls, + blk_read_time, + blk_write_time, + deadlocks, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, + extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s + FROM + pg_stat_database d + WHERE + datname = current_database() + gauges: + - numbackends + - active_backends + - blocked_backends + - kpi_oldest_tx_s + locks: + description: > + Retrieves lock statistics from the PostgreSQL `pg_locks` view, providing insights into the types and modes of locks currently held in the database. 
+ It returns each lock type together with the number of locks currently held of that type. This metric helps administrators monitor lock contention and performance. + sqls: + 11: |- + WITH q_locks AS ( + select + * + from + pg_locks + where + pid != pg_backend_pid() + and database = (select oid from pg_database where datname = current_database()) + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + locktypes AS tag_locktype, + coalesce((select count(*) FROM q_locks WHERE locktype = locktypes), 0) AS count + FROM + unnest('{relation, extend, page, tuple, transactionid, virtualxid, object, userlock, advisory}'::text[]) locktypes + gauges: + - '*' + locks_mode: + description: > + Retrieves lock mode statistics from the PostgreSQL `pg_locks` view, providing insights into the different lock modes currently held in the database. + It returns the lock mode and the count of locks for each mode. This metric helps administrators monitor lock contention and performance. + sqls: + 11: |- + WITH q_locks AS ( + select + * + from + pg_locks + where + pid != pg_backend_pid() + and database = (select oid from pg_database where datname = current_database()) + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + lockmodes AS tag_lockmode, + coalesce((select count(*) FROM q_locks WHERE mode = lockmodes), 0) AS count + FROM + unnest('{AccessShareLock, ExclusiveLock, RowShareLock, RowExclusiveLock, ShareLock, ShareRowExclusiveLock, AccessExclusiveLock, ShareUpdateExclusiveLock}'::text[]) lockmodes + gauges: + - '*' + logical_subscriptions: + description: > + Retrieves information about logical subscriptions in the PostgreSQL database, including their names, enabled status, and the number of relations in each subscription. + It also provides counts of relations in each synchronization state (`i` = initialize, `d` = data is being copied, `s` = synchronized, `r` = ready). 
+ This metric helps administrators monitor logical replication subscriptions and their statuses. + sqls: + 11: | + with q_sr as ( + select * from pg_subscription_rel + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + subname::text as tag_subname, + subenabled, + (select count(*) from q_sr where srsubid = oid) as relcount, + (select count(*) from q_sr where srsubid = oid and srsubstate = 'i') as state_i, + (select count(*) from q_sr where srsubid = oid and srsubstate = 'd') as state_d, + (select count(*) from q_sr where srsubid = oid and srsubstate = 's') as state_s, + (select count(*) from q_sr where srsubid = oid and srsubstate = 'r') as state_r + from + pg_subscription + where + subdbid = (select oid from pg_database where datname = current_database()) + gauges: + - '*' + pgbouncer_stats: + description: > + Retrieves statistics from the PgBouncer connection pooler. + This metric helps administrators monitor PgBouncer performance and connection pooling efficiency. + sqls: + 0: show stats + pgbouncer_clients: + description: > + Retrieves client connection statistics from the PgBouncer connection pooler, providing insights into the current state of client connections. + It returns the number of active, idle, and total client connections, as well as transaction counts and memory usage statistics. + This metric helps administrators monitor PgBouncer client connections and performance. + sqls: + 0: show clients + pgpool_processes: + description: > + Retrieves process statistics from the PgPool connection pooler, providing insights into the current state of PgPool processes. + It returns the number of active, idle, and total processes, as well as memory usage statistics. + This metric helps administrators monitor PgPool process performance and resource utilization. 
+ sqls: + 3: show pool_processes + pgpool_stats: + description: > + Retrieves statistics from the PgPool connection pooler, providing insights into the current state of PgPool connections and transactions. + It returns the number of active, idle, and total connections, as well as transaction counts and memory usage statistics. + This metric helps administrators monitor PgPool performance and connection pooling efficiency. + sqls: + 3: show pool_nodes + postgres_role: + description: > + This metric determines the PostgreSQL server role (primary, standby, or standalone) by checking + if the server is in recovery mode and if it has any active replication connections. It returns + an integer value: 0 = standalone, 1 = primary with replicas, 2 = standby/replica. + sqls: + 9.0: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + case pg_is_in_recovery() + when 't' then 2 + else (select case (select count(*) from pg_stat_replication where application_name != 'pg_basebackup') when '0' then 0 else 1 end) + end as in_recovery_int + gauges: + - in_recovery_int + is_instance_level: true + privilege_changes: + description: > + Retrieves information about privileges granted to roles on various database objects, including tables, functions, schemas, and databases. + It returns the object type, role name, object name, and privilege type for each privilege granted. + This metric helps administrators monitor and manage database access control and privileges. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + * + FROM ( + SELECT + 'table'::text AS object_type, + grantee::text AS tag_role, + quote_ident(table_schema) || '.' 
|| quote_ident(table_name) AS tag_object, + privilege_type + FROM + information_schema.table_privileges + /* includes also VIEW-s actually */ + WHERE + NOT grantee = ANY ( + SELECT + rolname + FROM + pg_roles + WHERE + rolsuper + OR oid < 16384) + AND NOT table_schema IN ('information_schema', 'pg_catalog') + /* + union all + + select + -- quite a heavy query currently, maybe faster directly via pg_attribute + has_column_privilege? + 'column' AS object_type, + grantee::text AS tag_role, + quote_ident(table_schema) || '.' || quote_ident(table_name) AS tag_object, + privilege_type + FROM + information_schema.column_privileges cp + WHERE + NOT table_schema IN ('pg_catalog', 'information_schema') + AND NOT grantee = ANY ( + SELECT + rolname + FROM + pg_roles + WHERE + rolsuper + OR oid < 16384) + AND NOT EXISTS ( + SELECT + * + FROM + information_schema.table_privileges + WHERE + table_schema = cp.table_schema + AND table_name = cp.table_name + AND grantee = cp.grantee + AND privilege_type = cp.privilege_type) */ + UNION ALL + SELECT + 'function' AS object_type, + grantee::text AS tag_role, + quote_ident(routine_schema) || '.' 
|| quote_ident(routine_name) AS tag_object, + privilege_type + FROM + information_schema.routine_privileges + WHERE + NOT routine_schema IN ('information_schema', 'pg_catalog') + AND NOT grantee = ANY ( + SELECT + rolname + FROM + pg_roles + WHERE + rolsuper + OR oid < 16384) + UNION ALL + SELECT + 'schema' AS object_type, + r.rolname::text AS tag_role, + quote_ident(n.nspname) AS tag_object, + p.perm AS privilege_type + FROM + pg_catalog.pg_namespace AS n + CROSS JOIN pg_catalog.pg_roles AS r + CROSS JOIN ( + VALUES ('USAGE'), + ('CREATE')) AS p (perm) + WHERE + NOT n.nspname IN ('information_schema', 'pg_catalog') + AND n.nspname NOT LIKE E'pg\\_%' /* escaped underscore: only the reserved pg_ prefix is excluded */ + AND NOT r.rolsuper + AND r.oid >= 16384 + AND has_schema_privilege(r.oid, n.oid, p.perm) + UNION ALL + SELECT + 'database' AS object_type, + r.rolname::text AS tag_role, + quote_ident(datname) AS tag_object, + p.perm AS privilege_type + FROM + pg_catalog.pg_database AS d + CROSS JOIN pg_catalog.pg_roles AS r + CROSS JOIN ( + VALUES ('CREATE'), + ('CONNECT'), + ('TEMPORARY')) AS p (perm) + WHERE + d.datname = current_database() + AND NOT r.rolsuper + AND r.oid >= 16384 + AND has_database_privilege(r.oid, d.oid, p.perm) + UNION ALL + SELECT + 'superusers' AS object_type, + rolname::text AS tag_role, + rolname::text AS tag_object, + 'SUPERUSER' AS privilege_type + FROM + pg_catalog.pg_roles + WHERE + rolsuper + UNION ALL + SELECT + 'login_users' AS object_type, + rolname::text AS tag_role, + rolname::text AS tag_object, + 'LOGIN' AS privilege_type + FROM + pg_catalog.pg_roles + WHERE + rolcanlogin) y + psutil_cpu: + description: > + This metric requires the "psutil" Python package to be installed on the PostgreSQL server. + It provides CPU utilization and load averages using the "psutil" library. + "psutil" is known to behave differently depending on the used version and operating system, so if getting + errors please adjust to your needs. 
"psutil" documentation here: https://psutil.readthedocs.io/en/latest/ + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + round(cpu_utilization::numeric, 2)::float as cpu_utilization, + round(load_1m_norm::numeric, 2)::float as load_1m_norm, + round(load_1m::numeric, 2)::float as load_1m, + round(load_5m_norm::numeric, 2)::float as load_5m_norm, + round(load_5m::numeric, 2)::float as load_5m, + round("user"::numeric, 2)::float as "user", + round(system::numeric, 2)::float as system, + round(idle::numeric, 2)::float as idle, + round(iowait::numeric, 2)::float as iowait, + round(irqs::numeric, 2)::float as irqs, + round(other::numeric, 2)::float as other + from + get_psutil_cpu() + init_sql: | + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_psutil_cpu( + OUT cpu_utilization float8, OUT load_1m_norm float8, OUT load_1m float8, OUT load_5m_norm float8, OUT load_5m float8, + OUT "user" float8, OUT system float8, OUT idle float8, OUT iowait float8, OUT irqs float8, OUT other float8 + ) + LANGUAGE plpython3u + AS $FUNCTION$ + + from os import getloadavg + from psutil import cpu_times_percent, cpu_percent, cpu_count + from threading import Thread + + class GetCpuPercentThread(Thread): + def __init__(self, interval_seconds): + self.interval_seconds = interval_seconds + self.cpu_utilization_info = None + super(GetCpuPercentThread, self).__init__() + + def run(self): + self.cpu_utilization_info = cpu_percent(self.interval_seconds) + + t = GetCpuPercentThread(0.5) + t.start() + + ct = cpu_times_percent(0.5) + la = getloadavg() + + t.join() + + return t.cpu_utilization_info, la[0] / cpu_count(), la[0], la[1] / cpu_count(), la[1], ct.user, ct.system, ct.idle, ct.iowait, ct.irq + ct.softirq, ct.steal + ct.guest + ct.guest_nice + + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_psutil_cpu() TO pgwatch; + COMMENT ON FUNCTION get_psutil_cpu() IS 'created for pgwatch'; + gauges: + - '*' + 
is_instance_level: true + psutil_disk: + description: > + This metric requires the "psutil" Python package to be installed on the PostgreSQL server. + It provides disk usage statistics using the "psutil" library. + "psutil" is known to behave differently depending on the used version and operating system, so if getting + errors please adjust to your needs. "psutil" documentation here: https://psutil.readthedocs.io/en/latest/ + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + dir_or_tablespace as tag_dir_or_tablespace, + path as tag_path, + total, used, free, percent + from + get_psutil_disk() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_psutil_disk( + OUT dir_or_tablespace text, OUT path text, OUT total float8, OUT used float8, OUT free float8, OUT percent float8 + ) + RETURNS SETOF record + LANGUAGE plpython3u + SECURITY DEFINER + AS $FUNCTION$ + + from os import stat + from os.path import join, exists + from psutil import disk_usage + ret_list = [] + + # data_directory + r = plpy.execute("select current_setting('data_directory') as dd, current_setting('log_directory') as ld, current_setting('server_version_num')::int as pgver") + dd = r[0]['dd'] + ld = r[0]['ld'] + du_dd = disk_usage(dd) + ret_list.append(['data_directory', dd, du_dd.total, du_dd.used, du_dd.free, du_dd.percent]) + + dd_stat = stat(dd) + # log_directory + if ld: + if not ld.startswith('/'): + ld_path = join(dd, ld) + else: + ld_path = ld + if exists(ld_path): + log_stat = stat(ld_path) + if log_stat.st_dev == dd_stat.st_dev: + pass # no new info, same device + else: + du = disk_usage(ld_path) + ret_list.append(['log_directory', ld_path, du.total, du.used, du.free, du.percent]) + + # WAL / XLOG directory + # plpy.notice('pg_wal' if r[0]['pgver'] >= 100000 else 'pg_xlog', r[0]['pgver']) + joined_path_wal = join(r[0]['dd'], 'pg_wal' if r[0]['pgver'] >= 100000 else 'pg_xlog') + wal_stat = 
stat(joined_path_wal) + if wal_stat.st_dev == dd_stat.st_dev: + pass # no new info, same device + else: + du = disk_usage(joined_path_wal) + ret_list.append(['pg_wal', joined_path_wal, du.total, du.used, du.free, du.percent]) + + # add user created tablespaces if any + sql_tablespaces = """ + select spcname as name, pg_catalog.pg_tablespace_location(oid) as location + from pg_catalog.pg_tablespace where not spcname like any(array[E'pg\\_%'])""" + for row in plpy.cursor(sql_tablespaces): + du = disk_usage(row['location']) + ret_list.append([row['name'], row['location'], du.total, du.used, du.free, du.percent]) + return ret_list + + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_psutil_disk() TO pgwatch; + COMMENT ON FUNCTION get_psutil_disk() IS 'created for pgwatch'; + gauges: + - '*' + is_instance_level: true + psutil_disk_io_total: + description: > + This metric requires the "psutil" Python package to be installed on the PostgreSQL server. + It provides total disk I/O statistics using the "psutil" library. + "psutil" is known to behave differently depending on the used version and operating system, so if getting + errors please adjust to your needs. 
"psutil" documentation here: https://psutil.readthedocs.io/en/latest/ + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + read_count, + write_count, + read_bytes, + write_bytes + from + get_psutil_disk_io_total() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_psutil_disk_io_total( + OUT read_count float8, OUT write_count float8, OUT read_bytes float8, OUT write_bytes float8 + ) + LANGUAGE plpython3u + AS $FUNCTION$ + from psutil import disk_io_counters + dc = disk_io_counters(perdisk=False) + if dc: + return dc.read_count, dc.write_count, dc.read_bytes, dc.write_bytes + else: + return None, None, None, None + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_psutil_disk_io_total() TO pgwatch; + COMMENT ON FUNCTION get_psutil_disk_io_total() IS 'created for pgwatch'; + is_instance_level: true + psutil_mem: + description: > + This metric requires the "psutil" Python package to be installed on the PostgreSQL server. + It provides memory usage statistics using the "psutil" library. + "psutil" is known to behave differently depending on the used version and operating system, so if getting + errors please adjust to your needs. 
"psutil" documentation here: https://psutil.readthedocs.io/en/latest/ + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + total, used, free, buff_cache, available, percent, + swap_total, swap_used, swap_free, swap_percent + from + get_psutil_mem() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; /* "plpython3u" might need changing to "plpythonu" (Python 2 everywhere for new OS-es */ + + CREATE OR REPLACE FUNCTION get_psutil_mem( + OUT total float8, OUT used float8, OUT free float8, OUT buff_cache float8, OUT available float8, OUT percent float8, + OUT swap_total float8, OUT swap_used float8, OUT swap_free float8, OUT swap_percent float8 + ) + LANGUAGE plpython3u + AS $FUNCTION$ + from psutil import virtual_memory, swap_memory + vm = virtual_memory() + sw = swap_memory() + return vm.total, vm.used, vm.free, vm.buffers + vm.cached, vm.available, vm.percent, sw.total, sw.used, sw.free, sw.percent + $FUNCTION$; + + GRANT EXECUTE ON FUNCTION get_psutil_mem() TO pgwatch; + COMMENT ON FUNCTION get_psutil_mem() IS 'created for pgwatch'; + gauges: + - '*' + is_instance_level: true + reco_add_index: + description: > + Retrieves recommendations for creating indexes based on the `pg_qualstats_index_advisor()` function. + It provides insights into potential index creation opportunities to improve query performance. + This metric helps administrators optimize database performance by suggesting index creation. + sqls: + 11: |- + select /* pgwatch_generated */ + epoch_ns, + tag_reco_topic, + tag_object_name, + recommendation, + case when exists (select * from pg_inherits + where inhrelid = regclass(tag_object_name) + ) then 'Partitioned table, create the index on parent' else extra_info + end as extra_info + FROM ( + SELECT (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'create_index'::text as tag_reco_topic, + (regexp_matches(v::text, E'ON (.*?) 
'))[1] as tag_object_name, + v::text as recommendation, + '' as extra_info + FROM json_array_elements( + pg_qualstats_index_advisor() -> 'indexes') v + ) x + ORDER BY tag_object_name + node_status: primary + is_private: true + reco_default_public_schema: + description: > + Retrieves recommendations for revoking the CREATE privilege on the public schema from PUBLIC. + This metric helps enhance security by ensuring that only authorized users can create new objects in the public schema. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'default_public_schema_privs'::text as tag_reco_topic, + nspname::text as tag_object_name, + 'REVOKE CREATE ON SCHEMA public FROM PUBLIC;'::text as recommendation, + 'only authorized users should be allowed to create new objects'::text as extra_info + from + pg_namespace + where + nspname = 'public' + and nspacl::text ~ E'[,\\{]+=U?C/' + node_status: primary + reco_disabled_triggers: + description: > + Retrieves recommendations for reviewing and potentially dropping disabled triggers in the PostgreSQL database. + It provides insights into triggers that are currently disabled, helping administrators identify and manage unused or unnecessary triggers. + This metric helps maintain database performance and reduce clutter by suggesting the removal of unused triggers. + sqls: + 11: | + /* "temporarily" disabled triggers might be forgotten about... 
*/ + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'disabled_triggers'::text as tag_reco_topic, + quote_ident(nspname)||'.'||quote_ident(relname) as tag_object_name, + 'review usage of trigger and consider dropping it if not needed anymore'::text as recommendation, + ''::text as extra_info + from + pg_trigger t + join + pg_class c on c.oid = t.tgrelid + join + pg_namespace n on n.oid = c.relnamespace + where + tgenabled = 'D' + node_status: primary + reco_drop_index: + description: > + Retrieves recommendations for dropping unused or invalid indexes in the PostgreSQL database. + It provides insights into indexes that have not been scanned and are consuming a significant portion of the database size. + This metric helps administrators optimize database performance by suggesting the removal of unnecessary indexes. + sqls: + 11: | + /* never-scanned indexes that are either large (over 0.5% of the database size) or invalid */ + with q_database_size as ( + select pg_database_size(current_database()) as database_size_b + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'drop_index'::text as tag_reco_topic, + quote_ident(schemaname)||'.'||quote_ident(indexrelname) as tag_object_name, + ('DROP INDEX ' || quote_ident(schemaname)||'.'||quote_ident(indexrelname) || ';')::text as recommendation, + 'Make sure to also check replica pg_stat_user_indexes.idx_scan count if using them for queries'::text as extra_info + from + pg_stat_user_indexes + join + pg_index using (indexrelid) + join + q_database_size on true + where + idx_scan = 0 + and ((pg_relation_size(indexrelid)::numeric / database_size_b) > 0.005 /* 0.5% DB size threshold */ + or not indisvalid) /* invalid indexes are reported regardless of size */ + and not indisprimary + and not indisunique /* unique indexes enforce a constraint even when never scanned */ + and not indisreplident + and not schemaname like '_timescaledb%' + node_status: primary + reco_nested_views: + description: > + Retrieves recommendations for overly nested views in the PostgreSQL database.
+ It identifies views that depend on other views and have a nesting depth greater than 3. + This metric helps administrators optimize query performance by suggesting the reduction of view nesting. + sqls: + 11: |- + WITH RECURSIVE views AS ( + -- get the directly depending views + SELECT v.oid::regclass AS view, + format('%s.%s', quote_ident(n.nspname), quote_ident(v.relname)) as full_name, + 1 AS level + FROM pg_depend AS d + JOIN pg_rewrite AS r + ON r.oid = d.objid + JOIN pg_class AS v + ON v.oid = r.ev_class + JOIN pg_namespace AS n + ON n.oid = v.relnamespace + WHERE v.relkind = 'v' + -- LIKE ANY (not = ANY) so that the pg\_% pattern really excludes system schemas + AND NOT n.nspname LIKE ANY(array['information_schema', E'pg\\_%']) + AND NOT v.relname LIKE E'pg\\_%' + AND d.classid = 'pg_rewrite'::regclass + AND d.refclassid = 'pg_class'::regclass + AND d.deptype = 'n' + UNION ALL + -- add the views that depend on these + SELECT v.oid::regclass, + format('%s.%s', quote_ident(n.nspname), quote_ident(v.relname)) as full_name, + views.level + 1 + FROM views + JOIN pg_depend AS d + ON d.refobjid = views.view + JOIN pg_rewrite AS r + ON r.oid = d.objid + JOIN pg_class AS v + ON v.oid = r.ev_class + JOIN pg_namespace AS n + ON n.oid = v.relnamespace + WHERE v.relkind = 'v' + AND NOT n.nspname LIKE ANY(array['information_schema', E'pg\\_%']) + AND d.classid = 'pg_rewrite'::regclass + AND d.refclassid = 'pg_class'::regclass + AND d.deptype = 'n' + AND v.oid <> views.view -- avoid loop + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'overly_nested_views'::text AS tag_reco_topic, + full_name::text as tag_object_name, + 'overly nested views can affect performance'::text recommendation, + 'nesting_depth: ' || coalesce(max(level)::text, '-') AS extra_info + FROM views + GROUP BY 1, 2, 3 + HAVING max(level) > 3 + ORDER BY max(level) DESC, full_name::text + node_status: primary + reco_partial_index_candidates: + description: > + Retrieves recommendations for creating partial indexes on columns with a high fraction of
NULL values. + It identifies single-column indexes that could potentially be declared as partial indexes, leaving out NULL values. + This metric helps optimize index usage and improve query performance by suggesting the creation of partial indexes. + sqls: + 11: | + select distinct /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'partial_index_candidates'::text as tag_reco_topic, + quote_ident(ni.nspname)||'.'||quote_ident(ci.relname) as tag_object_name, + ('index ' || quote_ident(ni.nspname)||'.'||quote_ident(ci.relname) || ' on ' || quote_ident(s.schemaname) || '.' || quote_ident(s.tablename) || ' column ' || quote_ident(s.attname) || ' could possibly be declared partial leaving out NULL-s')::text as recommendation, + 'NULL fraction: ' || round((null_frac * 100)::numeric, 1) || '%, rowcount estimate: ' || (c.reltuples)::int8 || ', current definition: ' || pg_get_indexdef(i.indexrelid) as extra_info + from + pg_stats s + /* tie each statistics row to its own table and column; joining on attname alone matches same-named columns of unrelated tables */ + join pg_namespace n on n.nspname = s.schemaname + join pg_class c on c.relnamespace = n.oid and c.relname = s.tablename + join pg_attribute a on a.attrelid = c.oid and a.attname = s.attname + join pg_index i on i.indkey[0] = a.attnum and i.indrelid = a.attrelid + join pg_class ci on ci.oid = i.indexrelid + join pg_namespace ni on ni.oid = ci.relnamespace + where + not indisprimary + and not indisunique + and indisready + and indisvalid + and i.indnatts = 1 /* simple 1 column indexes */ + and null_frac > 0.5 /* 50% empty */ + and not pg_get_indexdef(i.indexrelid) like '% WHERE %' + and c.reltuples >= 1e5 /* ignore smaller tables */ + and not exists ( /* leave out sub-partitions */ + select * from pg_inherits where inhrelid = c.oid + ) + reco_sprocs_wo_search_path: + description: > + Retrieves recommendations for stored procedures that do not have a fixed `search_path` set. + It identifies stored procedures that could potentially be abused by malicious users if used objects are not fully qualified. + This metric helps enhance security by suggesting the setting of a fixed search_path for stored procedures.
+ sqls: + 11: |- + with q_sprocs as ( + select /* pgwatch_generated */ + format('%s.%s', quote_ident(nspname), quote_ident(proname)) as sproc_name, + /* schema-qualified, quoted name plus identity arguments (no defaults) so the statement can be run as-is */ + 'alter function ' || format('%s.%s', quote_ident(nspname), quote_ident(proname)) || '(' || pg_get_function_identity_arguments(p.oid) || ') set search_path = X;' as fix_sql + from + pg_proc p + join pg_namespace n on n.oid = p.pronamespace + where prosecdef + /* proconfig entries are stored as 'name=value', so compare on the setting name only */ + and not exists (select 1 from unnest(coalesce(proconfig, '{}'::text[])) as cfg where split_part(cfg, '=', 1) = 'search_path') + /* functions without a comment have a NULL description and must not be filtered out */ + and not coalesce(pg_catalog.obj_description(p.oid, 'pg_proc'), '') ~ 'pgwatch' + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'sprocs_wo_search_path'::text as tag_reco_topic, + sproc_name::text as tag_object_name, + fix_sql::text as recommendation, + 'functions without fixed search_path can be potentially abused by malicious users if used objects are not fully qualified'::text as extra_info + from + q_sprocs + order by + tag_object_name, extra_info + node_status: primary + reco_superusers: + description: > + Retrieves recommendations for reviewing the number of superusers in the PostgreSQL database. + It identifies if there are too many superusers, which can pose a security risk. + This metric helps maintain database security by suggesting a review of superuser accounts. + sqls: + 11: | + with q_su as ( + select count(*) from pg_roles where rolcanlogin and rolsuper + ), + q_total as ( + select count(*) from pg_roles where rolcanlogin + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + 'superuser_count'::text as tag_reco_topic, + '-'::text as tag_object_name, + 'too many superusers detected - review recommended'::text as recommendation, + format('%s active superusers, %s total active users', q_su.count, q_total.count) as extra_info + from + q_su, q_total + where + q_su.count >= 10 + node_status: primary + recommendations: + description: > + When enabled, this metric will find all other metrics starting with `reco_*` and execute those queries.
+ The metric targets performance, security, and other "best practices" violations. + Users can add new `reco_*` queries freely. + init_sql: CREATE EXTENSION IF NOT EXISTS pg_qualstats; + sqls: + 11: /* dummy placeholder - special handling in code to collect other metrics named reco_* */ + replication: + description: > + This metric collects replication statistics from the `pg_stat_replication` view. + It provides insights into the status of replication connections, including lag times and states. + This metric is useful for monitoring replication health and performance. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + application_name as tag_application_name, + usename AS tag_usename, + concat(coalesce(client_addr::text, client_hostname), '_', client_port::text) as tag_client_info, + coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, sent_lsn)::int8, 0) as sent_lag_b, + coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, write_lsn)::int8, 0) as write_lag_b, + coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, flush_lsn)::int8, 0) as flush_lag_b, + coalesce(pg_wal_lsn_diff(case when pg_is_in_recovery() then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end, replay_lsn)::int8, 0) as replay_lag_b, + (extract(epoch from write_lag) * 1000)::int8 as write_lag_ms, + (extract(epoch from flush_lag) * 1000)::int8 as flush_lag_ms, + (extract(epoch from replay_lag) * 1000)::int8 as replay_lag_ms, + state, + sync_state, + case when sync_state in ('sync', 'quorum') then 1 else 0 end as is_sync_int, + case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int + from + pg_stat_replication + where + coalesce(application_name, '') not in ('pg_basebackup', 'pg_rewind'); + gauges: + - '*' + is_instance_level: true + 
replication_slot_stats: + description: > + This metric collects statistics from the `pg_stat_replication_slots` view. + It provides insights into the status of replication slots, including transaction counts and byte usage. + This metric is useful for monitoring replication slot health and performance. + sqls: + 14: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + slot_name::text as tag_slot_name, + spill_txns, + spill_count, + spill_bytes, + stream_txns, + stream_count, + stream_bytes, + total_txns, + total_bytes + from + pg_stat_replication_slots + replication_slots: + description: > + This metric collects information about replication slots from the `pg_replication_slots` view. + It provides insights into the status of replication slots, including their activity and lag times. + This metric is useful for monitoring replication slot health and performance. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + slot_name::text as tag_slot_name, + coalesce(plugin, 'physical')::text as tag_plugin, + active, + case when active then 0 else 1 end as non_active_int, + pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::int8 as restart_lsn_lag_b, + greatest(age(xmin), age(catalog_xmin))::int8 as xmin_age_tx + from + pg_replication_slots + node_status: primary + gauges: + - '*' + is_instance_level: true + sequence_health: + description: > + This metric collects health statistics for sequences in the PostgreSQL database. + It provides insights into the usage and status of sequences, including maximum usage percentages and counts of sequences that are heavily used. + This metric is useful for monitoring sequence health and performance. 
+ sqls: + 11: |- + with q_seq_data as ( + select * from pg_sequences + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + (select round(100.0 * coalesce(max(last_value::numeric / max_value), 0), 2)::float from q_seq_data where not cycle) as max_used_pct, + (select count(*) from q_seq_data where not cycle and last_value::numeric / max_value > 0.5) as p50_used_seq_count, + (select count(*) from q_seq_data where not cycle and last_value::numeric / max_value > 0.75) as p75_used_seq_count + server_log_event_counts: + description: > + This metric enables the Postgres server log "tailing" for errors. It can't be used for remote setups, though, + unless the DB logs are somehow mounted or copied over, as real file access is needed! + sqls: + 11: |- + /* + Dummy placeholder - special handling in gatherer code for log parsing + */ + settings: + description: > + This metric collects various PostgreSQL server settings and configurations. + It provides insights into the server's configuration, including version, memory settings, and other important parameters. + This metric is useful for monitoring server settings and ensuring optimal performance. 
+ sqls: + 11: | + with qs as ( + select name, setting from pg_settings + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + current_setting('server_version') as server_version, + current_setting('server_version_num')::int8 as server_version_num, + (regexp_matches(regexp_replace(current_setting('server_version'), '(beta|devel).*', '', 'g'), E'\\d+'))[1]::float8 as major_version, + current_setting('block_size')::int as block_size, + current_setting('max_connections')::int as max_connections, + current_setting('hot_standby') as hot_standby, + (select setting from qs where name = 'hot_standby_feedback') as hot_standby_feedback, + current_setting('fsync') as fsync, + current_setting('full_page_writes') as full_page_writes, + current_setting('synchronous_commit') as synchronous_commit, + (select setting from qs where name = 'wal_compression') as wal_compression, + (select setting from qs where name = 'wal_log_hints') as wal_log_hints, + (select setting from qs where name = 'synchronous_standby_names') as synchronous_standby_names, + current_setting('shared_buffers') as shared_buffers, + current_setting('work_mem') as work_mem, + current_setting('maintenance_work_mem') as maintenance_work_mem, + current_setting('effective_cache_size') as effective_cache_size, + (select setting::int8 from qs where name = 'default_statistics_target') as default_statistics_target, + (select setting::float8 from qs where name = 'random_page_cost') as random_page_cost, + pg_size_pretty(((select setting::int8 from qs where name = 'min_wal_size') * 1024^2)::int8) as min_wal_size, + pg_size_pretty(((select setting::int8 from qs where name = 'max_wal_size') * 1024^2)::int8) as max_wal_size, + (select setting from qs where name = 'checkpoint_segments') as checkpoint_segments, + current_setting('checkpoint_timeout') as checkpoint_timeout, + current_setting('checkpoint_completion_target') as checkpoint_completion_target, + (select setting::int8 from qs where 
name = 'max_worker_processes') as max_worker_processes, + (select setting::int8 from qs where name = 'max_parallel_workers') as max_parallel_workers, + (select setting::int8 from qs where name = 'max_parallel_workers_per_gather') as max_parallel_workers_per_gather, + (select case when setting = 'on' then 1 else 0 end from qs where name = 'jit') as jit, + (select case when setting = 'on' then 1 else 0 end from qs where name = 'ssl') as ssl, + current_setting('statement_timeout') as statement_timeout, + current_setting('deadlock_timeout') as deadlock_timeout, + (select setting from qs where name = 'data_checksums') as data_checksums, + (select setting::int8 from qs where name = 'max_connections') as max_connections, + (select setting::int8 from qs where name = 'max_wal_senders') as max_wal_senders, + (select setting::int8 from qs where name = 'max_replication_slots') as max_replication_slots, + (select setting::int8 from qs where name = 'max_prepared_transactions') as max_prepared_transactions, + (select setting::int8 from qs where name = 'lock_timeout') || ' (ms)' as lock_timeout, + (select setting from qs where name = 'archive_mode') as archive_mode, + (select setting from qs where name = 'archive_command') as archive_command, + current_setting('archive_timeout') as archive_timeout, + (select setting from qs where name = 'shared_preload_libraries') as shared_preload_libraries, + (select setting from qs where name = 'listen_addresses') as listen_addresses, + (select setting from qs where name = 'ssl') as ssl, + (select setting from qs where name = 'autovacuum') as autovacuum, + (select setting::int8 from qs where name = 'autovacuum_max_workers') as autovacuum_max_workers, + (select setting::float8 from qs where name = 'autovacuum_vacuum_scale_factor') as autovacuum_vacuum_scale_factor, + (select setting::float8 from qs where name = 'autovacuum_vacuum_threshold') as autovacuum_vacuum_threshold, + (select setting::float8 from qs where name = 
'autovacuum_analyze_scale_factor') as autovacuum_analyze_scale_factor, + (select setting::float8 from qs where name = 'autovacuum_analyze_threshold') as autovacuum_analyze_threshold + show_plans_realtime: + description: > + This metric collects real-time query plans from the `pg_show_plans` extension. + It provides insights into the execution plans of currently running queries, helping to identify performance issues and optimize query execution. + This metric is useful for monitoring query performance and understanding how queries are executed in real-time. + sqls: + 11: | + /* assumes pg_show_plans extension */ + select /* pgwatch_generated */ + max((extract(epoch from now()) * 1e9)::int8) as epoch_ns, + max(extract(epoch from now() - query_start))::int as max_s, + avg(extract(epoch from now() - query_start))::int as avg_s, + count(*), + array_to_string(array_agg(distinct usename order by usename), ',') as "users", + max(md5(plan)) as tag_hash, /* needed for influx */ + plan, + max(query) as query + from + pg_show_plans p + join + pg_stat_activity a + using (pid) + where + p.pid != pg_backend_pid() + and datname = current_database() + and now() - query_start > '1s'::interval + and backend_type = 'client backend' + group by + plan + order by + max_s desc + limit + 10 + smart_health_per_disk: + description: > + This metric collects SMART health status for all disk devices using the `smartmontools` utility. + It provides insights into the health of disk devices, including their SMART status and return codes. + This metric is useful for monitoring disk health and identifying potential issues with disk devices. + This helper is always meant to be tested and adjusted to make sure all disks are detected.
+ Most likely smartctl privileges must be escalated to give postgres access: `sudo chmod u+s /usr/local/sbin/smartctl` + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + device as tag_device, + retcode + from + get_smart_health_per_device() + init_sql: |- + CREATE EXTENSION IF NOT EXISTS plpython3u; + + CREATE OR REPLACE FUNCTION get_smart_health_per_device(OUT device text, OUT retcode int) RETURNS SETOF record AS + $$ + import subprocess + ret_list = [] + + #disk_detect_cmd='smartctl --scan | cut -d " " -f3 | grep mega' # for Lenovo ServerRAID M1210 + # double-quoted Python string: inside a $$ body the SQL-style doubled quotes would be parsed as adjacent string literals and drop the leading space of the grep pattern + disk_detect_cmd = "lsblk -io KNAME,TYPE | grep ' disk' | cut -d ' ' -f1 | sort" + p = subprocess.run(disk_detect_cmd, stdout=subprocess.PIPE, encoding='utf-8', shell=True) + if p.returncode != 0: + return ret_list + disks = p.stdout.splitlines() + + for disk in disks: + # health_cmd = 'smartctl -d $disk -a -q silent /dev/sda' % disk # for Lenovo ServerRAID M1210 members + health_cmd = 'smartctl -a -q silent /dev/%s' % disk + p = subprocess.run(health_cmd, stdout=subprocess.PIPE, encoding='utf-8', shell=True) + ret_list.append((disk, p.returncode)) + + return ret_list + + $$ LANGUAGE plpython3u VOLATILE; + + GRANT EXECUTE ON FUNCTION get_smart_health_per_device() TO pgwatch; + + COMMENT ON FUNCTION get_smart_health_per_device() is 'created for pgwatch'; + sproc_hashes: + description: > + This metric collects hashes of all stored procedures in the database. + It provides a way to track changes in stored procedures over time by comparing their hashes. + This metric is useful for monitoring stored procedure integrity and detecting changes.
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + p.oid::text as tag_oid, + quote_ident(nspname)||'.'||quote_ident(proname) as tag_sproc, + md5(prosrc) + from + pg_proc p + join + pg_namespace n on n.oid = pronamespace + where + not nspname like any(array[E'pg\\_%', 'information_schema']) + sproc_stats: + description: > + This metric collects statistics about user-defined functions (stored procedures) in the database. + It provides insights into function usage, including call counts and execution times. + This metric is useful for monitoring function performance and identifying potential bottlenecks. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + schemaname::text AS tag_schema, + funcname::text AS tag_function_name, + quote_ident(schemaname)||'.'||quote_ident(funcname) as tag_function_full_name, + p.oid::text as tag_oid, -- for overloaded funcs + calls as sp_calls, + self_time, + total_time + FROM + pg_stat_user_functions f + JOIN + pg_proc p ON p.oid = f.funcid + ORDER BY + total_time DESC + LIMIT + 300 + stat_activity: + description: > + This metric collects statistics about currently active queries in the database. + It provides insights into the state of active queries, including their duration and blocking status. + This metric is useful for monitoring query performance and identifying long-running or blocked queries. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + s.query as query, + count(*) as count + from pg_stat_activity s + where s.datname = current_database() + and s.state = 'active' + and s.backend_type = 'client backend' + and s.pid != pg_backend_pid() + and now() - s.query_start > '100ms'::interval + group by s.query + stat_activity_realtime: + description: > + This metric collects real-time statistics about currently active queries in the database. 
+ It provides insights into the state of active queries, including their duration and blocking status. + This metric is useful for monitoring query performance and identifying long-running or blocked queries in real-time. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + pid as tag_pid, + usename::text AS user, + application_name AS appname, + coalesce(client_addr::text, 'local') AS ip, + extract(epoch FROM (now() - query_start))::int AS duration_s, + /* PostgreSQL 10+ reports lightweight lock waits as 'LWLock'; 'LWLockNamed' is kept for older servers */ + (coalesce(wait_event_type, '') IN ('LWLock', 'LWLockNamed', 'Lock', 'BufferPin'))::int AS waiting, + array_to_string(pg_blocking_pids(pid), ',') as blocking_pids, + ltrim(regexp_replace(query, E'[ \\t\\n\\r]+' , ' ', 'g'))::varchar(300) AS query + FROM + pg_stat_activity + WHERE + state != 'idle' + AND backend_type IN ('client backend', 'autovacuum worker') + AND pid != pg_backend_pid() + AND datname = current_database() + AND now() - query_start > '500ms'::interval + ORDER BY + now() - query_start DESC + LIMIT 25 + stat_io: + description: > + This metric collects I/O statistics from the `pg_stat_io` view. + It provides insights into read and write operations, including the number of reads, writes, and their associated times. + This metric is useful for monitoring I/O performance and identifying potential bottlenecks in disk operations.
+ sqls: + 16: |- + SELECT /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + coalesce(backend_type, 'total') as tag_backend_type, + sum(coalesce(reads, 0))::int8 as reads, + (sum(coalesce(reads, 0) * op_bytes) / 1e6)::int8 as read_bytes_mb, + sum(coalesce(read_time, 0))::int8 as read_time_ms, + sum(coalesce(writes, 0))::int8 as writes, + (sum(coalesce(writes, 0) * op_bytes) / 1e6)::int8 as write_bytes_mb, + sum(coalesce(write_time, 0))::int8 as write_time_ms, + sum(coalesce(writebacks, 0))::int8 as writebacks, + (sum(coalesce(writebacks, 0) * op_bytes) / 1e6)::int8 as writeback_bytes_mb, + sum(coalesce(writeback_time, 0))::int8 as writeback_time_ms, + sum(coalesce(fsyncs, 0))::int8 fsyncs, + sum(coalesce(fsync_time, 0))::int8 fsync_time_ms, + max(extract(epoch from now() - stats_reset)::int) as stats_reset_s + FROM + pg_stat_io + GROUP BY + ROLLUP (backend_type) + is_instance_level: true + stat_ssl: + description: > + This metric collects SSL connection statistics from the `pg_stat_ssl` view. + It provides insights into the number of SSL connections, including those that are encrypted and those that are not. + This metric is useful for monitoring SSL usage and ensuring secure connections in the PostgreSQL database. + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + count(*) as total, + count(*) FILTER (WHERE ssl) as "on", + count(*) FILTER (WHERE NOT ssl) as "off" + FROM + pg_stat_ssl AS s, + pg_stat_activity AS a + WHERE + a.pid = s.pid + AND a.datname = current_database() + AND a.pid <> pg_backend_pid() + AND NOT (a.client_addr = '127.0.0.1' OR client_port = -1) + gauges: + - '*' + stat_statements: + description: > + This metric collects statistics from the `pg_stat_statements` extension. + It provides insights into query performance, including execution times, block reads/writes, and user information. 
+ This metric is useful for monitoring query performance and identifying slow or resource-intensive queries. + init_sql: CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + sqls: + 11: |- + WITH q_data AS ( + SELECT + coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, + /* + if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. + */ + array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, + sum(s.calls)::int8 AS calls, + round(sum(s.total_time)::numeric, 3)::double precision AS total_time, + sum(shared_blks_hit)::int8 AS shared_blks_hit, + sum(shared_blks_read)::int8 AS shared_blks_read, + sum(shared_blks_written)::int8 AS shared_blks_written, + sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, + sum(temp_blks_read)::int8 AS temp_blks_read, + sum(temp_blks_written)::int8 AS temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision AS blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision AS blk_write_time, + max(query::varchar(8000)) AS query + FROM + pg_stat_statements s + WHERE + calls > 5 + AND total_time > 5 + AND dbid = ( + SELECT + oid + FROM + pg_database + WHERE + datname = current_database()) + AND NOT upper(s.query::varchar(50)) + LIKE ANY (ARRAY['DEALLOCATE%', + 'SET %', + 'RESET %', + 'BEGIN%', + 'BEGIN;', + 'COMMIT%', + 'END%', + 'ROLLBACK%', + 'SHOW%']) + GROUP BY + queryid + ) + SELECT (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + b.tag_queryid, + b.users, + b.calls, + b.total_time, + b.shared_blks_hit, + b.shared_blks_read, + b.shared_blks_written, + b.shared_blks_dirtied, + b.temp_blks_read, + b.temp_blks_written, + b.blk_read_time, + b.blk_write_time, + ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) tag_query + FROM ( + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + total_time > 
0 + ORDER BY + total_time DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + ORDER BY + calls DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_read > 0 + ORDER BY + shared_blks_read DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_written > 0 + ORDER BY + shared_blks_written DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_read > 0 + ORDER BY + temp_blks_read DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_written > 0 + ORDER BY + temp_blks_written DESC + LIMIT 100) a) b + 13: |- + WITH q_data AS ( + SELECT + coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, + /* + if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. 
+ */ + array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, + sum(s.calls)::int8 AS calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, + sum(shared_blks_hit)::int8 AS shared_blks_hit, + sum(shared_blks_read)::int8 AS shared_blks_read, + sum(shared_blks_written)::int8 AS shared_blks_written, + sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, + sum(temp_blks_read)::int8 AS temp_blks_read, + sum(temp_blks_written)::int8 AS temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision AS blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision AS blk_write_time, + sum(wal_fpi)::int8 AS wal_fpi, + sum(wal_bytes)::int8 AS wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, + max(query::varchar(8000)) AS query + FROM + pg_stat_statements s + WHERE + calls > 5 + AND total_exec_time > 5 + AND dbid = ( + SELECT + oid + FROM + pg_database + WHERE + datname = current_database()) + AND NOT upper(s.query::varchar(50)) + LIKE ANY (ARRAY['DEALLOCATE%', + 'SET %', + 'RESET %', + 'BEGIN%', + 'BEGIN;', + 'COMMIT%', + 'END%', + 'ROLLBACK%', + 'SHOW%']) + GROUP BY + queryid + ) + select /* pgwatch_generated */ + (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + b.tag_queryid, + b.users, + b.calls, + b.total_time, + b.shared_blks_hit, + b.shared_blks_read, + b.shared_blks_written, + b.shared_blks_dirtied, + b.temp_blks_read, + b.temp_blks_written, + b.blk_read_time, + b.blk_write_time, + b.wal_fpi, + b.wal_bytes, + b.total_plan_time, + ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query + FROM ( + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + total_time > 0 + ORDER BY + total_time DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + ORDER BY + calls DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + 
WHERE + shared_blks_read > 0 + ORDER BY + shared_blks_read DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_written > 0 + ORDER BY + shared_blks_written DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_read > 0 + ORDER BY + temp_blks_read DESC + LIMIT 100) a + UNION + select /* pgwatch_generated */ + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_written > 0 + ORDER BY + temp_blks_written DESC + LIMIT 100) a) b + 15: |- + WITH /* pgwatch_generated */ q_data AS ( + SELECT + queryid::text AS tag_queryid, + /* + if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. + */ + array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, + sum(s.calls)::int8 AS calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, + sum(shared_blks_hit)::int8 AS shared_blks_hit, + sum(shared_blks_read)::int8 AS shared_blks_read, + sum(shared_blks_written)::int8 AS shared_blks_written, + sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, + sum(temp_blks_read)::int8 AS temp_blks_read, + sum(temp_blks_written)::int8 AS temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision AS blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision AS blk_write_time, + round(sum(temp_blk_read_time)::numeric, 3)::double precision AS temp_blk_read_time, + round(sum(temp_blk_write_time)::numeric, 3)::double precision AS temp_blk_write_time, + sum(wal_fpi)::int8 AS wal_fpi, + sum(wal_bytes)::int8 AS wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, + max(query::varchar(8000)) AS query + FROM + pg_stat_statements s + WHERE + calls > 5 + AND total_exec_time > 5 + AND dbid = ( 
+ SELECT + oid + FROM + pg_database + WHERE + datname = current_database()) + AND NOT upper(s.query::varchar(50)) + LIKE ANY (ARRAY['DEALLOCATE%', + 'SET %', + 'RESET %', + 'BEGIN%', + 'BEGIN;', + 'COMMIT%', + 'END%', + 'ROLLBACK%', + 'SHOW%']) + GROUP BY + queryid + ) + SELECT + (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + b.tag_queryid, + b.users, + b.calls, + b.total_time, + b.shared_blks_hit, + b.shared_blks_read, + b.shared_blks_written, + b.shared_blks_dirtied, + b.temp_blks_read, + b.temp_blks_written, + b.blk_read_time, + b.blk_write_time, + b.temp_blk_read_time, + b.temp_blk_write_time, + b.wal_fpi, + b.wal_bytes, + b.total_plan_time, + ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query + FROM ( + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + total_time > 0 + ORDER BY + total_time DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + ORDER BY + calls DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_read > 0 + ORDER BY + shared_blks_read DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_written > 0 + ORDER BY + shared_blks_written DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_read > 0 + ORDER BY + temp_blks_read DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_written > 0 + ORDER BY + temp_blks_written DESC + LIMIT 100) a) b + 17: |- + WITH /* pgwatch_generated */ q_data AS ( + SELECT + queryid::text AS tag_queryid, + /* + NB! if security conscious about exposing query texts replace the below expression with a dash ('-') OR + use the stat_statements_no_query_text metric instead, created specifically for this use case. 
+ */ + array_to_string(array_agg(DISTINCT quote_ident(pg_get_userbyid(userid))), ',') AS users, + sum(s.calls)::int8 AS calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision AS total_time, + sum(shared_blks_hit)::int8 AS shared_blks_hit, + sum(shared_blks_read)::int8 AS shared_blks_read, + sum(shared_blks_written)::int8 AS shared_blks_written, + sum(shared_blks_dirtied)::int8 AS shared_blks_dirtied, + sum(temp_blks_read)::int8 AS temp_blks_read, + sum(temp_blks_written)::int8 AS temp_blks_written, + round((sum(shared_blk_read_time) + sum(local_blk_read_time))::numeric, 3)::double precision AS blk_read_time, + round((sum(shared_blk_write_time) + sum(local_blk_write_time))::numeric, 3)::double precision AS blk_write_time, + round(sum(temp_blk_read_time)::numeric, 3)::double precision AS temp_blk_read_time, + round(sum(temp_blk_write_time)::numeric, 3)::double precision AS temp_blk_write_time, + sum(wal_fpi)::int8 AS wal_fpi, + sum(wal_bytes)::int8 AS wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision AS total_plan_time, + max(query::varchar(8000)) AS query + FROM + pg_stat_statements s + WHERE + calls > 5 + AND total_exec_time > 5 + AND dbid = ( + SELECT + oid + FROM + pg_database + WHERE + datname = current_database()) + AND NOT upper(s.query::varchar(50)) + LIKE ANY (ARRAY['DEALLOCATE%', + 'SET %', + 'RESET %', + 'BEGIN%', + 'BEGIN;', + 'COMMIT%', + 'END%', + 'ROLLBACK%', + 'SHOW%']) + GROUP BY + queryid + ) + SELECT + (EXTRACT(epoch FROM now()) * 1e9)::int8 AS epoch_ns, + b.tag_queryid, + b.users, + b.calls, + b.total_time, + b.shared_blks_hit, + b.shared_blks_read, + b.shared_blks_written, + b.shared_blks_dirtied, + b.temp_blks_read, + b.temp_blks_written, + b.blk_read_time, + b.blk_write_time, + b.temp_blk_read_time, + b.temp_blk_write_time, + b.wal_fpi, + b.wal_bytes, + b.total_plan_time, + ltrim(regexp_replace(b.query, E'[ \\t\\n\\r]+', ' ', 'g')) AS tag_query + FROM ( + SELECT + * + FROM ( + SELECT + * + FROM + q_data + 
WHERE + total_time > 0 + ORDER BY + total_time DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + ORDER BY + calls DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_read > 0 + ORDER BY + shared_blks_read DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + shared_blks_written > 0 + ORDER BY + shared_blks_written DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_read > 0 + ORDER BY + temp_blks_read DESC + LIMIT 100) a + UNION + SELECT + * + FROM ( + SELECT + * + FROM + q_data + WHERE + temp_blks_written > 0 + ORDER BY + temp_blks_written DESC + LIMIT 100) a) b; + stat_statements_calls: + description: > + This metric collects statistics from the `pg_stat_statements` extension, focusing on the number of calls and total execution time. + It provides insights into query performance, including execution times and call counts. + This metric is useful for monitoring query performance and identifying slow or resource-intensive queries. 
+ init_sql: CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + sqls: + 11: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + coalesce(sum(calls), 0)::int8 as calls, + coalesce(round(sum(total_time)::numeric, 3), 0)::float8 as total_time + from + pg_stat_statements + where + dbid = (select oid from pg_database where datname = current_database()) + 13: | + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + coalesce(sum(calls), 0)::int8 as calls, + coalesce(round(sum(total_exec_time)::numeric, 3), 0)::float8 as total_time, + /* report 0 instead of NULL when no statements are recorded, like calls and total_time */ + coalesce(round(sum(total_plan_time)::numeric, 3), 0)::double precision as total_plan_time + from + pg_stat_statements + where + dbid = (select oid from pg_database where datname = current_database()) + stat_statements_no_query_text: + description: > + This metric collects statistics from the `pg_stat_statements` extension without including the query text. + It provides insights into query performance, including execution times, block reads/writes, and user information, + while omitting the actual query text for security or privacy reasons. + This metric is useful for monitoring query performance without exposing sensitive query details.
+ init_sql: CREATE EXTENSION IF NOT EXISTS pg_stat_statements; + sqls: + 11: |- + with q_data as ( + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + '-'::text as tag_query, + coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, + array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, + sum(s.calls)::int8 as calls, + round(sum(s.total_time)::numeric, 3)::double precision as total_time, + sum(shared_blks_hit)::int8 as shared_blks_hit, + sum(shared_blks_read)::int8 as shared_blks_read, + sum(shared_blks_written)::int8 as shared_blks_written, + sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, + sum(temp_blks_read)::int8 as temp_blks_read, + sum(temp_blks_written)::int8 as temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision as blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision as blk_write_time + from + pg_stat_statements s + where + calls > 5 + and total_time > 0 + and dbid = (select oid from pg_database where datname = current_database()) + and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', + 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) + group by + queryid + ) + select * from ( + select + * + from + q_data + where + total_time > 0 + order by + total_time desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + order by + calls desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_read > 0 + order by + shared_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_written > 0 + order by + shared_blks_written desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_read > 0 + order by + temp_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_written > 0 + order by + 
temp_blks_written desc + limit 100 + ) a + 13: |- + with q_data as ( + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + '-' as tag_query, + coalesce(queryid::text, 'insufficient-privileges-total') as tag_queryid, + array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, + sum(s.calls)::int8 as calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision as total_time, + sum(shared_blks_hit)::int8 as shared_blks_hit, + sum(shared_blks_read)::int8 as shared_blks_read, + sum(shared_blks_written)::int8 as shared_blks_written, + sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, + sum(temp_blks_read)::int8 as temp_blks_read, + sum(temp_blks_written)::int8 as temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision as blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision as blk_write_time, + sum(wal_fpi)::int8 as wal_fpi, + sum(wal_bytes)::int8 as wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision as total_plan_time + from + pg_stat_statements s + where + calls > 5 + and total_exec_time > 0 + and dbid = (select oid from pg_database where datname = current_database()) + and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', + 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) + group by + queryid + ) + select * from ( + select + * + from + q_data + where + total_time > 0 + order by + total_time desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + order by + calls desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_read > 0 + order by + shared_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_written > 0 + order by + shared_blks_written desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_read > 0 + order by + 
temp_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_written > 0 + order by + temp_blks_written desc + limit 100 + ) a + 15: |- + with /* pgwatch_generated */ q_data as ( + select + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + '-' as tag_query, + queryid::text as tag_queryid, + array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, + sum(s.calls)::int8 as calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision as total_time, + sum(shared_blks_hit)::int8 as shared_blks_hit, + sum(shared_blks_read)::int8 as shared_blks_read, + sum(shared_blks_written)::int8 as shared_blks_written, + sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, + sum(temp_blks_read)::int8 as temp_blks_read, + sum(temp_blks_written)::int8 as temp_blks_written, + round(sum(blk_read_time)::numeric, 3)::double precision as blk_read_time, + round(sum(blk_write_time)::numeric, 3)::double precision as blk_write_time, + round(sum(temp_blk_read_time)::numeric, 3)::double precision as temp_blk_read_time, + round(sum(temp_blk_write_time)::numeric, 3)::double precision as temp_blk_write_time, + /* sum() of these columns yields numeric; cast to int8 as the 13 and 17 variants do */ + sum(wal_fpi)::int8 as wal_fpi, + sum(wal_bytes)::int8 as wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision as total_plan_time + from + pg_stat_statements s + where + calls > 5 + and total_exec_time > 0 + and dbid = (select oid from pg_database where datname = current_database()) + and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', + 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) + group by + queryid + ) + select * from ( + select + * + from + q_data + where + total_time > 0 + order by + total_time desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + order by + calls desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_read > 0 + order by + shared_blks_read desc + limit 100 + )
a + union + select * from ( + select + * + from + q_data + where + shared_blks_written > 0 + order by + shared_blks_written desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_read > 0 + order by + temp_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_written > 0 + order by + temp_blks_written desc + limit 100 + ) a + 17: |- + with /* pgwatch_generated */ q_data as ( + select + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + '-' as tag_query, + queryid::text as tag_queryid, + array_to_string(array_agg(distinct quote_ident(pg_get_userbyid(userid))), ',') as users, + sum(s.calls)::int8 as calls, + round(sum(s.total_exec_time)::numeric, 3)::double precision as total_time, + sum(shared_blks_hit)::int8 as shared_blks_hit, + sum(shared_blks_read)::int8 as shared_blks_read, + sum(shared_blks_written)::int8 as shared_blks_written, + sum(shared_blks_dirtied)::int8 as shared_blks_dirtied, + sum(temp_blks_read)::int8 as temp_blks_read, + sum(temp_blks_written)::int8 as temp_blks_written, + round((sum(shared_blk_read_time) + sum(local_blk_read_time))::numeric, 3)::double precision AS blk_read_time, + round((sum(shared_blk_write_time) + sum(local_blk_write_time))::numeric, 3)::double precision AS blk_write_time, + round(sum(temp_blk_read_time)::numeric, 3)::double precision as temp_blk_read_time, + round(sum(temp_blk_write_time)::numeric, 3)::double precision as temp_blk_write_time, + sum(wal_fpi)::int8 as wal_fpi, + sum(wal_bytes)::int8 as wal_bytes, + round(sum(s.total_plan_time)::numeric, 3)::double precision as total_plan_time + from + pg_stat_statements s + where + calls > 5 + and total_exec_time > 0 + and dbid = (select oid from pg_database where datname = current_database()) + and not upper(s.query) like any (array['DEALLOCATE%', 'SET %', 'RESET %', 'BEGIN%', 'BEGIN;', + 'COMMIT%', 'END%', 'ROLLBACK%', 'SHOW%']) + group by + queryid + ) + select * from ( + 
select + * + from + q_data + where + total_time > 0 + order by + total_time desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + order by + calls desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_read > 0 + order by + shared_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + shared_blks_written > 0 + order by + shared_blks_written desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_read > 0 + order by + temp_blks_read desc + limit 100 + ) a + union + select * from ( + select + * + from + q_data + where + temp_blks_written > 0 + order by + temp_blks_written desc + limit 100 + ) a; + metric_storage_name: stat_statements + subscription_stats: + description: > + This metric collects statistics from the `pg_stat_subscription_stats` view, which provides information about the status of logical replication subscriptions. + It includes details such as the number of apply and sync errors, which can help in monitoring the health of logical replication. + sqls: + 15: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + subname::text as tag_subname, + apply_error_count, + sync_error_count + from + pg_stat_subscription_stats + table_bloat_approx_stattuple: + description: > + This metric collects approximate table bloat statistics using the `pgstattuple_approx` function. + It provides insights into the amount of free space and dead tuples in tables, which can help in identifying bloat issues. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + quote_ident(n.nspname)||'.'||quote_ident(c.relname) as tag_full_table_name, + approx_free_percent, + approx_free_space as approx_free_space_b, + approx_tuple_count, + dead_tuple_percent, + dead_tuple_len as dead_tuple_len_b + from + pg_class c + /* pg_locks.relation is null for non-relation locks; a NULL in the list would make NOT IN reject every table */ + join lateral pgstattuple_approx(c.oid) st on (c.oid not in (select relation from pg_locks where mode = 'AccessExclusiveLock' and relation is not null)) -- skip locked tables + join pg_namespace n on n.oid = c.relnamespace + where + relkind in ('r', 'm') + and c.relpages >= 128 -- tables > 1mb + and not n.nspname like any (array[E'pg\\_%', 'information_schema']) + node_status: primary + gauges: + - '*' + table_bloat_approx_summary: + description: > + This metric provides a summary of approximate table bloat statistics, including the total bloat size and percentage for the current database. + It aggregates data from multiple tables to give an overview of bloat across the database.
+ sqls: + 11: |- + /* accessing pgstattuple_approx directly requires superuser or pg_stat_scan_tables/pg_monitor builtin roles or + execute grant on pgstattuple_approx(regclass) + */ + with table_bloat_approx as ( + select + avg(approx_free_percent)::double precision as approx_free_percent, + sum(approx_free_space)::double precision as approx_free_space, + avg(dead_tuple_percent)::double precision as dead_tuple_percent, + sum(dead_tuple_len)::double precision as dead_tuple_len + from + pg_class c + join + pg_namespace n on n.oid = c.relnamespace + /* pg_locks.relation is null for non-relation locks; a NULL in the list would make NOT IN reject every table */ + join lateral pgstattuple_approx(c.oid) on (c.oid not in (select relation from pg_locks where mode = 'AccessExclusiveLock' and relation is not null)) -- skip locked tables + where + relkind in ('r', 'm') + and c.relpages >= 128 -- tables >1mb + and n.nspname != 'information_schema' /* leave out information_schema relations */ + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + approx_free_percent, + approx_free_space as approx_free_space_b, + dead_tuple_percent, + dead_tuple_len as dead_tuple_len_b + from + table_bloat_approx + where + approx_free_space > 0 + gauges: + - '*' + table_bloat_approx_summary_sql: + description: > + This metric provides a summary of approximate table bloat statistics, including the total bloat size and percentage for the current database. + It aggregates data from multiple tables to give an overview of bloat across the database. + sqls: + 11: | + WITH q_bloat AS ( + SELECT + quote_ident(schemaname)||'.'||quote_ident(tblname) as full_table_name, + bloat_ratio as approx_bloat_percent, + bloat_size as approx_bloat_bytes, + fillfactor + FROM ( + + /* WARNING: executed with a non-superuser role, the query inspect only tables you are granted to read.
+ * This query is compatible with PostgreSQL 9.0 and more + */ + SELECT current_database(), + schemaname, + tblname, + bs * tblpages AS real_size, + (tblpages - est_tblpages) * bs AS extra_size, + CASE + WHEN tblpages - est_tblpages > 0 + THEN 100 * (tblpages - est_tblpages) / tblpages::float + ELSE 0 + END AS extra_ratio, + fillfactor, + CASE + WHEN tblpages - est_tblpages_ff > 0 + THEN (tblpages - est_tblpages_ff) * bs + ELSE 0 + END AS bloat_size, + CASE + WHEN tblpages - est_tblpages_ff > 0 + THEN 100 * (tblpages - est_tblpages_ff) / tblpages::float + ELSE 0 + END AS bloat_ratio, + is_na + -- , (pst).free_percent + (pst).dead_tuple_percent AS real_frag + FROM ( + SELECT ceil(reltuples / ((bs - page_hdr) / tpl_size)) + ceil(toasttuples / 4) AS est_tblpages, + ceil(reltuples / ((bs - page_hdr) * fillfactor / (tpl_size * 100))) + + ceil(toasttuples / 4) AS est_tblpages_ff, + tblpages, + fillfactor, + bs, + tblid, + schemaname, + tblname, + heappages, + toastpages, + is_na + -- , stattuple.pgstattuple(tblid) AS pst + FROM ( + SELECT (4 + tpl_hdr_size + tpl_data_size + (2 * ma) + - CASE WHEN tpl_hdr_size % ma = 0 THEN ma ELSE tpl_hdr_size % ma END + - CASE + WHEN ceil(tpl_data_size)::int % ma = 0 THEN ma + ELSE ceil(tpl_data_size)::int % ma END + ) AS tpl_size, + bs - page_hdr AS size_per_block, + (heappages + toastpages) AS tblpages, + heappages, + toastpages, + reltuples, + toasttuples, + bs, + page_hdr, + tblid, + schemaname, + tblname, + fillfactor, + is_na + FROM ( + SELECT tbl.oid AS tblid, + ns.nspname AS schemaname, + tbl.relname AS tblname, + tbl.reltuples, + tbl.relpages AS heappages, + coalesce(toast.relpages, 0) AS toastpages, + coalesce(toast.reltuples, 0) AS toasttuples, + coalesce(substring( + array_to_string(tbl.reloptions, ' ') + FROM 'fillfactor=([0-9]+)')::smallint, + 100) AS fillfactor, + current_setting('block_size')::numeric AS bs, + CASE + WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' + THEN 8 + ELSE 4 END AS ma, + 
24 AS page_hdr, + 23 + CASE + WHEN MAX(coalesce(null_frac, 0)) > 0 THEN (7 + count(*)) / 8 + ELSE 0::int END + + + CASE WHEN tbl.relhasoids THEN 4 ELSE 0 END AS tpl_hdr_size, + sum((1 - coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS tpl_data_size, + bool_or(att.atttypid = 'pg_catalog.name'::regtype) + OR count(att.attname) <> count(s.attname) AS is_na + FROM pg_attribute AS att + JOIN pg_class AS tbl ON att.attrelid = tbl.oid + JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace + LEFT JOIN pg_stats AS s ON s.schemaname = ns.nspname + AND s.tablename = tbl.relname AND s.inherited = false AND + s.attname = att.attname + LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid + WHERE att.attnum > 0 + AND NOT att.attisdropped + AND tbl.relkind IN ('r', 'm') + AND ns.nspname != 'information_schema' + GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, tbl.relhasoids + ORDER BY 2, 3 + ) AS s + ) AS s2 + ) AS s3 + -- WHERE NOT is_na + ) s4 + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + (select sum(approx_bloat_bytes) from q_bloat) as approx_table_bloat_b, + ((select sum(approx_bloat_bytes) from q_bloat) * 100 / pg_database_size(current_database()))::int8 as approx_bloat_percentage + 12: | + WITH q_bloat AS ( + SELECT quote_ident(schemaname) || '.' || quote_ident(tblname) as full_table_name, + bloat_ratio as approx_bloat_percent, + bloat_size as approx_bloat_bytes, + fillfactor + FROM ( + + /* WARNING: executed with a non-superuser role, the query inspect only tables you are granted to read. 
+ * This query is compatible with PostgreSQL 9.0 and more + */ + SELECT current_database(), + schemaname, + tblname, + bs * tblpages AS real_size, + (tblpages - est_tblpages) * bs AS extra_size, + CASE + WHEN tblpages > 0 AND tblpages - est_tblpages > 0 + THEN 100 * (tblpages - est_tblpages) / tblpages::float + ELSE 0 + END AS extra_ratio, + fillfactor, + CASE + WHEN tblpages - est_tblpages_ff > 0 + THEN (tblpages - est_tblpages_ff) * bs + ELSE 0 + END AS bloat_size, + CASE + WHEN tblpages > 0 AND tblpages - est_tblpages_ff > 0 + THEN 100 * (tblpages - est_tblpages_ff) / tblpages::float + ELSE 0 + END AS bloat_ratio, + is_na + -- , (pst).free_percent + (pst).dead_tuple_percent AS real_frag + FROM ( + SELECT ceil(reltuples / ((bs - page_hdr) / tpl_size)) + ceil(toasttuples / 4) AS est_tblpages, + ceil(reltuples / ((bs - page_hdr) * fillfactor / (tpl_size * 100))) + + ceil(toasttuples / 4) AS est_tblpages_ff, + tblpages, + fillfactor, + bs, + tblid, + schemaname, + tblname, + heappages, + toastpages, + is_na + -- , stattuple.pgstattuple(tblid) AS pst + FROM ( + SELECT (4 + tpl_hdr_size + tpl_data_size + (2 * ma) + - CASE WHEN tpl_hdr_size % ma = 0 THEN ma ELSE tpl_hdr_size % ma END + - CASE + WHEN ceil(tpl_data_size)::int % ma = 0 THEN ma + ELSE ceil(tpl_data_size)::int % ma END + ) AS tpl_size, + bs - page_hdr AS size_per_block, + (heappages + toastpages) AS tblpages, + heappages, + toastpages, + reltuples, + toasttuples, + bs, + page_hdr, + tblid, + schemaname, + tblname, + fillfactor, + is_na + FROM ( + SELECT tbl.oid AS tblid, + ns.nspname AS schemaname, + tbl.relname AS tblname, + tbl.reltuples, + tbl.relpages AS heappages, + coalesce(toast.relpages, 0) AS toastpages, + coalesce(toast.reltuples, 0) AS toasttuples, + coalesce(substring( + array_to_string(tbl.reloptions, ' ') + FROM 'fillfactor=([0-9]+)')::smallint, + 100) AS fillfactor, + current_setting('block_size')::numeric AS bs, + CASE + WHEN version() ~ 'mingw32' OR version() ~ 
'64-bit|x86_64|ppc64|ia64|amd64' + THEN 8 + ELSE 4 END AS ma, + 24 AS page_hdr, + 23 + CASE + WHEN MAX(coalesce(null_frac, 0)) > 0 THEN (7 + count(*)) / 8 + ELSE 0::int END + + + 0 AS tpl_hdr_size, + sum((1 - coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS tpl_data_size, + bool_or(att.atttypid = 'pg_catalog.name'::regtype) + OR + count(att.attname) <> count(s.attname) AS is_na + FROM pg_attribute AS att + JOIN pg_class AS tbl ON att.attrelid = tbl.oid + JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace + LEFT JOIN pg_stats AS s ON s.schemaname = ns.nspname + AND s.tablename = tbl.relname AND s.inherited = false AND + s.attname = att.attname + LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid + WHERE att.attnum > 0 + AND NOT att.attisdropped + AND tbl.relkind IN ('r', 'm') + AND ns.nspname != 'information_schema' + GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 + ORDER BY 2, 3 + ) AS s + ) AS s2 + ) AS s3 + -- WHERE NOT is_na + ) s4 + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + (select sum(approx_bloat_bytes) from q_bloat) as approx_table_bloat_b, + ((select sum(approx_bloat_bytes) from q_bloat) * 100 / pg_database_size(current_database()))::int8 as approx_bloat_percentage + gauges: + - '*' + table_hashes: + description: > + This metric collects hashes of table definitions to detect changes in the schema. + It uses the `pg_catalog.pg_tables` view to gather information about tables and their columns. + The hash is computed based on the table schema, name, and column definitions. 
+ sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + quote_ident(table_schema)||'.'||quote_ident(table_name) as tag_table, + md5((array_agg((c.*)::text order by ordinal_position))::text) + from ( + SELECT current_database()::information_schema.sql_identifier AS table_catalog, + nc.nspname::information_schema.sql_identifier AS table_schema, + c.relname::information_schema.sql_identifier AS table_name, + a.attname::information_schema.sql_identifier AS column_name, + a.attnum::information_schema.cardinal_number AS ordinal_position, + pg_get_expr(ad.adbin, ad.adrelid)::information_schema.character_data AS column_default, + CASE + WHEN a.attnotnull OR t.typtype = 'd'::"char" AND t.typnotnull THEN 'NO'::text + ELSE 'YES'::text + END::information_schema.yes_or_no AS is_nullable, + CASE + WHEN t.typtype = 'd'::"char" THEN + CASE + WHEN bt.typelem <> 0::oid AND bt.typlen = '-1'::integer THEN 'ARRAY'::text + WHEN nbt.nspname = 'pg_catalog'::name THEN format_type(t.typbasetype, NULL::integer) + ELSE 'USER-DEFINED'::text + END + ELSE + CASE + WHEN t.typelem <> 0::oid AND t.typlen = '-1'::integer THEN 'ARRAY'::text + WHEN nt.nspname = 'pg_catalog'::name THEN format_type(a.atttypid, NULL::integer) + ELSE 'USER-DEFINED'::text + END + END::information_schema.character_data AS data_type, + information_schema._pg_char_max_length(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS character_maximum_length, + information_schema._pg_char_octet_length(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS character_octet_length, + information_schema._pg_numeric_precision(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS numeric_precision, + 
information_schema._pg_numeric_precision_radix(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS numeric_precision_radix, + information_schema._pg_numeric_scale(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS numeric_scale, + information_schema._pg_datetime_precision(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.cardinal_number AS datetime_precision, + information_schema._pg_interval_type(information_schema._pg_truetypid(a.*, t.*), information_schema._pg_truetypmod(a.*, t.*))::information_schema.character_data AS interval_type, + NULL::integer::information_schema.cardinal_number AS interval_precision, + NULL::character varying::information_schema.sql_identifier AS character_set_catalog, + NULL::character varying::information_schema.sql_identifier AS character_set_schema, + NULL::character varying::information_schema.sql_identifier AS character_set_name, + CASE + WHEN nco.nspname IS NOT NULL THEN current_database() + ELSE NULL::name + END::information_schema.sql_identifier AS collation_catalog, + nco.nspname::information_schema.sql_identifier AS collation_schema, + co.collname::information_schema.sql_identifier AS collation_name, + CASE + WHEN t.typtype = 'd'::"char" THEN current_database() + ELSE NULL::name + END::information_schema.sql_identifier AS domain_catalog, + CASE + WHEN t.typtype = 'd'::"char" THEN nt.nspname + ELSE NULL::name + END::information_schema.sql_identifier AS domain_schema, + CASE + WHEN t.typtype = 'd'::"char" THEN t.typname + ELSE NULL::name + END::information_schema.sql_identifier AS domain_name, + current_database()::information_schema.sql_identifier AS udt_catalog, + COALESCE(nbt.nspname, nt.nspname)::information_schema.sql_identifier AS udt_schema, + COALESCE(bt.typname, t.typname)::information_schema.sql_identifier AS 
udt_name, + NULL::character varying::information_schema.sql_identifier AS scope_catalog, + NULL::character varying::information_schema.sql_identifier AS scope_schema, + NULL::character varying::information_schema.sql_identifier AS scope_name, + NULL::integer::information_schema.cardinal_number AS maximum_cardinality, + a.attnum::information_schema.sql_identifier AS dtd_identifier, + 'NO'::character varying::information_schema.yes_or_no AS is_self_referencing, + 'NO'::character varying::information_schema.yes_or_no AS is_identity, + NULL::character varying::information_schema.character_data AS identity_generation, + NULL::character varying::information_schema.character_data AS identity_start, + NULL::character varying::information_schema.character_data AS identity_increment, + NULL::character varying::information_schema.character_data AS identity_maximum, + NULL::character varying::information_schema.character_data AS identity_minimum, + NULL::character varying::information_schema.yes_or_no AS identity_cycle, + 'NEVER'::character varying::information_schema.character_data AS is_generated, + NULL::character varying::information_schema.character_data AS generation_expression, + CASE + WHEN c.relkind = 'r'::"char" OR (c.relkind = ANY (ARRAY['v'::"char", 'f'::"char"])) AND pg_column_is_updatable(c.oid::regclass, a.attnum, false) THEN 'YES'::text + ELSE 'NO'::text + END::information_schema.yes_or_no AS is_updatable + FROM pg_attribute a + LEFT JOIN pg_attrdef ad ON a.attrelid = ad.adrelid AND a.attnum = ad.adnum + JOIN (pg_class c + JOIN pg_namespace nc ON c.relnamespace = nc.oid) ON a.attrelid = c.oid + JOIN (pg_type t + JOIN pg_namespace nt ON t.typnamespace = nt.oid) ON a.atttypid = t.oid + LEFT JOIN (pg_type bt + JOIN pg_namespace nbt ON bt.typnamespace = nbt.oid) ON t.typtype = 'd'::"char" AND t.typbasetype = bt.oid + LEFT JOIN (pg_collation co + JOIN pg_namespace nco ON co.collnamespace = nco.oid) ON a.attcollation = co.oid AND (nco.nspname <> 'pg_catalog'::name OR 
co.collname <> 'default'::name) + WHERE NOT pg_is_other_temp_schema(nc.oid) AND a.attnum > 0 AND NOT a.attisdropped AND (c.relkind = ANY (ARRAY['r'::"char", 'v'::"char", 'f'::"char"])) + + ) c + where + not table_schema like any (array[E'pg\\_%', 'information_schema']) + group by + table_schema, table_name + order by + table_schema, table_name + table_io_stats: + description: > + This metric collects I/O statistics for tables, including heap and index block reads and hits. + It provides insights into the performance of table access patterns. + sqls: + 11: |- + select * from ( + with recursive + q_root_part as ( + select c.oid, + c.relkind, + n.nspname root_schema, + c.relname root_relname + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where relkind in ('p', 'r') + and relpersistence != 't' + and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and not exists(select * from pg_inherits where inhrelid = c.oid) + and exists(select * from pg_inherits where inhparent = c.oid) + ), + q_parts (relid, relkind, level, root) as ( + select oid, relkind, 1, oid + from q_root_part + union all + select inhrelid, c.relkind, level + 1, q.root + from pg_inherits i + join q_parts q on inhparent = q.relid + join pg_class c on c.oid = i.inhrelid + ), + q_tstats as ( + SELECT (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + relid, + schemaname::text as tag_schema, + relname::text as tag_table_name, + quote_ident(schemaname) || '.' 
|| quote_ident(relname) as tag_table_full_name, + heap_blks_read, + heap_blks_hit, + idx_blks_read, + idx_blks_hit, + toast_blks_read, + toast_blks_hit, + tidx_blks_read, + tidx_blks_hit + FROM pg_statio_user_tables + WHERE NOT schemaname LIKE E'pg\\_temp%' + AND (heap_blks_read > 0 OR heap_blks_hit > 0 OR idx_blks_read > 0 OR idx_blks_hit > 0 OR + tidx_blks_read > 0 OR + tidx_blks_hit > 0) + ) + select epoch_ns, + tag_schema, + tag_table_name, + tag_table_full_name, + 0 as is_part_root, + heap_blks_read, + heap_blks_hit, + idx_blks_read, + idx_blks_hit, + toast_blks_read, + toast_blks_hit, + tidx_blks_read, + tidx_blks_hit + from q_tstats + where not tag_schema like E'\\_timescaledb%' + and not exists (select * from q_root_part where oid = q_tstats.relid) + + union all + + select * + from ( + select epoch_ns, + quote_ident(qr.root_schema) as tag_schema, + quote_ident(qr.root_relname) as tag_table_name, + quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, + 1 as is_part_root, + sum(heap_blks_read)::int8, + sum(heap_blks_hit)::int8, + sum(idx_blks_read)::int8, + sum(idx_blks_hit)::int8, + sum(toast_blks_read)::int8, + sum(toast_blks_hit)::int8, + sum(tidx_blks_read)::int8, + sum(tidx_blks_hit)::int8 + from q_tstats ts + join q_parts qp on qp.relid = ts.relid + join q_root_part qr on qr.oid = qp.root + group by 1, 2, 3, 4 + ) x + ) y + order by + coalesce(heap_blks_read, 0) + + coalesce(heap_blks_hit, 0) + + coalesce(idx_blks_read, 0) + + coalesce(idx_blks_hit, 0) + + coalesce(toast_blks_read, 0) + + coalesce(toast_blks_hit, 0) + + coalesce(tidx_blks_read, 0) + + coalesce(tidx_blks_hit, 0) + desc limit 300 + table_stats: + description: > + This metric collects statistics about user tables, including size, vacuum status, and transaction freeze age. + It provides insights into the health and performance of tables in the database. 
+ sqls: + 11: |- + with recursive + q_root_part as ( + select c.oid, + c.relkind, + n.nspname root_schema, + c.relname root_relname + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where relkind in ('p', 'r') + and relpersistence != 't' + and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and not exists(select * from pg_inherits where inhrelid = c.oid) + and exists(select * from pg_inherits where inhparent = c.oid) + ), + q_parts (relid, relkind, level, root) as ( + select oid, relkind, 1, oid + from q_root_part + union all + select inhrelid, c.relkind, level + 1, q.root + from pg_inherits i + join q_parts q on inhparent = q.relid + join pg_class c on c.oid = i.inhrelid + ), + q_tstats as ( + select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + relid, -- not sent to final output + quote_ident(schemaname) as tag_schema, + quote_ident(ut.relname) as tag_table_name, + quote_ident(schemaname) || '.' || quote_ident(ut.relname) as tag_table_full_name, + pg_table_size(relid) as table_size_b, + abs(greatest(ceil(log((pg_table_size(relid) + 1) / 10 ^ 6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ pg_total_relation_size(relid) as total_relation_size_b, + case when reltoastrelid != 0 then pg_total_relation_size(reltoastrelid) else 0::int8 end as toast_size_b, + (extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum, + (extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze, + case when 'autovacuum_enabled=off' = ANY (c.reloptions) then 1 else 0 end as no_autovacuum, + seq_scan, + seq_tup_read, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + case when c.relkind != 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age + from pg_stat_user_tables ut + join + pg_class c on c.oid = ut.relid + where + -- leaving out fully locked tables as pg_relation_size also wants a lock and would wait + not exists(select 1 from pg_locks where relation = relid and mode = 'AccessExclusiveLock') + and c.relpersistence != 't' -- and temp tables + ) + + select /* pgwatch_generated */ + epoch_ns, + tag_schema, + tag_table_name, + tag_table_full_name, + 0 as is_part_root, + table_size_b, + tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ total_relation_size_b, + toast_size_b, + seconds_since_last_vacuum, + seconds_since_last_analyze, + no_autovacuum, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + tx_freeze_age + from q_tstats + where not tag_schema like E'\\_timescaledb%' + and not exists (select * from q_root_part where oid = q_tstats.relid) + + union all + + select * from ( + select + epoch_ns, + quote_ident(qr.root_schema) as tag_schema, + quote_ident(qr.root_relname) as tag_table_name, + quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, + 1 as is_part_root, + sum(table_size_b)::int8 table_size_b, + abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)), + 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. + sum(total_relation_size_b)::int8 total_relation_size_b, + sum(toast_size_b)::int8 toast_size_b, + min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum, + min(seconds_since_last_analyze)::int8 seconds_since_last_analyze, + sum(no_autovacuum)::int8 no_autovacuum, + sum(seq_scan)::int8 seq_scan, + sum(seq_tup_read)::int8 seq_tup_read, + sum(idx_scan)::int8 idx_scan, + sum(idx_tup_fetch)::int8 idx_tup_fetch, + sum(n_tup_ins)::int8 n_tup_ins, + sum(n_tup_upd)::int8 n_tup_upd, + sum(n_tup_del)::int8 n_tup_del, + sum(n_tup_hot_upd)::int8 n_tup_hot_upd, + sum(n_live_tup)::int8 n_live_tup, + sum(n_dead_tup)::int8 n_dead_tup, + sum(vacuum_count)::int8 vacuum_count, + sum(autovacuum_count)::int8 autovacuum_count, + sum(analyze_count)::int8 analyze_count, + sum(autoanalyze_count)::int8 autoanalyze_count, + max(tx_freeze_age)::int8 tx_freeze_age + from + q_tstats ts + join q_parts qp on qp.relid = ts.relid + join q_root_part qr on qr.oid = qp.root + group by + 1, 2, 3, 4 + ) x + order by table_size_b desc nulls last limit 300 + 16: |- + with recursive /* 
pgwatch_generated */ + q_root_part as ( + select c.oid, + c.relkind, + n.nspname root_schema, + c.relname root_relname + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where relkind in ('p', 'r') + and relpersistence != 't' + and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and not exists(select * from pg_inherits where inhrelid = c.oid) + and exists(select * from pg_inherits where inhparent = c.oid) + ), + q_parts (relid, relkind, level, root) as ( + select oid, relkind, 1, oid + from q_root_part + union all + select inhrelid, c.relkind, level + 1, q.root + from pg_inherits i + join q_parts q on inhparent = q.relid + join pg_class c on c.oid = i.inhrelid + ), + q_tstats as ( + select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + relid, -- not sent to final output + quote_ident(schemaname) as tag_schema, + quote_ident(ut.relname) as tag_table_name, + quote_ident(schemaname) || '.' || quote_ident(ut.relname) as tag_table_full_name, + pg_table_size(relid) as table_size_b, + abs(greatest(ceil(log((pg_table_size(relid) + 1) / 10 ^ 6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ pg_total_relation_size(relid) as total_relation_size_b, + case when c.reltoastrelid != 0 then pg_total_relation_size(c.reltoastrelid) else 0::int8 end as toast_size_b, + (extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum, + (extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze, + case when 'autovacuum_enabled=off' = ANY (c.reloptions) then 1 else 0 end as no_autovacuum, + seq_scan, + seq_tup_read, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + case when c.relkind != 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age, + extract(epoch from now() - last_seq_scan)::int8 as last_seq_scan_s + from pg_stat_user_tables ut + join pg_class c on c.oid = ut.relid + left join pg_class t on t.oid = c.reltoastrelid + left join pg_index ti on ti.indrelid = t.oid + left join pg_class tir on tir.oid = ti.indexrelid + where + -- leaving out fully locked tables as pg_relation_size also wants a lock and would wait + not exists (select 1 from pg_locks where relation = relid and mode = 'AccessExclusiveLock') + and c.relpersistence != 't' -- and temp tables + order by case when c.relkind = 'p' then 1e9::int else coalesce(c.relpages, 0) + coalesce(t.relpages, 0) + coalesce(tir.relpages, 0) end desc + limit 1500 /* NB! When changing the bottom final LIMIT also adjust this limit. Should be at least 5x bigger as approx sizes depend a lot on vacuum frequency. + The general idea is to reduce filesystem "stat"-ing on tables that won't make it to final output anyways based on approximate size */ + ) + + select /* pgwatch_generated */ + epoch_ns, + tag_schema, + tag_table_name, + tag_table_full_name, + 0 as is_part_root, + table_size_b, + tag_table_size_cardinality_mb, -- i.e. 
0=<1MB, 1=<10MB, 2=<100MB,.. + total_relation_size_b, + toast_size_b, + seconds_since_last_vacuum, + seconds_since_last_analyze, + no_autovacuum, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + tx_freeze_age, + last_seq_scan_s + from q_tstats + where not tag_schema like E'\\_timescaledb%' + and not exists (select * from q_root_part where oid = q_tstats.relid) + + union all + + select * from ( + select + epoch_ns, + quote_ident(qr.root_schema) as tag_schema, + quote_ident(qr.root_relname) as tag_table_name, + quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, + 1 as is_part_root, + sum(table_size_b)::int8 table_size_b, + abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)), + 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. + sum(total_relation_size_b)::int8 total_relation_size_b, + sum(toast_size_b)::int8 toast_size_b, + min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum, + min(seconds_since_last_analyze)::int8 seconds_since_last_analyze, + sum(no_autovacuum)::int8 no_autovacuum, + sum(seq_scan)::int8 seq_scan, + sum(seq_tup_read)::int8 seq_tup_read, + sum(idx_scan)::int8 idx_scan, + sum(idx_tup_fetch)::int8 idx_tup_fetch, + sum(n_tup_ins)::int8 n_tup_ins, + sum(n_tup_upd)::int8 n_tup_upd, + sum(n_tup_del)::int8 n_tup_del, + sum(n_tup_hot_upd)::int8 n_tup_hot_upd, + sum(n_live_tup)::int8 n_live_tup, + sum(n_dead_tup)::int8 n_dead_tup, + sum(vacuum_count)::int8 vacuum_count, + sum(autovacuum_count)::int8 autovacuum_count, + sum(analyze_count)::int8 analyze_count, + sum(autoanalyze_count)::int8 autoanalyze_count, + max(tx_freeze_age)::int8 tx_freeze_age, + min(last_seq_scan_s)::int8 last_seq_scan_s + from + q_tstats ts + join q_parts qp on qp.relid = ts.relid + join q_root_part qr on qr.oid = qp.root + group by + 
1, 2, 3, 4 + ) x + order by table_size_b desc nulls last limit 300 + gauges: + - table_size_b + - total_relation_size_b + - toast_size_b + - seconds_since_last_vacuum + - seconds_since_last_analyze + - n_live_tup + - n_dead_tup + statement_timeout_seconds: 300 + table_stats_approx: + description: > + This metric collects approximate statistics about user tables, including size, vacuum status, and transaction freeze age. + It provides insights into the health and performance of tables in the database. + sqls: + 11: |- + with recursive /* pgwatch_generated */ + q_root_part as ( + select c.oid, + c.relkind, + n.nspname root_schema, + c.relname root_relname + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + where relkind in ('p', 'r') + and relpersistence != 't' + and not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and not exists(select * from pg_inherits where inhrelid = c.oid) + and exists(select * from pg_inherits where inhparent = c.oid) + ), + q_parts (relid, relkind, level, root) as ( + select oid, relkind, 1, oid + from q_root_part + union all + select inhrelid, c.relkind, level + 1, q.root + from pg_inherits i + join q_parts q on inhparent = q.relid + join pg_class c on c.oid = i.inhrelid + ), + q_tstats as ( + with q_tbls_by_total_associated_relpages_approx as ( + select * from ( + select + c.oid, + c.relname, + c.relpages, + coalesce((select sum(relpages) from pg_class ci join pg_index i on i.indexrelid = ci.oid where i.indrelid = c.oid), 0) as index_relpages, + coalesce((select coalesce(ct.relpages, 0) + coalesce(cti.relpages, 0) from pg_class ct left join pg_index ti on ti.indrelid = ct.oid left join pg_class cti on cti.oid = ti.indexrelid where ct.oid = c.reltoastrelid), 0) as toast_relpages, + case when 'autovacuum_enabled=off' = ANY(c.reloptions) then 1 else 0 end as no_autovacuum, + case when c.relkind != 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age, + c.relpersistence + from + pg_class 
c + join pg_namespace n on n.oid = c.relnamespace + where + not n.nspname like any (array[E'pg\\_%', 'information_schema', E'\\_timescaledb%']) + and c.relkind = 'r' + and c.relpersistence != 't' + ) x + order by relpages + index_relpages + toast_relpages desc limit 300 + ), q_block_size as ( + select current_setting('block_size')::int8 as bs + ) + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + relid, + quote_ident(schemaname)||'.'||quote_ident(ut.relname) as tag_table_full_name, + bs * relpages as table_size_b, + abs(greatest(ceil(log((bs*relpages+1) / 10^6)), 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. + bs * (relpages + index_relpages + toast_relpages) as total_relation_size_b, + bs * toast_relpages as toast_size_b, + (extract(epoch from now() - greatest(last_vacuum, last_autovacuum)))::int8 as seconds_since_last_vacuum, + (extract(epoch from now() - greatest(last_analyze, last_autoanalyze)))::int8 as seconds_since_last_analyze, + no_autovacuum, + seq_scan, + seq_tup_read, + coalesce(idx_scan, 0) as idx_scan, + coalesce(idx_tup_fetch, 0) as idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + tx_freeze_age, + relpersistence + from + pg_stat_user_tables ut + join q_tbls_by_total_associated_relpages_approx t on t.oid = ut.relid + join q_block_size on true + where + -- leaving out fully locked tables as pg_relation_size also wants a lock and would wait + not exists (select 1 from pg_locks where relation = relid and mode = 'AccessExclusiveLock') + order by relpages desc + ) + select /* pgwatch_generated */ + epoch_ns, + tag_table_full_name, + 0 as is_part_root, + table_size_b, + tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. 
+ total_relation_size_b, + toast_size_b, + seconds_since_last_vacuum, + seconds_since_last_analyze, + no_autovacuum, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count, + tx_freeze_age + from q_tstats + where not exists (select * from q_root_part where oid = q_tstats.relid) + union all + select * from ( + select + epoch_ns, + quote_ident(qr.root_schema) || '.' || quote_ident(qr.root_relname) as tag_table_full_name, + 1 as is_part_root, + sum(table_size_b)::int8 table_size_b, + abs(greatest(ceil(log((sum(table_size_b) + 1) / 10 ^ 6)), + 0))::text as tag_table_size_cardinality_mb, -- i.e. 0=<1MB, 1=<10MB, 2=<100MB,.. + sum(total_relation_size_b)::int8 total_relation_size_b, + sum(toast_size_b)::int8 toast_size_b, + min(seconds_since_last_vacuum)::int8 seconds_since_last_vacuum, + min(seconds_since_last_analyze)::int8 seconds_since_last_analyze, + sum(no_autovacuum)::int8 no_autovacuum, + sum(seq_scan)::int8 seq_scan, + sum(seq_tup_read)::int8 seq_tup_read, + sum(idx_scan)::int8 idx_scan, + sum(idx_tup_fetch)::int8 idx_tup_fetch, + sum(n_tup_ins)::int8 n_tup_ins, + sum(n_tup_upd)::int8 n_tup_upd, + sum(n_tup_del)::int8 n_tup_del, + sum(n_tup_hot_upd)::int8 n_tup_hot_upd, + sum(n_live_tup)::int8 n_live_tup, + sum(n_dead_tup)::int8 n_dead_tup, + sum(vacuum_count)::int8 vacuum_count, + sum(autovacuum_count)::int8 autovacuum_count, + sum(analyze_count)::int8 analyze_count, + sum(autoanalyze_count)::int8 autoanalyze_count, + max(tx_freeze_age)::int8 tx_freeze_age + from + q_tstats ts + join q_parts qp on qp.relid = ts.relid + join q_root_part qr on qr.oid = qp.root + group by + 1, 2 + ) x; + + gauges: + - table_size_b + - total_relation_size_b + - toast_size_b + - seconds_since_last_vacuum + - seconds_since_last_analyze + - n_live_tup + - n_dead_tup + metric_storage_name: table_stats + unused_indexes: + 
description: > + This metric collects information about unused indexes in the database. + It helps identify indexes that are not being used and can potentially be dropped to improve performance. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + * + from ( + select + format('%I.%I', sui.schemaname, sui.indexrelname) as tag_index_full_name, + sui.idx_scan, + coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b, + system_identifier::text as tag_sys_id /* to easily check also all replicas as could be still used there */ + from + pg_stat_user_indexes sui + join pg_index i on i.indexrelid = sui.indexrelid + join pg_control_system() on true + where not sui.schemaname like E'pg\\_temp%' + and idx_scan = 0 + and not (indisprimary or indisunique or indisexclusion) + and not exists (select * from pg_locks where relation = sui.relid and mode = 'AccessExclusiveLock') + ) x + where index_size_b > 100*1024^2 /* list >100MB only */ + order by index_size_b desc + limit 25 + vmstat: + description: > + This metric collects system-level statistics using the `vmstat` command. + It provides insights into memory usage, CPU load, and other system metrics. 
+        sqls:
+            11: |-
+                select /* pgwatch_generated */
+                    (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
+                    r, b, swpd, free, buff, cache, si, so, bi, bo, "in", cs, us, sy, id, wa, st, cpu_count, load_1m, load_5m, load_15m, total_memory
+                from
+                    get_vmstat()
+        # init_sql creates the PL/Python function get_vmstat() that the query above selects from.
+        init_sql: |-
+            CREATE EXTENSION IF NOT EXISTS plpython3u;
+
+            CREATE OR REPLACE FUNCTION get_vmstat(
+                IN delay int default 1,
+                OUT r int, OUT b int, OUT swpd int8, OUT free int8, OUT buff int8, OUT cache int8, OUT si int8, OUT so int8, OUT bi int8,
+                OUT bo int8, OUT "in" int, OUT cs int, OUT us int, OUT sy int, OUT id int, OUT wa int, OUT st int,
+                OUT cpu_count int, OUT load_1m float4, OUT load_5m float4, OUT load_15m float4, OUT total_memory int8
+            )
+            LANGUAGE plpython3u
+            AS $FUNCTION$
+            from os import cpu_count, popen
+            unit = 1024  # 'vmstat' default block byte size
+
+            cpu_count = cpu_count()
+            # Run 'vmstat <delay> 2' and close the pipe as soon as its output has been read,
+            # instead of leaving the handle to be cleaned up by garbage collection.
+            with popen('vmstat {} 2'.format(delay)) as vmstat_pipe:
+                vmstat_lines = vmstat_pipe.readlines()
+            # Only the last output line is parsed; every field on it must be an integer.
+            vm = [int(x) for x in vmstat_lines[-1].split()]
+            # plpy.notice(vm)
+            load_1m, load_5m, load_15m = None, None, None
+            # The load averages stay None unless the first line of /proc/loadavg has exactly five fields.
+            with open('/proc/loadavg', 'r') as f:
+                la_line = f.readline()
+                if la_line:
+                    splits = la_line.split()
+                    if len(splits) == 5:
+                        load_1m, load_5m, load_15m = splits[0], splits[1], splits[2]
+
+            total_memory = None
+            # Only the first line of /proc/meminfo is read; its second field is multiplied by 1024
+            # when the line splits into exactly three fields.
+            with open('/proc/meminfo', 'r') as f:
+                mi_line = f.readline()
+                splits = mi_line.split()
+                # plpy.notice(splits)
+                if len(splits) == 3:
+                    total_memory = int(splits[1]) * 1024
+
+            return vm[0], vm[1], vm[2] * unit, vm[3] * unit, vm[4] * unit, vm[5] * unit, vm[6] * unit, vm[7] * unit, vm[8] * unit, \
+                vm[9] * unit, vm[10], vm[11], vm[12], vm[13], vm[14], vm[15], vm[16], cpu_count, load_1m, load_5m, load_15m, total_memory
+            $FUNCTION$;
+
+            GRANT EXECUTE ON FUNCTION get_vmstat(int) TO pgwatch;
+            COMMENT ON FUNCTION get_vmstat(int) IS 'created for pgwatch';
+    wal:
+        description: >
+            This metric collects information about the Write-Ahead Logging (WAL) system in PostgreSQL.
+            It provides insights into WAL activity, including the current WAL location, recovery status, postmaster uptime, and the current timeline.
+        sqls:
+            # xlog_location_b is measured in bytes from LSN 0/0: the current WAL LSN outside recovery,
+            # the last replayed LSN while pg_is_in_recovery() is true.
+            11: |-
+                select /* pgwatch_generated */
+                    (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
+                    case
+                        when pg_is_in_recovery() = false then
+                            pg_wal_lsn_diff(pg_current_wal_lsn(), '0/0')::int8
+                        else
+                            pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '0/0')::int8
+                        end as xlog_location_b,
+                    case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int,
+                    extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s,
+                    system_identifier::text as tag_sys_id,
+                    case
+                        when pg_is_in_recovery() = false then
+                            ('x'||substr(pg_walfile_name(pg_current_wal_lsn()), 1, 8))::bit(32)::int
+                        else
+                            (select min_recovery_end_timeline::int from pg_control_recovery())
+                        end as timeline
+                from pg_control_system()
+        gauges:
+            - '*'
+        is_instance_level: true
+    wal_receiver:
+        description: >
+            This metric collects information about the WAL receiver process in PostgreSQL.
+            It provides insights into the status of the WAL receiver, including replay lag and last replay timestamp.
+        sqls:
+            # replay_lag_b: bytes between the last received and the last replayed WAL position.
+            # last_replay_s: seconds since pg_last_xact_replay_timestamp().
+            11: |-
+                select /* pgwatch_generated */
+                    (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
+                    pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())::int8 as replay_lag_b,
+                    extract(epoch from (now() - pg_last_xact_replay_timestamp()))::int8 as last_replay_s
+        # NOTE(review): presumably limits collection to standby nodes - confirm against the collector's metric definition docs.
+        node_status: standby
+        gauges:
+            - '*'
+        is_instance_level: true
+    wal_size:
+        description: >
+            This metric collects the size of the Write-Ahead Log (WAL) directory in PostgreSQL.
+            It provides insights into the total size of WAL files currently stored in the database.
+        sqls:
+            11: |-
+                select /* pgwatch_generated */
+                    (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
+                    sum(size)::int8 as wal_size_b
+                from pg_ls_waldir()
+        gauges:
+            - '*'
+        is_instance_level: true
+    wal_stats:
+        description: >
+            This metric collects statistics about the Write-Ahead Logging (WAL) system in PostgreSQL.
+            It provides insights into WAL activity, including the number of records, full page images, and write/sync times.
+        sqls:
+            # wal_bytes is reported in KiB (wal_bytes / 1024).
+            # NOTE(review): confirm that wal_write, wal_sync, wal_write_time and wal_sync_time are still
+            # exposed by pg_stat_wal on every server version this entry is run against.
+            14: |-
+                select /* pgwatch_generated */
+                    (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
+                    wal_records,
+                    wal_fpi,
+                    (wal_bytes / 1024)::int8 as wal_bytes_kb,
+                    wal_buffers_full,
+                    wal_write,
+                    wal_sync,
+                    wal_write_time::int8,
+                    wal_sync_time::int8
+                from
+                    pg_stat_wal
+        gauges:
+            - '*'
+        is_instance_level: true
+    wait_events:
+        description: >
+            This metric retrieves information about wait events for active sessions in the PostgreSQL database.
+            It tracks the wait event types and counts of processes currently experiencing each wait event type,
+            providing insights into potential bottlenecks and resource contention issues.
+        sqls:
+            # The tag value is "<wait_event> - <wait_event_type>", with 'CPU*' substituted for NULLs.
+            # First branch: active sessions attached to a database, grouped per database.
+            # Second branch: active sessions with no database, reported under the synthetic
+            # datname 'server_process'.
+            # The first branch filters on datname IS NOT NULL so those sessions are not also emitted
+            # with a NULL tag_datname; the branches are disjoint, hence UNION ALL.
+            11: |-
+                SELECT datname as tag_datname, coalesce (wait_event, 'CPU*') || ' - ' || coalesce(wait_event_type, 'CPU*') as tag_wait_event_type, count(*) as total
+                FROM pg_stat_activity
+                WHERE state = 'active' AND datname IS NOT NULL
+                GROUP BY datname, tag_wait_event_type
+                UNION ALL
+                SELECT 'server_process' as tag_datname, coalesce (wait_event, 'CPU*') || ' - ' || coalesce(wait_event_type, 'CPU*') as tag_wait_event_type, count(*) as total
+                FROM pg_stat_activity
+                WHERE state = 'active' AND datname IS NULL
+                GROUP BY datname, tag_wait_event_type
+        gauges:
+            - total
+        is_instance_level: true
+    long_running_transactions:
+        description: >
+            This metric retrieves information about long-running transactions in the PostgreSQL database.
+            It counts the number of transactions that have been running for more than one minute and provides
+            the age of the oldest transaction, helping administrators identify potential blocking transactions.
+        sqls:
+            # Considers pg_stat_activity rows whose state is not 'idle', whose xact_start lies more than
+            # one minute before now(), and whose query text does not start with 'autovacuum:'.
+            # age_in_seconds is the largest clock_timestamp() - xact_start among those rows.
+            11: |-
+                select /* pgwatch_generated */
+                    (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
+                    count(*) as transactions,
+                    max(extract(epoch from (clock_timestamp() - xact_start)))::int8 as age_in_seconds
+                from pg_catalog.pg_stat_activity
+                where state is distinct from 'idle'
+                    and (now() - xact_start) > '1 minutes'::interval
+                    and query not like 'autovacuum:%'
+        gauges:
+            - transactions
+            - age_in_seconds
+        is_instance_level: true
+    database_wraparound:
+        description: >
+            This metric retrieves information about transaction ID wraparound in PostgreSQL databases.
+            It tracks the age of the oldest unfrozen transaction ID and multi-transaction ID for each database,
+            helping administrators monitor vacuum freeze operations and prevent transaction ID wraparound issues.
+        sqls:
+            # One row per database with datallowconn set; ages come from age(datfrozenxid)
+            # and mxid_age(datminmxid).
+            11: |-
+                select /* pgwatch_generated */
+                    (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
+                    datname::text as tag_datname,
+                    age(d.datfrozenxid)::int8 as age_datfrozenxid,
+                    mxid_age(d.datminmxid)::int8 as age_datminmxid
+                from pg_catalog.pg_database d
+                where d.datallowconn
+        gauges:
+            - age_datfrozenxid
+            - age_datminmxid
+    pg_stat_activity:
+        sqls:
+            # Cross-joins a fixed list of six session states with pg_database and left-joins the
+            # per-database, per-application, per-state session counts, so a (state, database) pair
+            # without sessions still yields a row with count and max_tx_duration of 0.
+            11: |-
+                SELECT /* pgwatch_generated */
+                    pg_database.datname AS tag_dbname,
+                    tmp2.tag_application_name,
+                    tmp.tag_state,
+                    COALESCE(count,0) as count,
+                    COALESCE(max_tx_duration,0) as max_tx_duration
+                FROM
+                    (
+                        VALUES ('active'),
+                               ('idle'),
+                               ('idle in transaction'),
+                               ('idle in transaction (aborted)'),
+                               ('fastpath function call'),
+                               ('disabled')
+                    ) AS tmp(tag_state)
+                    CROSS JOIN pg_database
+                    LEFT JOIN
+                    (
+                        SELECT datname,
+                            application_name as tag_application_name,
+                            state as tag_state,
+                            count(*) AS count,
+                            MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration
+                        FROM pg_stat_activity GROUP BY datname,
tag_application_name, tag_state + ) AS tmp2 + ON tmp.tag_state = tmp2.tag_state AND pg_database.datname = tmp2.datname + gauges: + - count + - max_tx_duration + pg_archiver: + sqls: + 11: | + WITH + current_wal_file AS ( + SELECT CASE WHEN NOT pg_is_in_recovery() THEN pg_walfile_name(pg_current_wal_insert_lsn()) ELSE NULL END pg_walfile_name + ), + current_wal AS ( + SELECT + ('x'||substring(pg_walfile_name,9,8))::bit(32)::int log, + ('x'||substring(pg_walfile_name,17,8))::bit(32)::int seg, + pg_walfile_name + FROM current_wal_file + ), + archive_wal AS( + SELECT + ('x'||substring(last_archived_wal,9,8))::bit(32)::int log, + ('x'||substring(last_archived_wal,17,8))::bit(32)::int seg, + last_archived_wal + FROM pg_stat_archiver + ) + SELECT coalesce(((cw.log - aw.log) * 256) + (cw.seg-aw.seg), -1) as pending_wal_count FROM current_wal cw, archive_wal aw + gauges: + - pending_wal_count + pg_blocked: + sqls: + 11: | + SELECT + count(blocked.transactionid) AS queries, + '__transaction__' AS tag_table + FROM pg_catalog.pg_locks blocked + WHERE NOT blocked.granted AND locktype = 'transactionid' + GROUP BY locktype + UNION + SELECT + count(blocked.relation) AS queries, + blocked.relation::regclass::text AS tag_table + FROM pg_catalog.pg_locks blocked + WHERE NOT blocked.granted AND locktype != 'transactionid' + GROUP BY relation + gauges: + - queries + pg_database_wraparound: + sqls: + 11: | + SELECT + datname as tag_datname, + age(d.datfrozenxid) as age_datfrozenxid, + mxid_age(d.datminmxid) as age_datminmxid + FROM + pg_catalog.pg_database d + WHERE + d.datallowconn + gauges: + - age_datfrozenxid + - age_datminmxid + pg_gin_index: + sqls: + 11: | + SELECT + index_name AS tag_index_name, + pending_list_bytes AS pending_list_bytes + FROM + postgres_gin_pending_list_size() + gauges: + - pending_list_bytes + pg_indexes_with_size: + sqls: + 11: SELECT schemaname as tag_schemaname, tablename as tag_relname, indexname as tag_indexrelname, pg_class.relpages + * 8192::bigint as 
index_size from pg_indexes inner join pg_namespace on pg_indexes.schemaname + = pg_namespace.nspname inner join pg_class on pg_class.relnamespace = pg_namespace.oid + and pg_class.relname = pg_indexes.indexname where pg_indexes.schemaname != 'pg_catalog' and pg_indexes.schemaname != 'pg_toast' + gauges: + - index_size + pg_integer_capacity: + sqls: + 11: | + SELECT 'sent_notifications' as tag_table_name, 'id' as tag_column_name, max(id) as current, (select (2^(numeric_precision-1)-1) as maximum FROM information_schema.columns WHERE (table_name, column_name) = ('sent_notifications', 'id')) FROM sent_notifications + UNION ALL + SELECT 'notes', 'id', max(id), (select (2^(numeric_precision-1)-1) FROM information_schema.columns WHERE (table_name, column_name) = ('notes', 'id')) FROM notes + UNION ALL + SELECT 'system_note_metadata', 'id', max(id), (select (2^(numeric_precision-1)-1) FROM information_schema.columns WHERE (table_name, column_name) = ('system_note_metadata', 'id')) FROM system_note_metadata + UNION ALL + SELECT 'merge_request_diffs', 'id', max(id), (select (2^(numeric_precision-1)-1) FROM information_schema.columns WHERE (table_name, column_name) = ('merge_request_diffs', 'id')) FROM merge_request_diffs + UNION ALL + SELECT 'merge_request_metrics', 'id', max(id), (select (2^(numeric_precision-1)-1) FROM information_schema.columns WHERE (table_name, column_name) = ('merge_request_metrics', 'id')) FROM merge_request_metrics + UNION ALL + SELECT 'deployments', 'id', max(id), (select (2^(numeric_precision-1)-1) FROM information_schema.columns WHERE (table_name, column_name) = ('deployments', 'id')) FROM deployments + gauges: + - current + - maximum + pg_long_running_transactions: + sqls: + 11: | + SELECT COUNT(*) as transactions, + COALESCE(MAX(EXTRACT(EPOCH FROM (clock_timestamp() - xact_start)))::int8, 0) AS age_in_seconds + FROM pg_catalog.pg_stat_activity + WHERE state is distinct from 'idle' AND (now() - xact_start) > '1 minutes'::interval AND query not 
like 'autovacuum:%' + gauges: + - '*' + pg_oldest_blocked: + sqls: + 11: | + SELECT coalesce(extract('epoch' from max(clock_timestamp() - state_change)), 0) age_seconds + FROM pg_catalog.pg_stat_activity + WHERE wait_event_type = 'Lock' + AND state='active' + gauges: + - age_seconds + pg_postmaster: + sqls: + 11: SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time() + gauges: + - start_time_seconds + pg_replication: + sqls: + 11: SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH + FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag, CASE WHEN pg_is_in_recovery() + THEN 1 ELSE 0 END as is_replica + gauges: + - lag + - is_replica + pg_replication_slots: + sqls: + 11: | + SELECT slot_name as tag_slot_name, slot_type as tag_slot_type, + case when active then 1.0 else 0.0 end AS active, + age(xmin) AS xmin_age, + age(catalog_xmin) AS catalog_xmin_age, + CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END - restart_lsn AS restart_lsn_bytes, + CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_lsn() END - confirmed_flush_lsn AS confirmed_flush_lsn_bytes + FROM pg_replication_slots + gauges: + - active + - xmin_age + - catalog_xmin_age + - restart_lsn_bytes + - confirmed_flush_lsn_bytes + pg_slow: + sqls: + 11: | + SELECT COUNT(*) AS queries + FROM pg_catalog.pg_stat_activity + WHERE state = 'active' AND (now() - query_start) > '1 seconds'::interval + gauges: + - queries + pg_stat_activity_autovacuum: + sqls: + 11: | + SELECT + SPLIT_PART(query, '.', 2) AS tag_relname, + EXTRACT(EPOCH FROM (clock_timestamp() - xact_start)) AS age_in_seconds + FROM + pg_catalog.pg_stat_activity + WHERE + query like 'autovacuum:%' + AND + EXTRACT(EPOCH FROM (clock_timestamp() - xact_start)) > 300 + gauges: + - age_in_seconds + pg_stat_activity_autovacuum_active: + sqls: + 11: | + SELECT v.phase as tag_phase, + CASE + when a.query ~ '^autovacuum.*to
prevent wraparound' then 'wraparound' + when a.query ~* '^vacuum' then 'user' + when a.pid is null then null + ELSE 'regular' + END as tag_mode, + count(1) as workers_count + FROM pg_stat_progress_vacuum v + LEFT JOIN pg_catalog.pg_stat_activity a using (pid) + GROUP BY 1,2 + gauges: + - workers_count + pg_stat_kcache: + sqls: + 11: "WITH ranked_kcache AS (\n SELECT pg_get_userbyid(pg_stat_kcache_raw.userid)\ + \ AS tag_user,\n pg_database.datname,\n pg_stat_kcache_raw.queryid,\n \ + \ pg_stat_kcache_raw.exec_user_time,\n pg_stat_kcache_raw.exec_system_time,\n\ + \ pg_stat_kcache_raw.exec_user_time + pg_stat_kcache_raw.exec_system_time\ + \ AS exec_total_time,\n row_number() over (order by pg_stat_kcache_raw.exec_user_time\ + \ + pg_stat_kcache_raw.exec_system_time desc) as rn\n FROM public.pg_stat_kcache()\ + \ pg_stat_kcache_raw\n INNER JOIN pg_database ON pg_database.oid = pg_stat_kcache_raw.dbid\n\ + ) \nSELECT\n ranked_kcache.tag_user,\n datname as tag_datname,\n queryid as tag_queryid,\n exec_user_time,\n\ + \ exec_system_time,\n exec_total_time\nFROM ranked_kcache\nWHERE rn <= 500\n\ + UNION ALL\nSELECT\n 'tail_dummy_user' as tag_user,\n current_database() as tag_datname,\n\ + \ -1 as tag_queryid,\n sum(exec_user_time) as exec_user_time,\n sum(exec_system_time)\ + \ as exec_system_time,\n sum(exec_total_time) as exec_total_time\nFROM ranked_kcache\n\ + WHERE rn > 500\n" + gauges: + - exec_user_time + - exec_system_time + - exec_total_time + pg_stat_replication: + sqls: + 11: "SELECT application_name as tag_application_name, \n (pg_current_wal_lsn() - '0/0') % (2^52)::bigint\ + \ as current_wal_lsn, \n (sent_lsn - '0/0') % (2^52)::bigint as sent_lsn, \n\ + \ (write_lsn - '0/0') % (2^52)::bigint as write_lsn, \n (flush_lsn - '0/0')\ + \ % (2^52)::bigint as flush_lsn, \n (replay_lsn - '0/0') % (2^52)::bigint as\ + \ replay_lsn,\n EXTRACT(EPOCH FROM (now() - reply_time)) reply_time_lag \n\ + \ FROM pg_stat_replication\n" + gauges: + - current_wal_lsn + - sent_lsn + - write_lsn + - flush_lsn +
- replay_lsn + - reply_time_lag + pg_stat_ssl: + sqls: + 11: | + SELECT pid as tag_pid, bits as bits, + CASE WHEN ssl THEN 1.0 ELSE 0.0 END AS active + FROM pg_stat_ssl + gauges: + - active + - bits + pg_stat_statements: + sqls: + 11: | + WITH ranked_statements AS ( + SELECT + pg_get_userbyid(userid) as user, + pg_database.datname, + pg_stat_statements.queryid , + pg_stat_statements.plans as plans_total, + pg_stat_statements.calls, + pg_stat_statements.total_exec_time as exec_time_total, + pg_stat_statements.total_plan_time as plan_time_total, + pg_stat_statements.rows, + (current_setting('block_size')::int * pg_stat_statements.shared_blks_hit) as shared_bytes_hit_total, + (current_setting('block_size')::int * pg_stat_statements.shared_blks_read) as shared_bytes_read_total, + (current_setting('block_size')::int * pg_stat_statements.shared_blks_dirtied) as shared_bytes_dirtied_total, + (current_setting('block_size')::int * pg_stat_statements.shared_blks_written) as shared_bytes_written_total, + pg_stat_statements.blk_read_time as block_read_total, + pg_stat_statements.blk_write_time as block_write_total, + pg_stat_statements.wal_records, + pg_stat_statements.wal_fpi, + pg_stat_statements.wal_bytes, + (current_setting('block_size')::int * pg_stat_statements.temp_blks_read) as temp_bytes_read, + (current_setting('block_size')::int * pg_stat_statements.temp_blks_written) as temp_bytes_written, + row_number() over (order by total_exec_time desc) as rn + FROM pg_stat_statements + JOIN pg_database + ON pg_database.oid = pg_stat_statements.dbid + ) + SELECT + ranked_statements.user as tag_user, + datname as tag_datname, + queryid as tag_queryid, + calls::int8 as calls, + plans_total::int8 as plans_total, + exec_time_total::int8 as exec_time_total, + plan_time_total::int8 as plan_time_total, + rows::int8 as rows, + shared_bytes_hit_total::int8 as shared_bytes_hit_total, + shared_bytes_read_total::int8 as shared_bytes_read_total, + shared_bytes_dirtied_total::int8 as 
shared_bytes_dirtied_total, + shared_bytes_written_total::int8 as shared_bytes_written_total, + block_read_total::int8 as block_read_total, + block_write_total::int8 as block_write_total, + wal_records::int8 as wal_records, + wal_fpi::int8 as wal_fpi, + wal_bytes::int8 as wal_bytes, + temp_bytes_read::int8 as temp_bytes_read, + temp_bytes_written::int8 as temp_bytes_written + FROM ranked_statements + WHERE rn <= 500 + UNION ALL + SELECT + 'tail_dummy_user' as tag_user, + current_database() as tag_datname, + -1 as tag_queryid, + sum(calls)::int8 as calls, + sum(plans_total)::int8 as plans_total, + sum(exec_time_total)::int8 as exec_time_total, + sum(plan_time_total)::int8 as plan_time_total, + sum(rows)::int8 as rows, + sum(shared_bytes_hit_total)::int8 as shared_bytes_hit_total, + sum(shared_bytes_read_total)::int8 as shared_bytes_read_total, + sum(shared_bytes_dirtied_total)::int8 as shared_bytes_dirtied_total, + sum(shared_bytes_written_total)::int8 as shared_bytes_written_total, + sum(block_read_total)::int8 as block_read_total, + sum(block_write_total)::int8 as block_write_total, + sum(wal_records)::int8 as wal_records, + sum(wal_fpi)::int8 as wal_fpi, + sum(wal_bytes)::int8 as wal_bytes, + sum(temp_bytes_read)::int8 as temp_bytes_read, + sum(temp_bytes_written)::int8 as temp_bytes_written + FROM ranked_statements + WHERE rn > 500 + gauges: + - calls + - plans_total + - exec_time_total + - plan_time_total + - rows + - shared_bytes_hit_total + - shared_bytes_read_total + - shared_bytes_dirtied_total + - shared_bytes_written_total + - block_read_total + - block_write_total + - wal_records + - wal_fpi + - wal_bytes + - temp_bytes_read + - temp_bytes_written + pg_stat_user_indexes: + sqls: + 11: SELECT schemaname as tag_schemaname, relname as tag_relname, indexrelname as tag_indexrelname, idx_scan, idx_tup_read, idx_tup_fetch + FROM pg_stat_user_indexes + gauges: + - idx_scan + - idx_tup_read + - idx_tup_fetch + pg_stat_user_tables: + sqls: + 11: | + SELECT +
current_database() as tag_datname, + schemaname as tag_schemaname, + relname as tag_relname, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + GREATEST(last_autovacuum, last_vacuum, '1970-01-01Z') as last_vacuum, + GREATEST(last_autoanalyze, last_analyze, '1970-01-01Z') as last_analyze, + (vacuum_count + autovacuum_count) as vacuum_count, + (analyze_count + autoanalyze_count) as analyze_count + FROM + pg_stat_user_tables + gauges: + - seq_scan + - seq_tup_read + - idx_scan + - idx_tup_fetch + - n_tup_ins + - n_tup_upd + - n_tup_del + - n_tup_hot_upd + - n_live_tup + - n_dead_tup + - last_vacuum + - last_analyze + - vacuum_count + - analyze_count + pg_stat_wal_receiver: + sqls: + 11: | + SELECT case status when 'stopped' then 0 when 'starting' then 1 when 'streaming' then 2 when 'waiting' then 3 when 'restarting' then 4 when 'stopping' then 5 else -1 end as status, + (receive_start_lsn- '0/0') % (2^52)::bigint as receive_start_lsn, + receive_start_tli, + (flushed_lsn- '0/0') % (2^52)::bigint as flushed_lsn, + received_tli, + extract(epoch from last_msg_send_time) as last_msg_send_time, + extract(epoch from last_msg_receipt_time) as last_msg_receipt_time, + (latest_end_lsn - '0/0') % (2^52)::bigint as latest_end_lsn, + extract(epoch from latest_end_time) as latest_end_time, + substring(slot_name from 'repmgr_slot_([0-9]*)') as upstream_node, + trim(both '''' from substring(conninfo from 'host=([^ ]*)')) as tag_upstream_host, + slot_name + FROM pg_catalog.pg_stat_wal_receiver + gauges: + - status + - receive_start_lsn + - receive_start_tli + - flushed_lsn + - received_tli + - last_msg_send_time + - last_msg_receipt_time + - latest_end_lsn + - latest_end_time + - upstream_node + pg_statio_user_indexes: + sqls: + 11: SELECT schemaname as tag_schemaname, relname as tag_relname, indexrelname as tag_indexrelname, idx_blks_read, idx_blks_hit FROM + pg_statio_user_indexes +
gauges: + - idx_blks_read + - idx_blks_hit + pg_statio_user_tables: + sqls: + 11: SELECT schemaname as tag_schemaname, relname as tag_relname, heap_blks_read, heap_blks_hit, idx_blks_read, + idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit + FROM pg_statio_user_tables + gauges: + - heap_blks_read + - heap_blks_hit + - idx_blks_read + - idx_blks_hit + - toast_blks_read + - toast_blks_hit + - tidx_blks_read + - tidx_blks_hit + pg_stuck_idle_in_transaction: + sqls: + 11: | + SELECT COUNT(*) AS queries + FROM pg_catalog.pg_stat_activity + WHERE state = 'idle in transaction' AND (now() - query_start) > '10 minutes'::interval + gauges: + - queries + pg_total_relation_size: + sqls: + 11: | + SELECT relnamespace::regnamespace as tag_schemaname, + relname as tag_relname, + pg_total_relation_size(oid) bytes + FROM pg_class + WHERE relkind = 'r'; + gauges: + - bytes + pg_txid: + sqls: + 11: | + SELECT + CASE WHEN pg_is_in_recovery() THEN 'NaN'::float ELSE txid_current() % (2^52)::bigint END AS current, + CASE WHEN pg_is_in_recovery() THEN 'NaN'::float ELSE txid_snapshot_xmin(txid_current_snapshot()) % (2^52)::bigint END AS xmin, + CASE WHEN pg_is_in_recovery() THEN 'NaN'::float ELSE txid_current() - txid_snapshot_xmin(txid_current_snapshot()) END AS xmin_age + gauges: + - current + - xmin + - xmin_age + pg_wait_sampling: + sqls: + 11: | + SELECT + coalesce(p.event_type, 'CPU*') as tag_wait_type, + coalesce(p.event, 'CPU*') as tag_wait_event, + SUM(p.count) as count, + p.queryid as tag_queryid, + pgsa.backend_type as tag_backend_type + FROM pg_wait_sampling_profile p LEFT OUTER JOIN pg_stat_statements s on p.queryid = s.queryid LEFT OUTER JOIN pg_stat_activity pgsa on pgsa.pid = p.pid GROUP BY p.queryid, p.event_type, p.event, pgsa.backend_type + gauges: + - count + pg_wait_sampling_agg: + sqls: + 11: | + SELECT + coalesce(p.event_type, 'CPU*') as tag_wait_type, + coalesce(p.event, 'CPU*') as tag_wait_event, + SUM(p.count) as count + FROM 
pg_wait_sampling_profile p GROUP BY p.event_type, p.event + gauges: + - count + pg_xlog_position: + sqls: + 11: | + SELECT CASE + WHEN pg_is_in_recovery() + THEN (pg_last_wal_replay_lsn() - '0/0') % (2^52)::bigint + ELSE (pg_current_wal_lsn() - '0/0') % (2^52)::bigint + END AS bytes + gauges: + - bytes + + + +presets: + full: + description: almost all available metrics for an even deeper performance understanding + metrics: + backends: 1 + bgwriter: 1 + checkpointer: 1 + db_size: 1 + db_stats: 1 + locks_mode: 1 + replication: 1 + replication_slots: 1 + settings: 1 + sproc_stats: 1 + table_io_stats: 1 + table_stats: 1 + wait_events: 1 + wal: 1 + pg_stat_activity: 1 + pg_replication: 1 + pg_stat_wal_receiver: 1 + pg_archiver: 1 + pg_postmaster: 1 + pg_stat_user_tables: 1 + pg_statio_user_tables: 1 + pg_stat_user_indexes: 1 + pg_statio_user_indexes: 1 + pg_indexes_with_size: 1 + pg_stat_statements: 1 + pg_stat_kcache: 1 + pg_total_relation_size: 1 + pg_blocked: 1 + pg_oldest_blocked: 1 + pg_slow: 1 + pg_long_running_transactions: 1 + pg_stuck_idle_in_transaction: 1 + pg_txid: 1 + pg_database_wraparound: 1 + pg_xlog_position: 1 + pg_replication_slots: 1 + pg_stat_ssl: 1 + pg_integer_capacity: 1 + pg_wait_sampling: 1 + pg_wait_sampling_agg: 1 + pg_stat_activity_autovacuum: 1 + pg_stat_activity_autovacuum_active: 1 + pg_gin_index: 1 diff --git a/config/pgwatch-prometheus/sources.yml b/config/pgwatch-prometheus/sources.yml index 66a424fac9880880525e18a43d7ba0c278d8e4e0..998ed5e5d64dfb3a36d9e842e6a25e3dd5244036 100644 --- a/config/pgwatch-prometheus/sources.yml +++ b/config/pgwatch-prometheus/sources.yml @@ -1,14 +1,12 @@ -# PGWatch Sources Configuration - Prometheus Instance -# This instance exposes metrics in Prometheus format +# PGWatch Sources Configuration - Prometheus Instance -- name: target-database +- unique_name: target-database conn_str: postgresql://pgwatch_monitor:monitor_pass@target-db:5432/target_database - kind: postgres + preset_metrics: full
custom_metrics: - pg_stat_statements_calls: 30 + is_enabled: true + group: default custom_tags: env: demo cluster: local sink_type: prometheus - is_enabled: true - stmt_timeout: 15 \ No newline at end of file diff --git a/config/prometheus/prometheus.yml b/config/prometheus/prometheus.yml index 61a876b71eeed87f108c757c0e402767cd727418..d3eb569d5d65af1177e76af7f5029054491efdf9 100644 --- a/config/prometheus/prometheus.yml +++ b/config/prometheus/prometheus.yml @@ -1,6 +1,7 @@ global: - scrape_interval: 1s - evaluation_interval: 1s + scrape_interval: 15s # Default scrape interval + evaluation_interval: 15s # Default evaluation interval + scrape_timeout: 10s # Global scrape timeout rule_files: # - "first_rules.yml" @@ -10,5 +11,6 @@ scrape_configs: - job_name: 'pgwatch-prometheus' static_configs: - targets: ['pgwatch-prometheus:9091'] - scrape_interval: 30s + scrape_interval: 30s # How often to scrape PGWatch + scrape_timeout: 25s # Timeout for each scrape (must be < scrape_interval) metrics_path: /pgwatch \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 98e75901f7ae29f8ab9d4b233b2f7b1d31edf9ad..1a935fa25fbcfb747fa03e2e183117f3c920651c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,20 +1,39 @@ version: '3.8' services: - # Target Database - The PostgreSQL database being monitored - target-db: - image: postgres:15 - container_name: target-db - environment: - POSTGRES_DB: target_database - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - command: ["postgres", "-c", "shared_preload_libraries=pg_stat_statements", "-c", "pg_stat_statements.track=all"] - ports: - - "5432:5432" + # Sources Generator - Generates sources.yml files from instances.yml template + sources-generator: + image: alpine:latest + container_name: sources-generator + working_dir: /app volumes: - - target_db_data:/var/lib/postgresql/data - - ./config/target-db/init.sql:/docker-entrypoint-initdb.d/init.sql + - ./instances.yml:/app/instances.yaml
+ - ./config:/app/config + command: > + sh -c " + mkdir -p /app/config/pgwatch-postgres /app/config/pgwatch-prometheus && + echo '# PGWatch Sources Configuration - PostgreSQL Instance' > /app/config/pgwatch-postgres/sources.yml && + sed 's/~sink_type~/postgresql/g' /app/instances.yaml >> /app/config/pgwatch-postgres/sources.yml && + echo '# PGWatch Sources Configuration - Prometheus Instance' > /app/config/pgwatch-prometheus/sources.yml && + echo '' >> /app/config/pgwatch-prometheus/sources.yml && + sed 's/~sink_type~/prometheus/g' /app/instances.yaml >> /app/config/pgwatch-prometheus/sources.yml && + echo 'Generated sources.yml files for both postgres and prometheus' + " + + # Target Database - The PostgreSQL database being monitored + # target-db: + # image: postgres:15 + # container_name: target-db + # environment: + # POSTGRES_DB: target_database + # POSTGRES_USER: postgres + # POSTGRES_PASSWORD: postgres + # command: ["postgres", "-c", "shared_preload_libraries=pg_stat_statements", "-c", "pg_stat_statements.track=all"] + # ports: + # - "5432:5432" + # volumes: + # - target_db_data:/var/lib/postgresql/data + # - ./config/target-db/init.sql:/docker-entrypoint-initdb.d/init.sql # Postgres Sink - Storage for metrics in PostgreSQL format sink-postgres: @@ -55,8 +74,9 @@ services: ports: - "8080:8080" depends_on: + - sources-generator - sink-postgres - - target-db + # - target-db volumes: - ./config/pgwatch-postgres/sources.yml:/etc/pgwatch/sources.yml - ./config/pgwatch-postgres/metrics.yml:/etc/pgwatch/metrics.yml @@ -70,8 +90,9 @@ services: - "8089:8089" - "9091:9091" depends_on: + - sources-generator - sink-prometheus - - target-db + # - target-db volumes: - ./config/pgwatch-prometheus/sources.yml:/etc/pgwatch/sources.yml - ./config/pgwatch-prometheus/metrics.yml:/etc/pgwatch/metrics.yml @@ -93,7 +114,7 @@ services: - sink-prometheus volumes: - target_db_data: + # target_db_data: sink_postgres_data: prometheus_data: grafana_data: diff --git a/instances.yml 
b/instances.yml new file mode 100644 index 0000000000000000000000000000000000000000..8cdaa9d4b7d11140c47e58e388c0a15a8aa3f73f --- /dev/null +++ b/instances.yml @@ -0,0 +1,10 @@ +- unique_name: target-database + conn_str: postgresql://pgwatch_monitor:monitor_pass@target-db:5432/target_database + preset_metrics: full + custom_metrics: + is_enabled: true + group: default + custom_tags: + env: demo + cluster: local + sink_type: ~sink_type~ diff --git a/old-metrics.yml b/old-metrics.yml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391