PostgreSQL Source Code (git master)
bufmgr.c File Reference
#include "postgres.h"
#include <sys/file.h>
#include <unistd.h>
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "executor/instrument.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/aio.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/read_stream.h"
#include "storage/smgr.h"
#include "storage/standby.h"
#include "utils/memdebug.h"
#include "utils/ps_status.h"
#include "utils/rel.h"
#include "utils/resowner.h"
#include "utils/timestamp.h"
#include "lib/sort_template.h"
Include dependency graph for bufmgr.c: [graph omitted]


Data Structures

struct  PrivateRefCountData
 
struct  PrivateRefCountEntry
 
struct  CkptTsStatus
 
struct  SMgrSortArray
 

Macros

#define BufHdrGetBlock(bufHdr)   ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
 
#define BufferGetLSN(bufHdr)   (PageGetLSN(BufHdrGetBlock(bufHdr)))
 
#define LocalBufHdrGetBlock(bufHdr)    LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
#define BUF_WRITTEN   0x01
 
#define BUF_REUSABLE   0x02
 
#define RELS_BSEARCH_THRESHOLD   20
 
#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)
 
#define REFCOUNT_ARRAY_ENTRIES   8
 
#define BufferIsPinned(bufnum)
 
#define ST_SORT   sort_checkpoint_bufferids
 
#define ST_ELEMENT_TYPE   CkptSortItem
 
#define ST_COMPARE(a, b)   ckpt_buforder_comparator(a, b)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define ST_SORT   sort_pending_writebacks
 
#define ST_ELEMENT_TYPE   PendingWriteback
 
#define ST_COMPARE(a, b)   buffertag_comparator(&a->tag, &b->tag)
 
#define ST_SCOPE   static
 
#define ST_DEFINE
 
#define READV_COUNT_BITS   7
 
#define READV_COUNT_MASK   ((1 << READV_COUNT_BITS) - 1)
 

Typedefs

typedef struct PrivateRefCountData PrivateRefCountData
 
typedef struct PrivateRefCountEntry PrivateRefCountEntry
 
typedef struct CkptTsStatus CkptTsStatus
 
typedef struct SMgrSortArray SMgrSortArray
 

Functions

static void ReservePrivateRefCountEntry (void)
 
static PrivateRefCountEntry * NewPrivateRefCountEntry (Buffer buffer)
 
static PrivateRefCountEntry * GetPrivateRefCountEntry (Buffer buffer, bool do_move)
 
static int32 GetPrivateRefCount (Buffer buffer)
 
static void ForgetPrivateRefCountEntry (PrivateRefCountEntry *ref)
 
static void ResOwnerReleaseBufferIO (Datum res)
 
static char * ResOwnerPrintBufferIO (Datum res)
 
static void ResOwnerReleaseBufferPin (Datum res)
 
static char * ResOwnerPrintBufferPin (Datum res)
 
static pg_noinline PrivateRefCountEntry * GetPrivateRefCountEntrySlow (Buffer buffer, bool do_move)
 
static Buffer ReadBuffer_common (Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
static BlockNumber ExtendBufferedRelCommon (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static BlockNumber ExtendBufferedRelShared (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
 
static bool PinBuffer (BufferDesc *buf, BufferAccessStrategy strategy, bool skip_if_not_valid)
 
static void PinBuffer_Locked (BufferDesc *buf)
 
static void UnpinBuffer (BufferDesc *buf)
 
static void UnpinBufferNoOwner (BufferDesc *buf)
 
static void BufferSync (int flags)
 
static int SyncOneBuffer (int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 
static void WaitIO (BufferDesc *buf)
 
static void AbortBufferIO (Buffer buffer)
 
static void shared_buffer_write_error_callback (void *arg)
 
static void local_buffer_write_error_callback (void *arg)
 
static BufferDesc * BufferAlloc (SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
 
static bool AsyncReadBuffers (ReadBuffersOperation *operation, int *nblocks_progress)
 
static void CheckReadBuffersOperation (ReadBuffersOperation *operation, bool is_complete)
 
static Buffer GetVictimBuffer (BufferAccessStrategy strategy, IOContext io_context)
 
static void FlushUnlockedBuffer (BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
 
static void FlushBuffer (BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
 
static void FindAndDropRelationBuffers (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
 
static void RelationCopyStorageUsingBuffer (RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
 
static void AtProcExit_Buffers (int code, Datum arg)
 
static void CheckForBufferLeaks (void)
 
static int rlocator_comparator (const void *p1, const void *p2)
 
static int buffertag_comparator (const BufferTag *ba, const BufferTag *bb)
 
static int ckpt_buforder_comparator (const CkptSortItem *a, const CkptSortItem *b)
 
static int ts_ckpt_progress_comparator (Datum a, Datum b, void *arg)
 
PrefetchBufferResult PrefetchSharedBuffer (SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
 
PrefetchBufferResult PrefetchBuffer (Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 
bool ReadRecentBuffer (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer)
 
Buffer ReadBuffer (Relation reln, BlockNumber blockNum)
 
Buffer ReadBufferExtended (Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
 
Buffer ReadBufferWithoutRelcache (RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
 
Buffer ExtendBufferedRel (BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
 
BlockNumber ExtendBufferedRelBy (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
 
Buffer ExtendBufferedRelTo (BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, BlockNumber extend_to, ReadBufferMode mode)
 
static void ZeroAndLockBuffer (Buffer buffer, ReadBufferMode mode, bool already_valid)
 
static pg_attribute_always_inline Buffer PinBufferForBlock (Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
 
static pg_attribute_always_inline bool StartReadBuffersImpl (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags, bool allow_forwarding)
 
bool StartReadBuffers (ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags)
 
bool StartReadBuffer (ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
 
static bool ReadBuffersCanStartIOOnce (Buffer buffer, bool nowait)
 
static bool ReadBuffersCanStartIO (Buffer buffer, bool nowait)
 
static void ProcessReadBuffersResult (ReadBuffersOperation *operation)
 
void WaitReadBuffers (ReadBuffersOperation *operation)
 
static void InvalidateBuffer (BufferDesc *buf)
 
static bool InvalidateVictimBuffer (BufferDesc *buf_hdr)
 
uint32 GetPinLimit (void)
 
uint32 GetAdditionalPinLimit (void)
 
void LimitAdditionalPins (uint32 *additional_pins)
 
bool BufferIsLockedByMe (Buffer buffer)
 
bool BufferIsLockedByMeInMode (Buffer buffer, BufferLockMode mode)
 
bool BufferIsDirty (Buffer buffer)
 
void MarkBufferDirty (Buffer buffer)
 
Buffer ReleaseAndReadBuffer (Buffer buffer, Relation relation, BlockNumber blockNum)
 
static void WakePinCountWaiter (BufferDesc *buf)
 
void TrackNewBufferPin (Buffer buf)
 
bool BgBufferSync (WritebackContext *wb_context)
 
void AtEOXact_Buffers (bool isCommit)
 
void InitBufferManagerAccess (void)
 
char * DebugPrintBufferRefcount (Buffer buffer)
 
void CheckPointBuffers (int flags)
 
BlockNumber BufferGetBlockNumber (Buffer buffer)
 
void BufferGetTag (Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
 
BlockNumber RelationGetNumberOfBlocksInFork (Relation relation, ForkNumber forkNum)
 
bool BufferIsPermanent (Buffer buffer)
 
XLogRecPtr BufferGetLSNAtomic (Buffer buffer)
 
void DropRelationBuffers (SMgrRelation smgr_reln, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
 
void DropRelationsAllBuffers (SMgrRelation *smgr_reln, int nlocators)
 
void DropDatabaseBuffers (Oid dbid)
 
void FlushRelationBuffers (Relation rel)
 
void FlushRelationsAllBuffers (SMgrRelation *smgrs, int nrels)
 
void CreateAndCopyRelationData (RelFileLocator src_rlocator, RelFileLocator dst_rlocator, bool permanent)
 
void FlushDatabaseBuffers (Oid dbid)
 
void FlushOneBuffer (Buffer buffer)
 
void ReleaseBuffer (Buffer buffer)
 
void UnlockReleaseBuffer (Buffer buffer)
 
void IncrBufferRefCount (Buffer buffer)
 
void MarkBufferDirtyHint (Buffer buffer, bool buffer_std)
 
void UnlockBuffers (void)
 
void LockBuffer (Buffer buffer, BufferLockMode mode)
 
bool ConditionalLockBuffer (Buffer buffer)
 
void CheckBufferIsPinnedOnce (Buffer buffer)
 
void LockBufferForCleanup (Buffer buffer)
 
bool HoldingBufferPinThatDelaysRecovery (void)
 
bool ConditionalLockBufferForCleanup (Buffer buffer)
 
bool IsBufferCleanupOK (Buffer buffer)
 
bool StartBufferIO (BufferDesc *buf, bool forInput, bool nowait)
 
void TerminateBufferIO (BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, bool forget_owner, bool release_aio)
 
uint32 LockBufHdr (BufferDesc *desc)
 
pg_noinline uint32 WaitBufHdrUnlocked (BufferDesc *buf)
 
void WritebackContextInit (WritebackContext *context, int *max_pending)
 
void ScheduleBufferTagForWriteback (WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
 
void IssuePendingWritebacks (WritebackContext *wb_context, IOContext io_context)
 
static bool EvictUnpinnedBufferInternal (BufferDesc *desc, bool *buffer_flushed)
 
bool EvictUnpinnedBuffer (Buffer buf, bool *buffer_flushed)
 
void EvictAllUnpinnedBuffers (int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
 
void EvictRelUnpinnedBuffers (Relation rel, int32 *buffers_evicted, int32 *buffers_flushed, int32 *buffers_skipped)
 
static bool MarkDirtyUnpinnedBufferInternal (Buffer buf, BufferDesc *desc, bool *buffer_already_dirty)
 
bool MarkDirtyUnpinnedBuffer (Buffer buf, bool *buffer_already_dirty)
 
void MarkDirtyRelUnpinnedBuffers (Relation rel, int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)
 
void MarkDirtyAllUnpinnedBuffers (int32 *buffers_dirtied, int32 *buffers_already_dirty, int32 *buffers_skipped)
 
static pg_attribute_always_inline void buffer_stage_common (PgAioHandle *ioh, bool is_write, bool is_temp)
 
static void buffer_readv_decode_error (PgAioResult result, bool *zeroed_any, bool *ignored_any, uint8 *zeroed_or_error_count, uint8 *checkfail_count, uint8 *first_off)
 
static void buffer_readv_encode_error (PgAioResult *result, bool is_temp, bool zeroed_any, bool ignored_any, uint8 error_count, uint8 zeroed_count, uint8 checkfail_count, uint8 first_error_off, uint8 first_zeroed_off, uint8 first_ignored_off)
 
static pg_attribute_always_inline void buffer_readv_complete_one (PgAioTargetData *td, uint8 buf_off, Buffer buffer, uint8 flags, bool failed, bool is_temp, bool *buffer_invalid, bool *failed_checksum, bool *ignored_checksum, bool *zeroed_buffer)
 
static pg_attribute_always_inline PgAioResult buffer_readv_complete (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data, bool is_temp)
 
static void buffer_readv_report (PgAioResult result, const PgAioTargetData *td, int elevel)
 
static void shared_buffer_readv_stage (PgAioHandle *ioh, uint8 cb_data)
 
static PgAioResult shared_buffer_readv_complete (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
 
static PgAioResult shared_buffer_readv_complete_local (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
 
static void local_buffer_readv_stage (PgAioHandle *ioh, uint8 cb_data)
 
static PgAioResult local_buffer_readv_complete (PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
 

Variables

bool zero_damaged_pages = false
 
int bgwriter_lru_maxpages = 100
 
double bgwriter_lru_multiplier = 2.0
 
bool track_io_timing = false
 
int effective_io_concurrency = DEFAULT_EFFECTIVE_IO_CONCURRENCY
 
int maintenance_io_concurrency = DEFAULT_MAINTENANCE_IO_CONCURRENCY
 
int io_combine_limit = DEFAULT_IO_COMBINE_LIMIT
 
int io_combine_limit_guc = DEFAULT_IO_COMBINE_LIMIT
 
int io_max_combine_limit = DEFAULT_IO_COMBINE_LIMIT
 
int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER
 
int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER
 
int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER
 
static BufferDesc * PinCountWaitBuf = NULL
 
static Buffer PrivateRefCountArrayKeys [REFCOUNT_ARRAY_ENTRIES]
 
static struct PrivateRefCountEntry PrivateRefCountArray [REFCOUNT_ARRAY_ENTRIES]
 
static HTAB * PrivateRefCountHash = NULL
 
static int32 PrivateRefCountOverflowed = 0
 
static uint32 PrivateRefCountClock = 0
 
static int ReservedRefCountSlot = -1
 
static int PrivateRefCountEntryLast = -1
 
static uint32 MaxProportionalPins
 
const ResourceOwnerDesc buffer_io_resowner_desc
 
const ResourceOwnerDesc buffer_pin_resowner_desc
 
const PgAioHandleCallbacks aio_shared_buffer_readv_cb
 
const PgAioHandleCallbacks aio_local_buffer_readv_cb
 

Macro Definition Documentation

◆ BUF_DROP_FULL_SCAN_THRESHOLD

#define BUF_DROP_FULL_SCAN_THRESHOLD   (uint64) (NBuffers / 32)

Definition at line 91 of file bufmgr.c.

◆ BUF_REUSABLE

#define BUF_REUSABLE   0x02

Definition at line 81 of file bufmgr.c.

◆ BUF_WRITTEN

#define BUF_WRITTEN   0x01

Definition at line 80 of file bufmgr.c.

◆ BufferGetLSN

#define BufferGetLSN (   bufHdr)    (PageGetLSN(BufHdrGetBlock(bufHdr)))

Definition at line 73 of file bufmgr.c.

◆ BufferIsPinned

#define BufferIsPinned (   bufnum)
Value:
( \
!BufferIsValid(bufnum) ? \
false \
: \
BufferIsLocal(bufnum) ? \
(LocalRefCount[-(bufnum) - 1] > 0) \
: \
(GetPrivateRefCount(bufnum) > 0) \
)
static int32 GetPrivateRefCount(Buffer buffer)
Definition: bufmgr.c:517
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:387
int32 * LocalRefCount
Definition: localbuf.c:49

Definition at line 577 of file bufmgr.c.
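
As a usage sketch only (BufferIsPinned is file-local, so picture this inside bufmgr.c; the helper name is hypothetical), the invariant it tests: a successful ReadBuffer() leaves the page pinned by this backend until ReleaseBuffer() drops the pin.

static void
buffer_pin_invariant_demo(Relation rel, BlockNumber blockNum)
{
	Buffer		buf = ReadBuffer(rel, blockNum);

	Assert(BufferIsPinned(buf));	/* this backend's refcount is now > 0 */
	ReleaseBuffer(buf);				/* drop the pin taken by ReadBuffer() */
	/* holds provided this backend had no other pin on the same page */
	Assert(!BufferIsPinned(buf));
}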

◆ BufHdrGetBlock

#define BufHdrGetBlock (   bufHdr)    ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))

Definition at line 72 of file bufmgr.c.
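
The macro is plain pointer arithmetic over the shared BufferBlocks array; the equivalent computation, written out as a hypothetical helper for illustration:

static Block
block_for_buf_id(int buf_id)
{
	/* shared buffer slot N keeps its data page at byte offset N * BLCKSZ */
	return (Block) (BufferBlocks + (Size) buf_id * BLCKSZ);
}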

◆ LocalBufHdrGetBlock

#define LocalBufHdrGetBlock (   bufHdr)     LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

Definition at line 76 of file bufmgr.c.

◆ READV_COUNT_BITS

#define READV_COUNT_BITS   7

◆ READV_COUNT_MASK

#define READV_COUNT_MASK   ((1 << READV_COUNT_BITS) - 1)

◆ REFCOUNT_ARRAY_ENTRIES

#define REFCOUNT_ARRAY_ENTRIES   8

Definition at line 122 of file bufmgr.c.

◆ RELS_BSEARCH_THRESHOLD

#define RELS_BSEARCH_THRESHOLD   20

Definition at line 83 of file bufmgr.c.

◆ ST_COMPARE [1/2]

#define ST_COMPARE (   a,
  b 
)    ckpt_buforder_comparator(a, b)

Definition at line 6550 of file bufmgr.c.

◆ ST_COMPARE [2/2]

#define ST_COMPARE (   a,
  b 
)    buffertag_comparator(&a->tag, &b->tag)

Definition at line 6550 of file bufmgr.c.

◆ ST_DEFINE [1/2]

#define ST_DEFINE

Definition at line 6552 of file bufmgr.c.

◆ ST_DEFINE [2/2]

#define ST_DEFINE

Definition at line 6552 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [1/2]

#define ST_ELEMENT_TYPE   CkptSortItem

Definition at line 6549 of file bufmgr.c.

◆ ST_ELEMENT_TYPE [2/2]

#define ST_ELEMENT_TYPE   PendingWriteback

Definition at line 6549 of file bufmgr.c.

◆ ST_SCOPE [1/2]

#define ST_SCOPE   static

Definition at line 6551 of file bufmgr.c.

◆ ST_SCOPE [2/2]

#define ST_SCOPE   static

Definition at line 6551 of file bufmgr.c.

◆ ST_SORT [1/2]

#define ST_SORT   sort_checkpoint_bufferids

Definition at line 6548 of file bufmgr.c.

◆ ST_SORT [2/2]

#define ST_SORT   sort_pending_writebacks

Definition at line 6548 of file bufmgr.c.

Typedef Documentation

◆ CkptTsStatus

typedef struct CkptTsStatus CkptTsStatus

◆ PrivateRefCountData

typedef struct PrivateRefCountData PrivateRefCountData

◆ PrivateRefCountEntry

typedef struct PrivateRefCountEntry PrivateRefCountEntry

◆ SMgrSortArray

typedef struct SMgrSortArray SMgrSortArray

Function Documentation

◆ AbortBufferIO()

static void AbortBufferIO ( Buffer  buffer)
static

Definition at line 6261 of file bufmgr.c.

6262{
6263 BufferDesc *buf_hdr = GetBufferDescriptor(buffer - 1);
6264 uint32 buf_state;
6265
6266 buf_state = LockBufHdr(buf_hdr);
6267 Assert(buf_state & (BM_IO_IN_PROGRESS | BM_TAG_VALID));
6268
6269 if (!(buf_state & BM_VALID))
6270 {
6271 Assert(!(buf_state & BM_DIRTY));
6272 UnlockBufHdr(buf_hdr);
6273 }
6274 else
6275 {
6276 Assert(buf_state & BM_DIRTY);
6277 UnlockBufHdr(buf_hdr);
6278
6279 /* Issue notice if this is not the first failure... */
6280 if (buf_state & BM_IO_ERROR)
6281 {
6282 /* Buffer is pinned, so we can read tag without spinlock */
 6283 ereport(WARNING,
 6284 (errcode(ERRCODE_IO_ERROR),
 6285 errmsg("could not write block %u of %s",
 6286 buf_hdr->tag.blockNum,
 6287 relpathperm(BufTagGetRelFileLocator(&buf_hdr->tag),
 6288 BufTagGetForkNum(&buf_hdr->tag)).str),
6289 errdetail("Multiple failures --- write error might be permanent.")));
6290 }
6291 }
6292
6293 TerminateBufferIO(buf_hdr, false, BM_IO_ERROR, false, false);
6294}
#define BM_TAG_VALID
Definition: buf_internals.h:71
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static void UnlockBufHdr(BufferDesc *desc)
#define BM_DIRTY
Definition: buf_internals.h:69
#define BM_IO_IN_PROGRESS
Definition: buf_internals.h:72
static RelFileLocator BufTagGetRelFileLocator(const BufferTag *tag)
#define BM_VALID
Definition: buf_internals.h:70
#define BM_IO_ERROR
Definition: buf_internals.h:73
static BufferDesc * GetBufferDescriptor(uint32 id)
void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, bool forget_owner, bool release_aio)
Definition: bufmgr.c:6199
uint32 LockBufHdr(BufferDesc *desc)
Definition: bufmgr.c:6359
uint32_t uint32
Definition: c.h:552
int errdetail(const char *fmt,...)
Definition: elog.c:1216
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define WARNING
Definition: elog.h:36
#define ereport(elevel,...)
Definition: elog.h:150
Assert(PointerIsAligned(start, uint64))
#define relpathperm(rlocator, forknum)
Definition: relpath.h:146
BufferTag tag
BlockNumber blockNum

References Assert(), buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, BufTagGetForkNum(), BufTagGetRelFileLocator(), ereport, errcode(), errdetail(), errmsg(), GetBufferDescriptor(), LockBufHdr(), relpathperm, BufferDesc::tag, TerminateBufferIO(), UnlockBufHdr(), and WARNING.

Referenced by ResOwnerReleaseBufferIO().

◆ AsyncReadBuffers()

static bool AsyncReadBuffers ( ReadBuffersOperation *  operation,
int *  nblocks_progress 
)
static

Definition at line 1839 of file bufmgr.c.

1840{
1841 Buffer *buffers = &operation->buffers[0];
1842 int flags = operation->flags;
1843 BlockNumber blocknum = operation->blocknum;
1844 ForkNumber forknum = operation->forknum;
1845 char persistence = operation->persistence;
1846 int16 nblocks_done = operation->nblocks_done;
1847 Buffer *io_buffers = &operation->buffers[nblocks_done];
1848 int io_buffers_len = 0;
1849 PgAioHandle *ioh;
1850 uint32 ioh_flags = 0;
1851 void *io_pages[MAX_IO_COMBINE_LIMIT];
1852 IOContext io_context;
1853 IOObject io_object;
1854 bool did_start_io;
1855
1856 /*
1857 * When this IO is executed synchronously, either because the caller will
1858 * immediately block waiting for the IO or because IOMETHOD_SYNC is used,
1859 * the AIO subsystem needs to know.
1860 */
1861 if (flags & READ_BUFFERS_SYNCHRONOUSLY)
1862 ioh_flags |= PGAIO_HF_SYNCHRONOUS;
1863
1864 if (persistence == RELPERSISTENCE_TEMP)
1865 {
1866 io_context = IOCONTEXT_NORMAL;
1867 io_object = IOOBJECT_TEMP_RELATION;
1868 ioh_flags |= PGAIO_HF_REFERENCES_LOCAL;
1869 }
1870 else
1871 {
1872 io_context = IOContextForStrategy(operation->strategy);
1873 io_object = IOOBJECT_RELATION;
1874 }
1875
1876 /*
1877 * If zero_damaged_pages is enabled, add the READ_BUFFERS_ZERO_ON_ERROR
1878 * flag. The reason for that is that, hopefully, zero_damaged_pages isn't
1879 * set globally, but on a per-session basis. The completion callback,
1880 * which may be run in other processes, e.g. in IO workers, may have a
1881 * different value of the zero_damaged_pages GUC.
1882 *
1883 * XXX: We probably should eventually use a different flag for
1884 * zero_damaged_pages, so we can report different log levels / error codes
1885 * for zero_damaged_pages and ZERO_ON_ERROR.
1886 */
 1887 if (zero_damaged_pages)
 1888 flags |= READ_BUFFERS_ZERO_ON_ERROR;
 1889
1890 /*
1891 * For the same reason as with zero_damaged_pages we need to use this
1892 * backend's ignore_checksum_failure value.
1893 */
 1894 if (ignore_checksum_failure)
 1895 flags |= READ_BUFFERS_IGNORE_CHECKSUM_FAILURES;
 1896
1897
1898 /*
1899 * To be allowed to report stats in the local completion callback we need
1900 * to prepare to report stats now. This ensures we can safely report the
1901 * checksum failure even in a critical section.
1902 */
 1903 pgstat_prepare_report_checksum_failure(operation->smgr->smgr_rlocator.locator.dbOid);
 1904
1905 /*
1906 * Get IO handle before ReadBuffersCanStartIO(), as pgaio_io_acquire()
1907 * might block, which we don't want after setting IO_IN_PROGRESS.
1908 *
1909 * If we need to wait for IO before we can get a handle, submit
1910 * already-staged IO first, so that other backends don't need to wait.
1911 * There wouldn't be a deadlock risk, as pgaio_io_acquire() just needs to
1912 * wait for already submitted IO, which doesn't require additional locks,
1913 * but it could still cause undesirable waits.
1914 *
1915 * A secondary benefit is that this would allow us to measure the time in
1916 * pgaio_io_acquire() without causing undue timer overhead in the common,
1917 * non-blocking, case. However, currently the pgstats infrastructure
1918 * doesn't really allow that, as it a) asserts that an operation can't
1919 * have time without operations b) doesn't have an API to report
1920 * "accumulated" time.
1921 */
 1922 ioh = pgaio_io_acquire_nb(CurrentResourceOwner, &operation->io_return);
 1923 if (unlikely(!ioh))
 1924 {
 1925 pgaio_submit_staged();
 1926
 1927 ioh = pgaio_io_acquire(CurrentResourceOwner, &operation->io_return);
 1928 }
1929
1930 /*
1931 * Check if we can start IO on the first to-be-read buffer.
1932 *
1933 * If an I/O is already in progress in another backend, we want to wait
1934 * for the outcome: either done, or something went wrong and we will
1935 * retry.
1936 */
1937 if (!ReadBuffersCanStartIO(buffers[nblocks_done], false))
1938 {
1939 /*
1940 * Someone else has already completed this block, we're done.
1941 *
1942 * When IO is necessary, ->nblocks_done is updated in
1943 * ProcessReadBuffersResult(), but that is not called if no IO is
1944 * necessary. Thus update here.
1945 */
1946 operation->nblocks_done += 1;
1947 *nblocks_progress = 1;
1948
1949 pgaio_io_release(ioh);
1950 pgaio_wref_clear(&operation->io_wref);
1951 did_start_io = false;
1952
1953 /*
1954 * Report and track this as a 'hit' for this backend, even though it
1955 * must have started out as a miss in PinBufferForBlock(). The other
1956 * backend will track this as a 'read'.
1957 */
1958 TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + operation->nblocks_done,
1959 operation->smgr->smgr_rlocator.locator.spcOid,
1960 operation->smgr->smgr_rlocator.locator.dbOid,
1961 operation->smgr->smgr_rlocator.locator.relNumber,
1962 operation->smgr->smgr_rlocator.backend,
1963 true);
1964
 1965 if (persistence == RELPERSISTENCE_TEMP)
 1966 pgBufferUsage.local_blks_hit += 1;
 1967 else
 1968 pgBufferUsage.shared_blks_hit += 1;
 1969
1970 if (operation->rel)
1971 pgstat_count_buffer_hit(operation->rel);
1972
1973 pgstat_count_io_op(io_object, io_context, IOOP_HIT, 1, 0);
1974
 1975 if (VacuumCostActive)
 1976 VacuumCostBalance += VacuumCostPageHit;
 1977 }
1978 else
1979 {
1980 instr_time io_start;
1981
1982 /* We found a buffer that we need to read in. */
1983 Assert(io_buffers[0] == buffers[nblocks_done]);
1984 io_pages[0] = BufferGetBlock(buffers[nblocks_done]);
1985 io_buffers_len = 1;
1986
1987 /*
1988 * How many neighboring-on-disk blocks can we scatter-read into other
1989 * buffers at the same time? In this case we don't wait if we see an
1990 * I/O already in progress. We already set BM_IO_IN_PROGRESS for the
1991 * head block, so we should get on with that I/O as soon as possible.
1992 */
1993 for (int i = nblocks_done + 1; i < operation->nblocks; i++)
1994 {
1995 if (!ReadBuffersCanStartIO(buffers[i], true))
1996 break;
1997 /* Must be consecutive block numbers. */
1998 Assert(BufferGetBlockNumber(buffers[i - 1]) ==
1999 BufferGetBlockNumber(buffers[i]) - 1);
2000 Assert(io_buffers[io_buffers_len] == buffers[i]);
2001
2002 io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]);
2003 }
2004
2005 /* get a reference to wait for in WaitReadBuffers() */
2006 pgaio_io_get_wref(ioh, &operation->io_wref);
2007
2008 /* provide the list of buffers to the completion callbacks */
2009 pgaio_io_set_handle_data_32(ioh, (uint32 *) io_buffers, io_buffers_len);
2010
 2011 pgaio_io_register_callbacks(ioh,
 2012 persistence == RELPERSISTENCE_TEMP ?
 2013 PGAIO_HCB_LOCAL_BUFFER_READV :
 2014 PGAIO_HCB_SHARED_BUFFER_READV,
 2015 flags);
2016
2017 pgaio_io_set_flag(ioh, ioh_flags);
2018
2019 /* ---
2020 * Even though we're trying to issue IO asynchronously, track the time
2021 * in smgrstartreadv():
2022 * - if io_method == IOMETHOD_SYNC, we will always perform the IO
2023 * immediately
2024 * - the io method might not support the IO (e.g. worker IO for a temp
2025 * table)
2026 * ---
2027 */
 2028 io_start = pgstat_prepare_io_time(track_io_timing);
 2029 smgrstartreadv(ioh, operation->smgr, forknum,
2030 blocknum + nblocks_done,
2031 io_pages, io_buffers_len);
2032 pgstat_count_io_op_time(io_object, io_context, IOOP_READ,
2033 io_start, 1, io_buffers_len * BLCKSZ);
2034
2035 if (persistence == RELPERSISTENCE_TEMP)
2036 pgBufferUsage.local_blks_read += io_buffers_len;
2037 else
2038 pgBufferUsage.shared_blks_read += io_buffers_len;
2039
2040 /*
2041 * Track vacuum cost when issuing IO, not after waiting for it.
2042 * Otherwise we could end up issuing a lot of IO in a short timespan,
2043 * despite a low cost limit.
2044 */
2045 if (VacuumCostActive)
2046 VacuumCostBalance += VacuumCostPageMiss * io_buffers_len;
2047
2048 *nblocks_progress = io_buffers_len;
2049 did_start_io = true;
2050 }
2051
2052 return did_start_io;
2053}
PgAioHandle * pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret)
Definition: aio.c:162
void pgaio_wref_clear(PgAioWaitRef *iow)
Definition: aio.c:964
void pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
Definition: aio.c:366
void pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
Definition: aio.c:330
void pgaio_submit_staged(void)
Definition: aio.c:1123
void pgaio_io_release(PgAioHandle *ioh)
Definition: aio.c:240
PgAioHandle * pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
Definition: aio.c:188
@ PGAIO_HCB_LOCAL_BUFFER_READV
Definition: aio.h:200
@ PGAIO_HCB_SHARED_BUFFER_READV
Definition: aio.h:198
@ PGAIO_HF_SYNCHRONOUS
Definition: aio.h:70
@ PGAIO_HF_REFERENCES_LOCAL
Definition: aio.h:60
void pgaio_io_set_handle_data_32(PgAioHandle *ioh, uint32 *data, uint8 len)
Definition: aio_callback.c:140
void pgaio_io_register_callbacks(PgAioHandle *ioh, PgAioHandleCallbackID cb_id, uint8 cb_data)
Definition: aio_callback.c:86
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
bool track_io_timing
Definition: bufmgr.c:169
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:4318
static bool ReadBuffersCanStartIO(Buffer buffer, bool nowait)
Definition: bufmgr.c:1639
bool zero_damaged_pages
Definition: bufmgr.c:166
#define READ_BUFFERS_ZERO_ON_ERROR
Definition: bufmgr.h:122
static Block BufferGetBlock(Buffer buffer)
Definition: bufmgr.h:403
#define MAX_IO_COMBINE_LIMIT
Definition: bufmgr.h:173
#define READ_BUFFERS_IGNORE_CHECKSUM_FAILURES
Definition: bufmgr.h:126
#define READ_BUFFERS_SYNCHRONOUSLY
Definition: bufmgr.h:128
bool ignore_checksum_failure
Definition: bufpage.c:27
int16_t int16
Definition: c.h:547
#define unlikely(x)
Definition: c.h:418
IOContext IOContextForStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:747
int VacuumCostPageMiss
Definition: globals.c:152
bool VacuumCostActive
Definition: globals.c:158
int VacuumCostBalance
Definition: globals.c:157
int VacuumCostPageHit
Definition: globals.c:151
BufferUsage pgBufferUsage
Definition: instrument.c:20
int i
Definition: isn.c:77
IOObject
Definition: pgstat.h:276
@ IOOBJECT_RELATION
Definition: pgstat.h:277
@ IOOBJECT_TEMP_RELATION
Definition: pgstat.h:278
IOContext
Definition: pgstat.h:285
@ IOCONTEXT_NORMAL
Definition: pgstat.h:289
@ IOOP_READ
Definition: pgstat.h:315
@ IOOP_HIT
Definition: pgstat.h:309
#define pgstat_count_buffer_hit(rel)
Definition: pgstat.h:720
void pgstat_prepare_report_checksum_failure(Oid dboid)
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:91
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt, uint64 bytes)
Definition: pgstat_io.c:68
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition: pgstat_io.c:122
ForkNumber
Definition: relpath.h:56
ResourceOwner CurrentResourceOwner
Definition: resowner.c:173
void smgrstartreadv(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
Definition: smgr.c:753
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_read
Definition: instrument.h:27
int64 local_blks_read
Definition: instrument.h:31
int64 shared_blks_hit
Definition: instrument.h:26
ForkNumber forknum
Definition: bufmgr.h:137
PgAioWaitRef io_wref
Definition: bufmgr.h:150
Buffer * buffers
Definition: bufmgr.h:145
SMgrRelation smgr
Definition: bufmgr.h:135
BufferAccessStrategy strategy
Definition: bufmgr.h:138
BlockNumber blocknum
Definition: bufmgr.h:146
PgAioReturn io_return
Definition: bufmgr.h:151
RelFileLocator locator
RelFileNumber relNumber
RelFileLocatorBackend smgr_rlocator
Definition: smgr.h:38

References Assert(), RelFileLocatorBackend::backend, ReadBuffersOperation::blocknum, BufferGetBlock(), BufferGetBlockNumber(), ReadBuffersOperation::buffers, CurrentResourceOwner, RelFileLocator::dbOid, ReadBuffersOperation::flags, ReadBuffersOperation::forknum, i, ignore_checksum_failure, ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_HIT, IOOP_READ, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, RelFileLocatorBackend::locator, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, PGAIO_HCB_LOCAL_BUFFER_READV, PGAIO_HCB_SHARED_BUFFER_READV, PGAIO_HF_REFERENCES_LOCAL, PGAIO_HF_SYNCHRONOUS, pgaio_io_acquire(), pgaio_io_acquire_nb(), pgaio_io_get_wref(), pgaio_io_register_callbacks(), pgaio_io_release(), pgaio_io_set_flag(), pgaio_io_set_handle_data_32(), pgaio_submit_staged(), pgaio_wref_clear(), pgBufferUsage, pgstat_count_buffer_hit, pgstat_count_io_op(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_prepare_report_checksum_failure(), READ_BUFFERS_IGNORE_CHECKSUM_FAILURES, READ_BUFFERS_SYNCHRONOUSLY, READ_BUFFERS_ZERO_ON_ERROR, ReadBuffersCanStartIO(), ReadBuffersOperation::rel, RelFileLocator::relNumber, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, ReadBuffersOperation::smgr, SMgrRelationData::smgr_rlocator, smgrstartreadv(), RelFileLocator::spcOid, ReadBuffersOperation::strategy, track_io_timing, unlikely, VacuumCostActive, VacuumCostBalance, VacuumCostPageHit, VacuumCostPageMiss, and zero_damaged_pages.

Referenced by StartReadBuffersImpl(), and WaitReadBuffers().
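
For context, a condensed sketch of the caller pattern that reaches AsyncReadBuffers(), loosely paraphrasing ReadBuffer_common(); flag selection and error handling are abbreviated, and the function name is hypothetical:

static Buffer
read_one_block_sketch(Relation rel, ForkNumber forkNum, BlockNumber blockNum)
{
	ReadBuffersOperation operation;
	Buffer		buffer;

	operation.smgr = RelationGetSmgr(rel);
	operation.rel = rel;
	operation.persistence = rel->rd_rel->relpersistence;
	operation.forknum = forkNum;
	operation.strategy = NULL;

	/* pins the buffer; on a cache miss this issues the read via AIO */
	if (StartReadBuffer(&operation, &buffer, blockNum,
						READ_BUFFERS_SYNCHRONOUSLY))
		WaitReadBuffers(&operation);	/* finish the IO that was started */

	return buffer;
}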

◆ AtEOXact_Buffers()

void AtEOXact_Buffers ( bool  isCommit)

Definition at line 4084 of file bufmgr.c.

4085{
4086 CheckForBufferLeaks();
4087
4088 AtEOXact_LocalBuffers(isCommit);
4089
4090 Assert(PrivateRefCountOverflowed == 0);
4091}
static void CheckForBufferLeaks(void)
Definition: bufmgr.c:4154
static int32 PrivateRefCountOverflowed
Definition: bufmgr.c:241
void AtEOXact_LocalBuffers(bool isCommit)
Definition: localbuf.c:1003

References Assert(), AtEOXact_LocalBuffers(), CheckForBufferLeaks(), and PrivateRefCountOverflowed.

Referenced by AbortTransaction(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), CommitTransaction(), PrepareTransaction(), and WalWriterMain().

◆ AtProcExit_Buffers()

static void AtProcExit_Buffers ( int  code,
Datum  arg 
)
static

Definition at line 4136 of file bufmgr.c.

4137{
4138 UnlockBuffers();
4139
4140 CheckForBufferLeaks();
4141
4142 /* localbuf.c needs a chance too */
4143 AtProcExit_LocalBuffers();
4144}
void UnlockBuffers(void)
Definition: bufmgr.c:5668
void AtProcExit_LocalBuffers(void)
Definition: localbuf.c:1014

References AtProcExit_LocalBuffers(), CheckForBufferLeaks(), and UnlockBuffers().

Referenced by InitBufferManagerAccess().

◆ BgBufferSync()

bool BgBufferSync ( WritebackContext *  wb_context)

Definition at line 3716 of file bufmgr.c.

3717{
3718 /* info obtained from freelist.c */
3719 int strategy_buf_id;
3720 uint32 strategy_passes;
3721 uint32 recent_alloc;
3722
3723 /*
3724 * Information saved between calls so we can determine the strategy
3725 * point's advance rate and avoid scanning already-cleaned buffers.
3726 */
3727 static bool saved_info_valid = false;
3728 static int prev_strategy_buf_id;
3729 static uint32 prev_strategy_passes;
3730 static int next_to_clean;
3731 static uint32 next_passes;
3732
3733 /* Moving averages of allocation rate and clean-buffer density */
3734 static float smoothed_alloc = 0;
3735 static float smoothed_density = 10.0;
3736
3737 /* Potentially these could be tunables, but for now, not */
3738 float smoothing_samples = 16;
3739 float scan_whole_pool_milliseconds = 120000.0;
3740
3741 /* Used to compute how far we scan ahead */
3742 long strategy_delta;
3743 int bufs_to_lap;
3744 int bufs_ahead;
3745 float scans_per_alloc;
3746 int reusable_buffers_est;
3747 int upcoming_alloc_est;
3748 int min_scan_buffers;
3749
3750 /* Variables for the scanning loop proper */
3751 int num_to_scan;
3752 int num_written;
3753 int reusable_buffers;
3754
3755 /* Variables for final smoothed_density update */
3756 long new_strategy_delta;
3757 uint32 new_recent_alloc;
3758
3759 /*
3760 * Find out where the clock-sweep currently is, and how many buffer
3761 * allocations have happened since our last call.
3762 */
3763 strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);
3764
3765 /* Report buffer alloc counts to pgstat */
3766 PendingBgWriterStats.buf_alloc += recent_alloc;
3767
3768 /*
3769 * If we're not running the LRU scan, just stop after doing the stats
3770 * stuff. We mark the saved state invalid so that we can recover sanely
3771 * if LRU scan is turned back on later.
3772 */
3773 if (bgwriter_lru_maxpages <= 0)
3774 {
3775 saved_info_valid = false;
3776 return true;
3777 }
3778
3779 /*
3780 * Compute strategy_delta = how many buffers have been scanned by the
3781 * clock-sweep since last time. If first time through, assume none. Then
3782 * see if we are still ahead of the clock-sweep, and if so, how many
3783 * buffers we could scan before we'd catch up with it and "lap" it. Note:
3784 * weird-looking coding of xxx_passes comparisons are to avoid bogus
3785 * behavior when the passes counts wrap around.
3786 */
3787 if (saved_info_valid)
3788 {
3789 int32 passes_delta = strategy_passes - prev_strategy_passes;
3790
3791 strategy_delta = strategy_buf_id - prev_strategy_buf_id;
3792 strategy_delta += (long) passes_delta * NBuffers;
3793
3794 Assert(strategy_delta >= 0);
3795
3796 if ((int32) (next_passes - strategy_passes) > 0)
3797 {
3798 /* we're one pass ahead of the strategy point */
3799 bufs_to_lap = strategy_buf_id - next_to_clean;
3800#ifdef BGW_DEBUG
3801 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3802 next_passes, next_to_clean,
3803 strategy_passes, strategy_buf_id,
3804 strategy_delta, bufs_to_lap);
3805#endif
3806 }
3807 else if (next_passes == strategy_passes &&
3808 next_to_clean >= strategy_buf_id)
3809 {
3810 /* on same pass, but ahead or at least not behind */
3811 bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
3812#ifdef BGW_DEBUG
3813 elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
3814 next_passes, next_to_clean,
3815 strategy_passes, strategy_buf_id,
3816 strategy_delta, bufs_to_lap);
3817#endif
3818 }
3819 else
3820 {
3821 /*
3822 * We're behind, so skip forward to the strategy point and start
3823 * cleaning from there.
3824 */
3825#ifdef BGW_DEBUG
3826 elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
3827 next_passes, next_to_clean,
3828 strategy_passes, strategy_buf_id,
3829 strategy_delta);
3830#endif
3831 next_to_clean = strategy_buf_id;
3832 next_passes = strategy_passes;
3833 bufs_to_lap = NBuffers;
3834 }
3835 }
3836 else
3837 {
3838 /*
3839 * Initializing at startup or after LRU scanning had been off. Always
3840 * start at the strategy point.
3841 */
3842#ifdef BGW_DEBUG
3843 elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
3844 strategy_passes, strategy_buf_id);
3845#endif
3846 strategy_delta = 0;
3847 next_to_clean = strategy_buf_id;
3848 next_passes = strategy_passes;
3849 bufs_to_lap = NBuffers;
3850 }
3851
3852 /* Update saved info for next time */
3853 prev_strategy_buf_id = strategy_buf_id;
3854 prev_strategy_passes = strategy_passes;
3855 saved_info_valid = true;
3856
3857 /*
3858 * Compute how many buffers had to be scanned for each new allocation, ie,
3859 * 1/density of reusable buffers, and track a moving average of that.
3860 *
3861 * If the strategy point didn't move, we don't update the density estimate
3862 */
3863 if (strategy_delta > 0 && recent_alloc > 0)
3864 {
3865 scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
3866 smoothed_density += (scans_per_alloc - smoothed_density) /
3867 smoothing_samples;
3868 }
3869
3870 /*
3871 * Estimate how many reusable buffers there are between the current
3872 * strategy point and where we've scanned ahead to, based on the smoothed
3873 * density estimate.
3874 */
3875 bufs_ahead = NBuffers - bufs_to_lap;
3876 reusable_buffers_est = (float) bufs_ahead / smoothed_density;
3877
3878 /*
3879 * Track a moving average of recent buffer allocations. Here, rather than
3880 * a true average we want a fast-attack, slow-decline behavior: we
3881 * immediately follow any increase.
3882 */
3883 if (smoothed_alloc <= (float) recent_alloc)
3884 smoothed_alloc = recent_alloc;
3885 else
3886 smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
3887 smoothing_samples;
3888
3889 /* Scale the estimate by a GUC to allow more aggressive tuning. */
3890 upcoming_alloc_est = (int) (smoothed_alloc * bgwriter_lru_multiplier);
3891
3892 /*
3893 * If recent_alloc remains at zero for many cycles, smoothed_alloc will
3894 * eventually underflow to zero, and the underflows produce annoying
3895 * kernel warnings on some platforms. Once upcoming_alloc_est has gone to
3896 * zero, there's no point in tracking smaller and smaller values of
3897 * smoothed_alloc, so just reset it to exactly zero to avoid this
3898 * syndrome. It will pop back up as soon as recent_alloc increases.
3899 */
3900 if (upcoming_alloc_est == 0)
3901 smoothed_alloc = 0;
3902
3903 /*
3904 * Even in cases where there's been little or no buffer allocation
3905 * activity, we want to make a small amount of progress through the buffer
3906 * cache so that as many reusable buffers as possible are clean after an
3907 * idle period.
3908 *
3909 * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
3910 * the BGW will be called during the scan_whole_pool time; slice the
3911 * buffer pool into that many sections.
3912 */
3913 min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));
3914
3915 if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
3916 {
3917#ifdef BGW_DEBUG
3918 elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
3919 upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
3920#endif
3921 upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
3922 }
3923
3924 /*
3925 * Now write out dirty reusable buffers, working forward from the
3926 * next_to_clean point, until we have lapped the strategy scan, or cleaned
3927 * enough buffers to match our estimate of the next cycle's allocation
3928 * requirements, or hit the bgwriter_lru_maxpages limit.
3929 */
3930
3931 num_to_scan = bufs_to_lap;
3932 num_written = 0;
3933 reusable_buffers = reusable_buffers_est;
3934
3935 /* Execute the LRU scan */
3936 while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
3937 {
3938 int sync_state = SyncOneBuffer(next_to_clean, true,
3939 wb_context);
3940
3941 if (++next_to_clean >= NBuffers)
3942 {
3943 next_to_clean = 0;
3944 next_passes++;
3945 }
3946 num_to_scan--;
3947
3948 if (sync_state & BUF_WRITTEN)
3949 {
3950 reusable_buffers++;
3951 if (++num_written >= bgwriter_lru_maxpages)
 3952 {
 3953 PendingBgWriterStats.maxwritten_clean++;
 3954 break;
3955 }
3956 }
3957 else if (sync_state & BUF_REUSABLE)
3958 reusable_buffers++;
3959 }
3960
 3961 PendingBgWriterStats.buf_written_clean += num_written;
 3962
3963#ifdef BGW_DEBUG
3964 elog(DEBUG1, "bgwriter: recent_alloc=%u smoothed=%.2f delta=%ld ahead=%d density=%.2f reusable_est=%d upcoming_est=%d scanned=%d wrote=%d reusable=%d",
3965 recent_alloc, smoothed_alloc, strategy_delta, bufs_ahead,
3966 smoothed_density, reusable_buffers_est, upcoming_alloc_est,
3967 bufs_to_lap - num_to_scan,
3968 num_written,
3969 reusable_buffers - reusable_buffers_est);
3970#endif
3971
3972 /*
3973 * Consider the above scan as being like a new allocation scan.
3974 * Characterize its density and update the smoothed one based on it. This
3975 * effectively halves the moving average period in cases where both the
3976 * strategy and the background writer are doing some useful scanning,
3977 * which is helpful because a long memory isn't as desirable on the
3978 * density estimates.
3979 */
3980 new_strategy_delta = bufs_to_lap - num_to_scan;
3981 new_recent_alloc = reusable_buffers - reusable_buffers_est;
3982 if (new_strategy_delta > 0 && new_recent_alloc > 0)
3983 {
3984 scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
3985 smoothed_density += (scans_per_alloc - smoothed_density) /
3986 smoothing_samples;
3987
3988#ifdef BGW_DEBUG
3989 elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
3990 new_recent_alloc, new_strategy_delta,
3991 scans_per_alloc, smoothed_density);
3992#endif
3993 }
3994
3995 /* Return true if OK to hibernate */
3996 return (bufs_to_lap == 0 && recent_alloc == 0);
3997}
int BgWriterDelay
Definition: bgwriter.c:58
#define BUF_REUSABLE
Definition: bufmgr.c:81
double bgwriter_lru_multiplier
Definition: bufmgr.c:168
static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
Definition: bufmgr.c:4014
int bgwriter_lru_maxpages
Definition: bufmgr.c:167
#define BUF_WRITTEN
Definition: bufmgr.c:80
int32_t int32
Definition: c.h:548
#define DEBUG2
Definition: elog.h:29
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:226
int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
Definition: freelist.c:321
int NBuffers
Definition: globals.c:142
PgStat_BgWriterStats PendingBgWriterStats
PgStat_Counter buf_written_clean
Definition: pgstat.h:242
PgStat_Counter maxwritten_clean
Definition: pgstat.h:243
PgStat_Counter buf_alloc
Definition: pgstat.h:244

References Assert(), bgwriter_lru_maxpages, bgwriter_lru_multiplier, BgWriterDelay, PgStat_BgWriterStats::buf_alloc, BUF_REUSABLE, BUF_WRITTEN, PgStat_BgWriterStats::buf_written_clean, DEBUG1, DEBUG2, elog, PgStat_BgWriterStats::maxwritten_clean, NBuffers, PendingBgWriterStats, StrategySyncStart(), and SyncOneBuffer().

Referenced by BackgroundWriterMain().
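
The allocation-rate tracking above is an exponential moving average with fast-attack, slow-decline behavior; as a standalone sketch (not PostgreSQL code):

static float
smooth_alloc(float smoothed_alloc, unsigned recent_alloc)
{
	const float smoothing_samples = 16;

	/* follow any increase immediately ("fast attack") */
	if (smoothed_alloc <= (float) recent_alloc)
		return (float) recent_alloc;
	/* otherwise decay 1/16th of the way toward the new sample */
	return smoothed_alloc +
		((float) recent_alloc - smoothed_alloc) / smoothing_samples;
}

For example, with smoothed_alloc = 100 and recent_alloc = 20 the next value is 100 + (20 - 100) / 16 = 95, while a later recent_alloc = 200 would snap the average straight to 200.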

◆ buffer_readv_complete()

static pg_attribute_always_inline PgAioResult buffer_readv_complete ( PgAioHandle *  ioh,
PgAioResult  prior_result,
uint8  cb_data,
bool  is_temp 
)
static

Definition at line 7469 of file bufmgr.c.

7471{
 7472 PgAioResult result = prior_result;
 7473 PgAioTargetData *td = pgaio_io_get_target_data(ioh);
 7474 uint8 first_error_off = 0;
7475 uint8 first_zeroed_off = 0;
7476 uint8 first_ignored_off = 0;
7477 uint8 error_count = 0;
7478 uint8 zeroed_count = 0;
7479 uint8 ignored_count = 0;
7480 uint8 checkfail_count = 0;
7481 uint64 *io_data;
7482 uint8 handle_data_len;
7483
7484 if (is_temp)
7485 {
7486 Assert(td->smgr.is_temp);
 7487 Assert(pgaio_io_get_owner(ioh) == MyProcNumber);
 7488 }
7489 else
7490 Assert(!td->smgr.is_temp);
7491
7492 /*
7493 * Iterate over all the buffers affected by this IO and call the
7494 * per-buffer completion function for each buffer.
7495 */
7496 io_data = pgaio_io_get_handle_data(ioh, &handle_data_len);
7497 for (uint8 buf_off = 0; buf_off < handle_data_len; buf_off++)
7498 {
7499 Buffer buf = io_data[buf_off];
7500 bool failed;
7501 bool failed_verification = false;
7502 bool failed_checksum = false;
7503 bool zeroed_buffer = false;
7504 bool ignored_checksum = false;
 7505
 7506 Assert(BufferIsValid(buf));
 7507
7508 /*
7509 * If the entire I/O failed on a lower-level, each buffer needs to be
7510 * marked as failed. In case of a partial read, the first few buffers
7511 * may be ok.
7512 */
7513 failed =
7514 prior_result.status == PGAIO_RS_ERROR
7515 || prior_result.result <= buf_off;
7516
7517 buffer_readv_complete_one(td, buf_off, buf, cb_data, failed, is_temp,
7518 &failed_verification,
7519 &failed_checksum,
7520 &ignored_checksum,
7521 &zeroed_buffer);
7522
7523 /*
7524 * Track information about the number of different kinds of error
7525 * conditions across all pages, as there can be multiple pages failing
7526 * verification as part of one IO.
7527 */
7528 if (failed_verification && !zeroed_buffer && error_count++ == 0)
7529 first_error_off = buf_off;
7530 if (zeroed_buffer && zeroed_count++ == 0)
7531 first_zeroed_off = buf_off;
7532 if (ignored_checksum && ignored_count++ == 0)
7533 first_ignored_off = buf_off;
7534 if (failed_checksum)
7535 checkfail_count++;
7536 }
7537
7538 /*
7539 * If the smgr read succeeded [partially] and page verification failed for
7540 * some of the pages, adjust the IO's result state appropriately.
7541 */
7542 if (prior_result.status != PGAIO_RS_ERROR &&
7543 (error_count > 0 || ignored_count > 0 || zeroed_count > 0))
7544 {
7545 buffer_readv_encode_error(&result, is_temp,
7546 zeroed_count > 0, ignored_count > 0,
7547 error_count, zeroed_count, checkfail_count,
7548 first_error_off, first_zeroed_off,
7549 first_ignored_off);
7550 pgaio_result_report(result, td, DEBUG1);
7551 }
7552
7553 /*
7554 * For shared relations this reporting is done in
7555 * shared_buffer_readv_complete_local().
7556 */
7557 if (is_temp && checkfail_count > 0)
 7558 pgstat_report_checksum_failures_in_db(td->smgr.rlocator.dbOid,
 7559 checkfail_count);
7560
7561 return result;
7562}
ProcNumber pgaio_io_get_owner(PgAioHandle *ioh)
Definition: aio.c:355
uint64 * pgaio_io_get_handle_data(PgAioHandle *ioh, uint8 *len)
Definition: aio_callback.c:156
void pgaio_result_report(PgAioResult result, const PgAioTargetData *target_data, int elevel)
Definition: aio_callback.c:173
PgAioTargetData * pgaio_io_get_target_data(PgAioHandle *ioh)
Definition: aio_target.c:73
@ PGAIO_RS_ERROR
Definition: aio_types.h:84
static pg_attribute_always_inline void buffer_readv_complete_one(PgAioTargetData *td, uint8 buf_off, Buffer buffer, uint8 flags, bool failed, bool is_temp, bool *buffer_invalid, bool *failed_checksum, bool *ignored_checksum, bool *zeroed_buffer)
Definition: bufmgr.c:7325
static void buffer_readv_encode_error(PgAioResult *result, bool is_temp, bool zeroed_any, bool ignored_any, uint8 error_count, uint8 zeroed_count, uint8 checkfail_count, uint8 first_error_off, uint8 first_zeroed_off, uint8 first_ignored_off)
Definition: bufmgr.c:7230
uint8_t uint8
Definition: c.h:550
uint64_t uint64
Definition: c.h:553
ProcNumber MyProcNumber
Definition: globals.c:90
static char buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:71
void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
uint32 status
Definition: aio_types.h:108
int32 result
Definition: aio_types.h:113
RelFileLocator rlocator
Definition: aio_types.h:65
struct PgAioTargetData::@125 smgr

References Assert(), buf, buffer_readv_complete_one(), buffer_readv_encode_error(), BufferIsValid(), RelFileLocator::dbOid, DEBUG1, PgAioTargetData::is_temp, MyProcNumber, pgaio_io_get_handle_data(), pgaio_io_get_owner(), pgaio_io_get_target_data(), pgaio_result_report(), PGAIO_RS_ERROR, pgstat_report_checksum_failures_in_db(), PgAioResult::result, PgAioTargetData::rlocator, PgAioTargetData::smgr, and PgAioResult::status.

Referenced by local_buffer_readv_complete(), and shared_buffer_readv_complete().

◆ buffer_readv_complete_one()

static pg_attribute_always_inline void buffer_readv_complete_one ( PgAioTargetData *  td,
uint8  buf_off,
Buffer  buffer,
uint8  flags,
bool  failed,
bool  is_temp,
bool *  buffer_invalid,
bool *  failed_checksum,
bool *  ignored_checksum,
bool *  zeroed_buffer 
)
static

Definition at line 7325 of file bufmgr.c.

7331{
7332 BufferDesc *buf_hdr = is_temp ?
7333 GetLocalBufferDescriptor(-buffer - 1)
7334 : GetBufferDescriptor(buffer - 1);
7335 BufferTag tag = buf_hdr->tag;
7336 char *bufdata = BufferGetBlock(buffer);
7337 uint32 set_flag_bits;
7338 int piv_flags;
7339
7340 /* check that the buffer is in the expected state for a read */
7341#ifdef USE_ASSERT_CHECKING
7342 {
7343 uint32 buf_state = pg_atomic_read_u32(&buf_hdr->state);
7344
7345 Assert(buf_state & BM_TAG_VALID);
7346 Assert(!(buf_state & BM_VALID));
7347 /* temp buffers don't use BM_IO_IN_PROGRESS */
7348 if (!is_temp)
7349 Assert(buf_state & BM_IO_IN_PROGRESS);
7350 Assert(!(buf_state & BM_DIRTY));
7351 }
7352#endif
7353
7354 *buffer_invalid = false;
7355 *failed_checksum = false;
7356 *ignored_checksum = false;
7357 *zeroed_buffer = false;
7358
7359 /*
7360 * We ask PageIsVerified() to only log the message about checksum errors,
7361 * as the completion might be run in any backend (or IO workers). We will
7362 * report checksum errors in buffer_readv_report().
7363 */
7364 piv_flags = PIV_LOG_LOG;
7365
7366 /* the local zero_damaged_pages may differ from the definer's */
 7367 if (flags & READ_BUFFERS_IGNORE_CHECKSUM_FAILURES)
 7368 piv_flags |= PIV_IGNORE_CHECKSUM_FAILURE;
7369
7370 /* Check for garbage data. */
7371 if (!failed)
7372 {
7373 /*
7374 * If the buffer is not currently pinned by this backend, e.g. because
7375 * we're completing this IO after an error, the buffer data will have
7376 * been marked as inaccessible when the buffer was unpinned. The AIO
7377 * subsystem holds a pin, but that doesn't prevent the buffer from
7378 * having been marked as inaccessible. The completion might also be
7379 * executed in a different process.
7380 */
7381#ifdef USE_VALGRIND
7382 if (!BufferIsPinned(buffer))
7383 VALGRIND_MAKE_MEM_DEFINED(bufdata, BLCKSZ);
7384#endif
7385
7386 if (!PageIsVerified((Page) bufdata, tag.blockNum, piv_flags,
7387 failed_checksum))
7388 {
7389 if (flags & READ_BUFFERS_ZERO_ON_ERROR)
7390 {
7391 memset(bufdata, 0, BLCKSZ);
7392 *zeroed_buffer = true;
7393 }
7394 else
7395 {
7396 *buffer_invalid = true;
7397 /* mark buffer as having failed */
7398 failed = true;
7399 }
7400 }
7401 else if (*failed_checksum)
7402 *ignored_checksum = true;
7403
7404 /* undo what we did above */
7405#ifdef USE_VALGRIND
7406 if (!BufferIsPinned(buffer))
7407 VALGRIND_MAKE_MEM_NOACCESS(bufdata, BLCKSZ);
7408#endif
7409
7410 /*
7411 * Immediately log a message about the invalid page, but only to the
7412 * server log. The reason to do so immediately is that this may be
7413 * executed in a different backend than the one that originated the
7414 * request. The reason to do so immediately is that the originator
7415 * might not process the query result immediately (because it is busy
7416 * doing another part of query processing) or at all (e.g. if it was
7417 * cancelled or errored out due to another IO also failing). The
7418 * definer of the IO will emit an ERROR or WARNING when processing the
7419 * IO's results
7420 *
7421 * To avoid duplicating the code to emit these log messages, we reuse
7422 * buffer_readv_report().
7423 */
7424 if (*buffer_invalid || *failed_checksum || *zeroed_buffer)
7425 {
7426 PgAioResult result_one = {0};
7427
7428 buffer_readv_encode_error(&result_one, is_temp,
7429 *zeroed_buffer,
7430 *ignored_checksum,
7431 *buffer_invalid,
7432 *zeroed_buffer ? 1 : 0,
7433 *failed_checksum ? 1 : 0,
7434 buf_off, buf_off, buf_off);
7435 pgaio_result_report(result_one, td, LOG_SERVER_ONLY);
7436 }
7437 }
7438
7439 /* Terminate I/O and set BM_VALID. */
7440 set_flag_bits = failed ? BM_IO_ERROR : BM_VALID;
7441 if (is_temp)
7442 TerminateLocalBufferIO(buf_hdr, false, set_flag_bits, true);
7443 else
7444 TerminateBufferIO(buf_hdr, false, set_flag_bits, false, true);
7445
7446 /*
7447 * Call the BUFFER_READ_DONE tracepoint in the callback, even though the
7448 * callback may not be executed in the same backend that called
7449 * BUFFER_READ_START. The alternative would be to defer calling the
7450 * tracepoint to a later point (e.g. the local completion callback for
7451 * shared buffer reads), which seems even less helpful.
7452 */
7453 TRACE_POSTGRESQL_BUFFER_READ_DONE(tag.forkNum,
7454 tag.blockNum,
7455 tag.spcOid,
7456 tag.dbOid,
7457 tag.relNumber,
 7458 is_temp ? MyProcNumber : INVALID_PROC_NUMBER,
 7459 false);
7460}
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:237
static BufferDesc * GetLocalBufferDescriptor(uint32 id)
#define BufferIsPinned(bufnum)
Definition: bufmgr.c:577
bool PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_failure_p)
Definition: bufpage.c:94
#define PIV_LOG_LOG
Definition: bufpage.h:468
PageData * Page
Definition: bufpage.h:81
#define PIV_IGNORE_CHECKSUM_FAILURE
Definition: bufpage.h:469
#define LOG_SERVER_ONLY
Definition: elog.h:32
void TerminateLocalBufferIO(BufferDesc *bufHdr, bool clear_dirty, uint32 set_flag_bits, bool release_aio)
Definition: localbuf.c:562
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
Definition: memdebug.h:26
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)
Definition: memdebug.h:27
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
pg_atomic_uint32 state
RelFileNumber relNumber
ForkNumber forkNum
Oid spcOid

References Assert(), buftag::blockNum, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, buffer_readv_encode_error(), BufferGetBlock(), BufferIsPinned, buftag::dbOid, buftag::forkNum, GetBufferDescriptor(), GetLocalBufferDescriptor(), INVALID_PROC_NUMBER, LOG_SERVER_ONLY, MyProcNumber, PageIsVerified(), pg_atomic_read_u32(), pgaio_result_report(), PIV_IGNORE_CHECKSUM_FAILURE, PIV_LOG_LOG, READ_BUFFERS_IGNORE_CHECKSUM_FAILURES, READ_BUFFERS_ZERO_ON_ERROR, buftag::relNumber, buftag::spcOid, BufferDesc::state, BufferDesc::tag, TerminateBufferIO(), TerminateLocalBufferIO(), VALGRIND_MAKE_MEM_DEFINED, and VALGRIND_MAKE_MEM_NOACCESS.

Referenced by buffer_readv_complete().

◆ buffer_readv_decode_error()

static void buffer_readv_decode_error ( PgAioResult  result,
bool *  zeroed_any,
bool *  ignored_any,
uint8 zeroed_or_error_count,
uint8 checkfail_count,
uint8 first_off 
)
inlinestatic

Definition at line 7188 of file bufmgr.c.

7194{
7195 uint32 rem_error = result.error_data;
7196
7197 /* see static asserts in buffer_readv_encode_error */
7198#define READV_COUNT_BITS 7
7199#define READV_COUNT_MASK ((1 << READV_COUNT_BITS) - 1)
7200
7201 *zeroed_any = rem_error & 1;
7202 rem_error >>= 1;
7203
7204 *ignored_any = rem_error & 1;
7205 rem_error >>= 1;
7206
7207 *zeroed_or_error_count = rem_error & READV_COUNT_MASK;
7208 rem_error >>= READV_COUNT_BITS;
7209
7210 *checkfail_count = rem_error & READV_COUNT_MASK;
7211 rem_error >>= READV_COUNT_BITS;
7212
7213 *first_off = rem_error & READV_COUNT_MASK;
7214 rem_error >>= READV_COUNT_BITS;
7215}
#define READV_COUNT_BITS
#define READV_COUNT_MASK
uint32 error_data
Definition: aio_types.h:111

References PgAioResult::error_data, READV_COUNT_BITS, and READV_COUNT_MASK.

Referenced by buffer_readv_encode_error(), buffer_readv_report(), and shared_buffer_readv_complete_local().

◆ buffer_readv_encode_error()

static void buffer_readv_encode_error ( PgAioResult *  result,
bool  is_temp,
bool  zeroed_any,
bool  ignored_any,
uint8  error_count,
uint8  zeroed_count,
uint8  checkfail_count,
uint8  first_error_off,
uint8  first_zeroed_off,
uint8  first_ignored_off 
)
inlinestatic

Definition at line 7230 of file bufmgr.c.

7240{
7241
7242 uint8 shift = 0;
7243 uint8 zeroed_or_error_count =
7244 error_count > 0 ? error_count : zeroed_count;
7245 uint8 first_off;
7246
 7247 StaticAssertDecl(PG_IOV_MAX <= 1 << READV_COUNT_BITS,
 7248 "PG_IOV_MAX is bigger than reserved space for error data");
 7249 StaticAssertDecl((1 + 1 + 3 * READV_COUNT_BITS) <= PGAIO_RESULT_ERROR_BITS,
 7250 "PGAIO_RESULT_ERROR_BITS is insufficient for buffer_readv");
7251
7252 /*
7253 * We only have space to encode one offset - but luckily that's good
7254 * enough. If there is an error, the error is the interesting offset, same
7255 * with a zeroed buffer vs an ignored buffer.
7256 */
7257 if (error_count > 0)
7258 first_off = first_error_off;
7259 else if (zeroed_count > 0)
7260 first_off = first_zeroed_off;
7261 else
7262 first_off = first_ignored_off;
7263
7264 Assert(!zeroed_any || error_count == 0);
7265
7266 result->error_data = 0;
7267
7268 result->error_data |= zeroed_any << shift;
7269 shift += 1;
7270
7271 result->error_data |= ignored_any << shift;
7272 shift += 1;
7273
7274 result->error_data |= ((uint32) zeroed_or_error_count) << shift;
7275 shift += READV_COUNT_BITS;
7276
7277 result->error_data |= ((uint32) checkfail_count) << shift;
7278 shift += READV_COUNT_BITS;
7279
7280 result->error_data |= ((uint32) first_off) << shift;
7281 shift += READV_COUNT_BITS;
7282
7283 result->id = is_temp ? PGAIO_HCB_LOCAL_BUFFER_READV :
7284 PGAIO_HCB_SHARED_BUFFER_READV;
7285
7286 if (error_count > 0)
7287 result->status = PGAIO_RS_ERROR;
7288 else
7289 result->status = PGAIO_RS_WARNING;
7290
7291 /*
7292 * The encoding is complicated enough to warrant cross-checking it against
7293 * the decode function.
7294 */
7295#ifdef USE_ASSERT_CHECKING
7296 {
7297 bool zeroed_any_2,
7298 ignored_any_2;
7299 uint8 zeroed_or_error_count_2,
7300 checkfail_count_2,
7301 first_off_2;
7302
7303 buffer_readv_decode_error(*result,
7304 &zeroed_any_2, &ignored_any_2,
7305 &zeroed_or_error_count_2,
7306 &checkfail_count_2,
7307 &first_off_2);
7308 Assert(zeroed_any == zeroed_any_2);
7309 Assert(ignored_any == ignored_any_2);
7310 Assert(zeroed_or_error_count == zeroed_or_error_count_2);
7311 Assert(checkfail_count == checkfail_count_2);
7312 Assert(first_off == first_off_2);
7313 }
7314#endif
7315
7316#undef READV_COUNT_BITS
7317#undef READV_COUNT_MASK
7318}
#define PGAIO_RESULT_ERROR_BITS
Definition: aio_types.h:98
@ PGAIO_RS_WARNING
Definition: aio_types.h:83
static void buffer_readv_decode_error(PgAioResult result, bool *zeroed_any, bool *ignored_any, uint8 *zeroed_or_error_count, uint8 *checkfail_count, uint8 *first_off)
Definition: bufmgr.c:7188
#define StaticAssertDecl(condition, errmessage)
Definition: c.h:940
#define PG_IOV_MAX
Definition: pg_iovec.h:47
uint32 id
Definition: aio_types.h:105

References Assert(), buffer_readv_decode_error(), PgAioResult::error_data, PgAioResult::id, PG_IOV_MAX, PGAIO_HCB_LOCAL_BUFFER_READV, PGAIO_HCB_SHARED_BUFFER_READV, PGAIO_RESULT_ERROR_BITS, PGAIO_RS_ERROR, PGAIO_RS_WARNING, READV_COUNT_BITS, StaticAssertDecl, and PgAioResult::status.

Referenced by buffer_readv_complete(), and buffer_readv_complete_one().
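
The encode/decode pair already cross-checks itself in assert-enabled builds, as the listing above shows. For experimenting with the layout outside the server, here is a minimal standalone C sketch of the same bit-packing scheme; encode(), COUNT_BITS, and COUNT_MASK are local stand-ins, not bufmgr.c APIs:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define COUNT_BITS 7
    #define COUNT_MASK ((1u << COUNT_BITS) - 1)

    /* Pack two flag bits and three 7-bit counters into one uint32,
     * mirroring the shifts used for PgAioResult.error_data above. */
    static uint32_t
    encode(int zeroed_any, int ignored_any,
           uint8_t zeroed_or_error_count, uint8_t checkfail_count,
           uint8_t first_off)
    {
        uint32_t v = 0;
        int      shift = 0;

        v |= (uint32_t) (zeroed_any != 0) << shift;
        shift += 1;
        v |= (uint32_t) (ignored_any != 0) << shift;
        shift += 1;
        v |= (uint32_t) zeroed_or_error_count << shift;
        shift += COUNT_BITS;
        v |= (uint32_t) checkfail_count << shift;
        shift += COUNT_BITS;
        v |= (uint32_t) first_off << shift;
        return v;
    }

    int
    main(void)
    {
        /* e.g. 3 zeroed pages, 2 checksum failures, first bad page at offset 5 */
        uint32_t v = encode(1, 0, 3, 2, 5);

        assert((v & 1) == 1);                                    /* zeroed_any */
        assert(((v >> 1) & 1) == 0);                             /* ignored_any */
        assert(((v >> 2) & COUNT_MASK) == 3);                    /* zeroed_or_error_count */
        assert(((v >> (2 + COUNT_BITS)) & COUNT_MASK) == 2);     /* checkfail_count */
        assert(((v >> (2 + 2 * COUNT_BITS)) & COUNT_MASK) == 5); /* first_off */
        printf("encoded: 0x%x\n", v);
        return 0;
    }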

◆ buffer_readv_report()

static void buffer_readv_report ( PgAioResult  result,
const PgAioTargetData *  td,
int  elevel 
)
static

Definition at line 7572 of file bufmgr.c.

7574{
7575 int nblocks = td->smgr.nblocks;
7576 BlockNumber first = td->smgr.blockNum;
7577 BlockNumber last = first + nblocks - 1;
7578 ProcNumber errProc =
7579 td->smgr.is_temp ? MyProcNumber : INVALID_PROC_NUMBER;
7580 RelPathStr rpath =
7581 relpathbackend(td->smgr.rlocator, errProc, td->smgr.forkNum);
7582 bool zeroed_any,
7583 ignored_any;
7584 uint8 zeroed_or_error_count,
7585 checkfail_count,
7586 first_off;
7587 uint8 affected_count;
7588 const char *msg_one,
7589 *msg_mult,
7590 *det_mult,
7591 *hint_mult;
7592
7593 buffer_readv_decode_error(result, &zeroed_any, &ignored_any,
7594 &zeroed_or_error_count,
7595 &checkfail_count,
7596 &first_off);
7597
7598 /*
7599 * Treat a read that had both zeroed buffers *and* ignored checksums as a
7600 * special case, it's too irregular to be emitted the same way as the
7601 * other cases.
7602 */
7603 if (zeroed_any && ignored_any)
7604 {
7605 Assert(zeroed_any && ignored_any);
7606 Assert(nblocks > 1); /* same block can't be both zeroed and ignored */
7607 Assert(result.status != PGAIO_RS_ERROR);
7608 affected_count = zeroed_or_error_count;
7609
7610 ereport(elevel,
7611 errcode(ERRCODE_DATA_CORRUPTED),
7612 errmsg("zeroing %u page(s) and ignoring %u checksum failure(s) among blocks %u..%u of relation \"%s\"",
7613 affected_count, checkfail_count, first, last, rpath.str),
7614 affected_count > 1 ?
7615 errdetail("Block %u held the first zeroed page.",
7616 first + first_off) : 0,
7617 errhint_plural("See server log for details about the other %d invalid block.",
7618 "See server log for details about the other %d invalid blocks.",
7619 affected_count + checkfail_count - 1,
7620 affected_count + checkfail_count - 1));
7621 return;
7622 }
7623
7624 /*
7625 * The other messages are highly repetitive. To avoid duplicating a long
7626 * and complicated ereport(), gather the translated format strings
7627 * separately and then do one common ereport.
7628 */
7629 if (result.status == PGAIO_RS_ERROR)
7630 {
7631 Assert(!zeroed_any); /* can't have invalid pages when zeroing them */
7632 affected_count = zeroed_or_error_count;
7633 msg_one = _("invalid page in block %u of relation \"%s\"");
7634 msg_mult = _("%u invalid pages among blocks %u..%u of relation \"%s\"");
7635 det_mult = _("Block %u held the first invalid page.");
7636 hint_mult = _("See server log for the other %u invalid block(s).");
7637 }
7638 else if (zeroed_any && !ignored_any)
7639 {
7640 affected_count = zeroed_or_error_count;
7641 msg_one = _("invalid page in block %u of relation \"%s\"; zeroing out page");
7642 msg_mult = _("zeroing out %u invalid pages among blocks %u..%u of relation \"%s\"");
7643 det_mult = _("Block %u held the first zeroed page.");
7644 hint_mult = _("See server log for the other %u zeroed block(s).");
7645 }
7646 else if (!zeroed_any && ignored_any)
7647 {
7648 affected_count = checkfail_count;
7649 msg_one = _("ignoring checksum failure in block %u of relation \"%s\"");
7650 msg_mult = _("ignoring %u checksum failures among blocks %u..%u of relation \"%s\"");
7651 det_mult = _("Block %u held the first ignored page.");
7652 hint_mult = _("See server log for the other %u ignored block(s).");
7653 }
7654 else
7655 pg_unreachable();
7656
7657 ereport(elevel,
7658 errcode(ERRCODE_DATA_CORRUPTED),
7659 affected_count == 1 ?
7660 errmsg_internal(msg_one, first + first_off, rpath.str) :
7661 errmsg_internal(msg_mult, affected_count, first, last, rpath.str),
7662 affected_count > 1 ? errdetail_internal(det_mult, first + first_off) : 0,
7663 affected_count > 1 ? errhint_internal(hint_mult, affected_count - 1) : 0);
7664}
#define pg_unreachable()
Definition: c.h:347
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1170
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1243
int errhint_internal(const char *fmt,...)
Definition: elog.c:1352
int errhint_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1373
#define _(x)
Definition: elog.c:91
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:42
int ProcNumber
Definition: procnumber.h:24
#define relpathbackend(rlocator, backend, forknum)
Definition: relpath.h:141
char str[REL_PATH_STR_MAXLEN+1]
Definition: relpath.h:123
BlockNumber blockNum
Definition: aio_types.h:66
BlockNumber nblocks
Definition: aio_types.h:67
ForkNumber forkNum
Definition: aio_types.h:68

References _, Assert(), PgAioTargetData::blockNum, buffer_readv_decode_error(), ereport, errcode(), ERRCODE_DATA_CORRUPTED, errdetail(), errdetail_internal(), errhint_internal(), errhint_plural(), errmsg(), errmsg_internal(), PgAioTargetData::forkNum, INVALID_PROC_NUMBER, PgAioTargetData::is_temp, MyProcNumber, PgAioTargetData::nblocks, pg_unreachable, PGAIO_RS_ERROR, relpathbackend, PgAioTargetData::rlocator, PgAioTargetData::smgr, PgAioResult::status, and RelPathStr::str.

◆ buffer_stage_common()

static pg_attribute_always_inline void buffer_stage_common ( PgAioHandle *  ioh,
bool  is_write,
bool  is_temp 
)
static

Definition at line 7077 of file bufmgr.c.

7078{
7079 uint64 *io_data;
7080 uint8 handle_data_len;
7081 PgAioWaitRef io_ref;
7082 BufferTag first PG_USED_FOR_ASSERTS_ONLY = {0};
7083
7084 io_data = pgaio_io_get_handle_data(ioh, &handle_data_len);
7085
7086 pgaio_io_get_wref(ioh, &io_ref);
7087
7088 /* iterate over all buffers affected by the vectored readv/writev */
7089 for (int i = 0; i < handle_data_len; i++)
7090 {
7091 Buffer buffer = (Buffer) io_data[i];
7092 BufferDesc *buf_hdr = is_temp ?
7093 GetLocalBufferDescriptor(-buffer - 1)
7094 : GetBufferDescriptor(buffer - 1);
7095 uint32 buf_state;
7096
7097 /*
7098 * Check that all the buffers are actually ones that could conceivably
7099 * be done in one IO, i.e. are sequential. This is the last
7100 * buffer-aware code before IO is actually executed and confusion
7101 * about which buffers are targeted by IO can be hard to debug, making
7102 * it worth doing extra-paranoid checks.
7103 */
7104 if (i == 0)
7105 first = buf_hdr->tag;
7106 else
7107 {
7108 Assert(buf_hdr->tag.relNumber == first.relNumber);
7109 Assert(buf_hdr->tag.blockNum == first.blockNum + i);
7110 }
7111
7112 if (is_temp)
7113 buf_state = pg_atomic_read_u32(&buf_hdr->state);
7114 else
7115 buf_state = LockBufHdr(buf_hdr);
7116
7117 /* verify the buffer is in the expected state */
7118 Assert(buf_state & BM_TAG_VALID);
7119 if (is_write)
7120 {
7121 Assert(buf_state & BM_VALID);
7122 Assert(buf_state & BM_DIRTY);
7123 }
7124 else
7125 {
7126 Assert(!(buf_state & BM_VALID));
7127 Assert(!(buf_state & BM_DIRTY));
7128 }
7129
7130 /* temp buffers don't use BM_IO_IN_PROGRESS */
7131 if (!is_temp)
7132 Assert(buf_state & BM_IO_IN_PROGRESS);
7133
7134 Assert(BUF_STATE_GET_REFCOUNT(buf_state) >= 1);
7135
7136 /*
7137 * Reflect that the buffer is now owned by the AIO subsystem.
7138 *
7139 * For local buffers: This can't be done just via LocalRefCount, as
7140 * one might initially think, as this backend could error out while
7141 * AIO is still in progress, releasing all the pins by the backend
7142 * itself.
7143 *
7144 * This pin is released again in TerminateBufferIO().
7145 */
7146 buf_hdr->io_wref = io_ref;
7147
7148 if (is_temp)
7149 {
7150 buf_state += BUF_REFCOUNT_ONE;
7151 pg_atomic_unlocked_write_u32(&buf_hdr->state, buf_state);
7152 }
7153 else
7154 UnlockBufHdrExt(buf_hdr, buf_state, 0, 0, 1);
7155
7156 /*
7157 * Ensure the content lock that prevents buffer modifications while
7158 * the buffer is being written out is not released early due to an
7159 * error.
7160 */
7161 if (is_write && !is_temp)
7162 {
7163 LWLock *content_lock;
7164
7165 content_lock = BufferDescriptorGetContentLock(buf_hdr);
7166
7167 Assert(LWLockHeldByMe(content_lock));
7168
7169 /*
7170 * Lock is now owned by AIO subsystem.
7171 */
7172 LWLockDisown(content_lock);
7173 }
7174
7175 /*
7176 * Stop tracking this buffer via the resowner - the AIO system now
7177 * keeps track.
7178 */
7179 if (!is_temp)
7180 ResourceOwnerForgetBufferIO(CurrentResourceOwner, buffer);
7181 }
7182}
static void pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:295
#define BUF_REFCOUNT_ONE
Definition: buf_internals.h:51
static LWLock * BufferDescriptorGetContentLock(const BufferDesc *bdesc)
static uint32 UnlockBufHdrExt(BufferDesc *desc, uint32 old_buf_state, uint32 set_bits, uint32 unset_bits, int refcount_change)
static void ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer)
#define BUF_STATE_GET_REFCOUNT(state)
Definition: buf_internals.h:59
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:229
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1977
void LWLockDisown(LWLock *lock)
Definition: lwlock.c:1883
PgAioWaitRef io_wref
Definition: lwlock.h:42

References Assert(), buftag::blockNum, BM_DIRTY, BM_IO_IN_PROGRESS, BM_TAG_VALID, BM_VALID, BUF_REFCOUNT_ONE, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), CurrentResourceOwner, GetBufferDescriptor(), GetLocalBufferDescriptor(), i, BufferDesc::io_wref, LockBufHdr(), LWLockDisown(), LWLockHeldByMe(), pg_atomic_read_u32(), pg_atomic_unlocked_write_u32(), PG_USED_FOR_ASSERTS_ONLY, pgaio_io_get_handle_data(), pgaio_io_get_wref(), buftag::relNumber, ResourceOwnerForgetBufferIO(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdrExt().

Referenced by local_buffer_readv_stage(), and shared_buffer_readv_stage().

◆ BufferAlloc()

static pg_attribute_always_inline BufferDesc * BufferAlloc ( SMgrRelation  smgr,
char  relpersistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool *  foundPtr,
IOContext  io_context 
)
inline static

Definition at line 2075 of file bufmgr.c.

2079{
2080 BufferTag newTag; /* identity of requested block */
2081 uint32 newHash; /* hash value for newTag */
2082 LWLock *newPartitionLock; /* buffer partition lock for it */
2083 int existing_buf_id;
2084 Buffer victim_buffer;
2085 BufferDesc *victim_buf_hdr;
2086 uint32 victim_buf_state;
2087 uint32 set_bits = 0;
2088
2089 /* Make sure we will have room to remember the buffer pin */
2090 ResourceOwnerEnlarge(CurrentResourceOwner);
2091 ReservePrivateRefCountEntry();
2092
2093 /* create a tag so we can lookup the buffer */
2094 InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
2095
2096 /* determine its hash code and partition lock ID */
2097 newHash = BufTableHashCode(&newTag);
2098 newPartitionLock = BufMappingPartitionLock(newHash);
2099
2100 /* see if the block is in the buffer pool already */
2101 LWLockAcquire(newPartitionLock, LW_SHARED);
2102 existing_buf_id = BufTableLookup(&newTag, newHash);
2103 if (existing_buf_id >= 0)
2104 {
2105 BufferDesc *buf;
2106 bool valid;
2107
2108 /*
2109 * Found it. Now, pin the buffer so no one can steal it from the
2110 * buffer pool, and check to see if the correct data has been loaded
2111 * into the buffer.
2112 */
2113 buf = GetBufferDescriptor(existing_buf_id);
2114
2115 valid = PinBuffer(buf, strategy, false);
2116
2117 /* Can release the mapping lock as soon as we've pinned it */
2118 LWLockRelease(newPartitionLock);
2119
2120 *foundPtr = true;
2121
2122 if (!valid)
2123 {
2124 /*
2125 * We can only get here if (a) someone else is still reading in
2126 * the page, (b) a previous read attempt failed, or (c) someone
2127 * called StartReadBuffers() but not yet WaitReadBuffers().
2128 */
2129 *foundPtr = false;
2130 }
2131
2132 return buf;
2133 }
2134
2135 /*
2136 * Didn't find it in the buffer pool. We'll have to initialize a new
2137 * buffer. Remember to unlock the mapping lock while doing the work.
2138 */
2139 LWLockRelease(newPartitionLock);
2140
2141 /*
2142 * Acquire a victim buffer. Somebody else might try to do the same, we
2143 * don't hold any conflicting locks. If so we'll have to undo our work
2144 * later.
2145 */
2146 victim_buffer = GetVictimBuffer(strategy, io_context);
2147 victim_buf_hdr = GetBufferDescriptor(victim_buffer - 1);
2148
2149 /*
2150 * Try to make a hashtable entry for the buffer under its new tag. If
2151 * somebody else inserted another buffer for the tag, we'll release the
2152 * victim buffer we acquired and use the already inserted one.
2153 */
2154 LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
2155 existing_buf_id = BufTableInsert(&newTag, newHash, victim_buf_hdr->buf_id);
2156 if (existing_buf_id >= 0)
2157 {
2158 BufferDesc *existing_buf_hdr;
2159 bool valid;
2160
2161 /*
2162 * Got a collision. Someone has already done what we were about to do.
2163 * We'll just handle this as if it were found in the buffer pool in
2164 * the first place. First, give up the buffer we were planning to
2165 * use.
2166 *
2167 * We could do this after releasing the partition lock, but then we'd
2168 * have to call ResourceOwnerEnlarge() & ReservePrivateRefCountEntry()
2169 * before acquiring the lock, for the rare case of such a collision.
2170 */
2171 UnpinBuffer(victim_buf_hdr);
2172
2173 /* remaining code should match code at top of routine */
2174
2175 existing_buf_hdr = GetBufferDescriptor(existing_buf_id);
2176
2177 valid = PinBuffer(existing_buf_hdr, strategy, false);
2178
2179 /* Can release the mapping lock as soon as we've pinned it */
2180 LWLockRelease(newPartitionLock);
2181
2182 *foundPtr = true;
2183
2184 if (!valid)
2185 {
2186 /*
2187 * We can only get here if (a) someone else is still reading in
2188 * the page, (b) a previous read attempt failed, or (c) someone
2189 * called StartReadBuffers() but not yet WaitReadBuffers().
2190 */
2191 *foundPtr = false;
2192 }
2193
2194 return existing_buf_hdr;
2195 }
2196
2197 /*
2198 * Need to lock the buffer header too in order to change its tag.
2199 */
2200 victim_buf_state = LockBufHdr(victim_buf_hdr);
2201
2202 /* some sanity checks while we hold the buffer header lock */
2203 Assert(BUF_STATE_GET_REFCOUNT(victim_buf_state) == 1);
2204 Assert(!(victim_buf_state & (BM_TAG_VALID | BM_VALID | BM_DIRTY | BM_IO_IN_PROGRESS)));
2205
2206 victim_buf_hdr->tag = newTag;
2207
2208 /*
2209 * Make sure BM_PERMANENT is set for buffers that must be written at every
2210 * checkpoint. Unlogged buffers only need to be written at shutdown
2211 * checkpoints, except for their "init" forks, which need to be treated
2212 * just like permanent relations.
2213 */
2214 set_bits |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
2215 if (relpersistence == RELPERSISTENCE_PERMANENT || forkNum == INIT_FORKNUM)
2216 set_bits |= BM_PERMANENT;
2217
2218 UnlockBufHdrExt(victim_buf_hdr, victim_buf_state,
2219 set_bits, 0, 0);
2220
2221 LWLockRelease(newPartitionLock);
2222
2223 /*
2224 * Buffer contents are currently invalid.
2225 */
2226 *foundPtr = false;
2227
2228 return victim_buf_hdr;
2229}
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
#define BM_PERMANENT
Definition: buf_internals.h:77
#define BUF_USAGECOUNT_ONE
Definition: buf_internals.h:54
static LWLock * BufMappingPartitionLock(uint32 hashcode)
int BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:90
uint32 BufTableHashCode(BufferTag *tagPtr)
Definition: buf_table.c:78
int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
Definition: buf_table.c:118
static Buffer GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
Definition: bufmgr.c:2414
static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, bool skip_if_not_valid)
Definition: bufmgr.c:3162
static void ReservePrivateRefCountEntry(void)
Definition: bufmgr.c:284
static void UnpinBuffer(BufferDesc *buf)
Definition: bufmgr.c:3341
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1174
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1894
@ LW_SHARED
Definition: lwlock.h:113
@ LW_EXCLUSIVE
Definition: lwlock.h:112
@ INIT_FORKNUM
Definition: relpath.h:61
void ResourceOwnerEnlarge(ResourceOwner owner)
Definition: resowner.c:449

References Assert(), BM_DIRTY, BM_IO_IN_PROGRESS, BM_PERMANENT, BM_TAG_VALID, BM_VALID, buf, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), BufTableLookup(), CurrentResourceOwner, GetBufferDescriptor(), GetVictimBuffer(), INIT_FORKNUM, InitBufferTag(), RelFileLocatorBackend::locator, LockBufHdr(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), PinBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrRelationData::smgr_rlocator, BufferDesc::tag, UnlockBufHdrExt(), and UnpinBuffer().

Referenced by PinBufferForBlock().
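
Stripped of buffer-manager specifics, BufferAlloc() follows the classic optimistic lookup-or-insert pattern for a lock-protected cache: look up under a shared lock, and only on a miss prepare a candidate with no lock held, then retry under an exclusive lock, discarding the candidate if somebody else won the race. A standalone C sketch of just that pattern (every name below is illustrative, not a PostgreSQL API; victim replacement, i.e. GetVictimBuffer(), is omitted):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define CACHE_SLOTS 16

    typedef struct Entry { int key; int pins; } Entry;

    static Entry *slots[CACHE_SLOTS];
    static pthread_rwlock_t cache_lock = PTHREAD_RWLOCK_INITIALIZER;

    static Entry *find(int key)
    {
        for (int i = 0; i < CACHE_SLOTS; i++)
            if (slots[i] && slots[i]->key == key)
                return slots[i];
        return NULL;
    }

    static Entry *lookup_or_insert(int key)
    {
        Entry *e, *candidate;

        pthread_rwlock_rdlock(&cache_lock);   /* fast path: shared lock */
        if ((e = find(key)) != NULL)
        {
            e->pins++;      /* the real code uses an atomic, cf. PinBuffer() */
            pthread_rwlock_unlock(&cache_lock);
            return e;
        }
        pthread_rwlock_unlock(&cache_lock);

        candidate = malloc(sizeof(Entry));    /* may be slow; no lock held */
        candidate->key = key;
        candidate->pins = 1;

        pthread_rwlock_wrlock(&cache_lock);   /* slow path: exclusive lock */
        if ((e = find(key)) != NULL)
        {
            e->pins++;                        /* lost the race: undo our work */
            pthread_rwlock_unlock(&cache_lock);
            free(candidate);
            return e;
        }
        for (int i = 0; i < CACHE_SLOTS; i++) /* install the candidate */
            if (slots[i] == NULL) { slots[i] = candidate; break; }
        pthread_rwlock_unlock(&cache_lock);
        return candidate;
    }

    int main(void)
    {
        Entry *a = lookup_or_insert(42);
        Entry *b = lookup_or_insert(42);      /* hits the fast path */
        printf("same entry: %s, pins: %d\n", a == b ? "yes" : "no", b->pins);
        return 0;
    }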

◆ BufferGetBlockNumber()

BlockNumber BufferGetBlockNumber ( Buffer  buffer)

Definition at line 4318 of file bufmgr.c.

4319{
4320 BufferDesc *bufHdr;
4321
4322 Assert(BufferIsPinned(buffer));
4323
4324 if (BufferIsLocal(buffer))
4325 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
4326 else
4327 bufHdr = GetBufferDescriptor(buffer - 1);
4328
4329 /* pinned, so OK to read tag without spinlock */
4330 return bufHdr->tag.blockNum;
4331}
#define BufferIsLocal(buffer)
Definition: buf.h:37

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by _bt_binsrch_insert(), _bt_bottomupdel_pass(), _bt_check_unique(), _bt_checkpage(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_doinsert(), _bt_finish_split(), _bt_getroot(), _bt_insert_parent(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_moveright(), _bt_newlevel(), _bt_pagedel(), _bt_readpage(), _bt_restore_meta(), _bt_search(), _bt_simpledel_pass(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_checkpage(), _hash_doinsert(), _hash_first(), _hash_freeovflpage(), _hash_getnewbuf(), _hash_readnext(), _hash_readpage(), _hash_splitbucket(), allocNewBuffer(), AsyncReadBuffers(), BitmapHeapScanNextBlock(), blinsert(), BloomInitMetapage(), brin_doinsert(), brin_doupdate(), brin_getinsertbuffer(), brin_initialize_empty_new_buffer(), brin_page_cleanup(), brin_xlog_insert_update(), brinbuild(), brinGetTupleForHeapBlock(), btvacuumpage(), check_index_page(), CheckReadBuffersOperation(), collect_corrupt_items(), collectMatchBitmap(), createPostingTree(), dataBeginPlaceToPageLeaf(), dataPrepareDownlink(), doPickSplit(), entryPrepareDownlink(), fill_seq_fork_with_data(), ginEntryInsert(), ginFindParents(), ginFinishSplit(), ginPlaceToPage(), ginRedoDeleteListPages(), ginRedoUpdateMetapage(), ginScanToDelete(), gistbufferinginserttuples(), gistbuild(), gistcheckpage(), gistdeletepage(), gistformdownlink(), gistinserttuples(), gistMemorizeAllDownlinks(), gistplacetopage(), gistRelocateBuildBuffersOnSplit(), gistScanPage(), gistvacuumpage(), hash_xlog_add_ovfl_page(), heap_delete(), heap_fetch_next_buffer(), heap_hot_search_buffer(), heap_insert(), heap_multi_insert(), heap_page_would_be_all_visible(), heap_prepare_pagescan(), heap_update(), heap_xlog_confirm(), heap_xlog_lock(), heapam_scan_analyze_next_block(), heapgettup(), heapgettup_pagemode(), index_compute_xid_horizon_for_tuples(), lazy_scan_heap(), lazy_scan_noprune(), lazy_scan_prune(), lazy_vacuum_heap_rel(), makeSublist(), moveLeafs(), moveRightIfItNeeded(), pgstathashindex(), prune_freeze_plan(), read_stream_start_pending_read(), ReadBufferBI(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), RelationPutHeapTuple(), revmap_get_buffer(), revmap_physical_extend(), ScanSourceDatabasePgClassPage(), spgAddNodeAction(), spgbuild(), spgdoinsert(), SpGistSetLastUsedPage(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), StartReadBuffersImpl(), startScanEntry(), terminate_brin_buildstate(), vacuumLeafPage(), verify_heapam(), visibilitymap_clear(), visibilitymap_get_status(), visibilitymap_pin(), visibilitymap_pin_ok(), visibilitymap_set(), and visibilitymap_set_vmbits().
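
A typical usage sketch, under the assumption that rel and blkno exist in the caller's context; reading the tag is safe because ReadBuffer() returns the buffer pinned:

    Buffer      buf = ReadBuffer(rel, blkno);

    Assert(BufferGetBlockNumber(buf) == blkno); /* tag is stable while pinned */
    ReleaseBuffer(buf);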

◆ BufferGetLSNAtomic()

XLogRecPtr BufferGetLSNAtomic ( Buffer  buffer)

Definition at line 4594 of file bufmgr.c.

4595{
4596 char *page = BufferGetPage(buffer);
4597 BufferDesc *bufHdr;
4598 XLogRecPtr lsn;
4599
4600 /*
4601 * If we don't need locking for correctness, fastpath out.
4602 */
4603 if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
4604 return PageGetLSN(page);
4605
4606 /* Make sure we've got a real buffer, and that we hold a pin on it. */
4607 Assert(BufferIsValid(buffer));
4608 Assert(BufferIsPinned(buffer));
4609
4610 bufHdr = GetBufferDescriptor(buffer - 1);
4611 LockBufHdr(bufHdr);
4612 lsn = PageGetLSN(page);
4613 UnlockBufHdr(bufHdr);
4614
4615 return lsn;
4616}
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:436
static XLogRecPtr PageGetLSN(const PageData *page)
Definition: bufpage.h:385
#define XLogHintBitIsNeeded()
Definition: xlog.h:120
uint64 XLogRecPtr
Definition: xlogdefs.h:21

References Assert(), PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), LockBufHdr(), PageGetLSN(), UnlockBufHdr(), and XLogHintBitIsNeeded.

Referenced by _bt_drop_lock_and_maybe_pin(), _bt_killitems(), gistdoinsert(), gistFindPath(), gistkillitems(), gistScanPage(), SetHintBits(), and XLogSaveBufferForHint().
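
A hedged sketch of the interlock its best-known caller relies on, modeled on SetHintBits(); commitLSN and the surrounding logic are assumptions drawn from that caller, not part of this function:

    /* Hint bits must not reach disk before the WAL that justifies them. */
    if (BufferIsPermanent(buffer) &&
        XLogNeedsFlush(commitLSN) &&
        BufferGetLSNAtomic(buffer) < commitLSN)
        return;     /* WAL not flushed and page LSN offers no interlock */
    /* ...otherwise it is safe to set the hint bit... */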

◆ BufferGetTag()

void BufferGetTag ( Buffer  buffer,
RelFileLocator *  rlocator,
ForkNumber *  forknum,
BlockNumber *  blknum 
)

Definition at line 4339 of file bufmgr.c.

4341{
4342 BufferDesc *bufHdr;
4343
4344 /* Do the same checks as BufferGetBlockNumber. */
4345 Assert(BufferIsPinned(buffer));
4346
4347 if (BufferIsLocal(buffer))
4348 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
4349 else
4350 bufHdr = GetBufferDescriptor(buffer - 1);
4351
4352 /* pinned, so OK to read tag without spinlock */
4353 *rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
4354 *forknum = BufTagGetForkNum(&bufHdr->tag);
4355 *blknum = bufHdr->tag.blockNum;
4356}

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), and BufferDesc::tag.

Referenced by fsm_search_avail(), ginRedoInsertEntry(), heap_inplace_update_and_unlock(), log_newpage_buffer(), ResolveCminCmaxDuringDecoding(), XLogRegisterBuffer(), and XLogSaveBufferForHint().
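
A usage sketch, assuming buffer is a pinned buffer in the caller's context; this mirrors how WAL code records a page's on-disk identity rather than its transient Buffer number:

    RelFileLocator rlocator;
    ForkNumber  forknum;
    BlockNumber blknum;

    BufferGetTag(buffer, &rlocator, &forknum, &blknum);
    /* (rlocator, forknum, blknum) now identifies the page on disk */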

◆ BufferIsDirty()

bool BufferIsDirty ( Buffer  buffer)

Definition at line 3005 of file bufmgr.c.

3006{
3007 BufferDesc *bufHdr;
3008
3009 Assert(BufferIsPinned(buffer));
3010
3011 if (BufferIsLocal(buffer))
3012 {
3013 int bufid = -buffer - 1;
3014
3015 bufHdr = GetLocalBufferDescriptor(bufid);
3016 /* Content locks are not maintained for local buffers. */
3017 }
3018 else
3019 {
3020 bufHdr = GetBufferDescriptor(buffer - 1);
3021 Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE));
3022 }
3023
3024 return pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY;
3025}
bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
Definition: bufmgr.c:2963
@ BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:207

References Assert(), BM_DIRTY, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsPinned, GetBufferDescriptor(), GetLocalBufferDescriptor(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by heap_multi_insert(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), log_heap_prune_and_freeze(), and XLogRegisterBuffer().

◆ BufferIsLockedByMe()

bool BufferIsLockedByMe ( Buffer  buffer)

Definition at line 2937 of file bufmgr.c.

2938{
2939 BufferDesc *bufHdr;
2940
2941 Assert(BufferIsPinned(buffer));
2942
2943 if (BufferIsLocal(buffer))
2944 {
2945 /* Content locks are not maintained for local buffers. */
2946 return true;
2947 }
2948 else
2949 {
2950 bufHdr = GetBufferDescriptor(buffer - 1);
2952 }
2953}

References Assert(), PrivateRefCountEntry::buffer, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), and LWLockHeldByMe().

Referenced by FlushOneBuffer(), and MarkBufferDirtyHint().

◆ BufferIsLockedByMeInMode()

bool BufferIsLockedByMeInMode ( Buffer  buffer,
BufferLockMode  mode 
)

Definition at line 2963 of file bufmgr.c.

2964{
2965 BufferDesc *bufHdr;
2966
2967 Assert(BufferIsPinned(buffer));
2968
2969 if (BufferIsLocal(buffer))
2970 {
2971 /* Content locks are not maintained for local buffers. */
2972 return true;
2973 }
2974 else
2975 {
2976 LWLockMode lw_mode;
2977
2978 switch (mode)
2979 {
2980 case BUFFER_LOCK_EXCLUSIVE:
2981 lw_mode = LW_EXCLUSIVE;
2982 break;
2983 case BUFFER_LOCK_SHARE:
2984 lw_mode = LW_SHARED;
2985 break;
2986 default:
2987 pg_unreachable();
2988 }
2989
2990 bufHdr = GetBufferDescriptor(buffer - 1);
2991 return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
2992 lw_mode);
2993 }
2994}
@ BUFFER_LOCK_SHARE
Definition: bufmgr.h:206
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:2021
LWLockMode
Definition: lwlock.h:111
static PgChecksumMode mode
Definition: pg_checksums.c:56

References Assert(), PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, GetBufferDescriptor(), LW_EXCLUSIVE, LW_SHARED, LWLockHeldByMeInMode(), mode, and pg_unreachable.

Referenced by BufferIsDirty(), IsBufferCleanupOK(), MarkBufferDirty(), visibilitymap_set(), visibilitymap_set_vmbits(), and XLogRegisterBuffer().

◆ BufferIsPermanent()

bool BufferIsPermanent ( Buffer  buffer)

Definition at line 4564 of file bufmgr.c.

4565{
4566 BufferDesc *bufHdr;
4567
4568 /* Local buffers are used only for temp relations. */
4569 if (BufferIsLocal(buffer))
4570 return false;
4571
4572 /* Make sure we've got a real buffer, and that we hold a pin on it. */
4573 Assert(BufferIsValid(buffer));
4574 Assert(BufferIsPinned(buffer));
4575
4576 /*
4577 * BM_PERMANENT can't be changed while we hold a pin on the buffer, so we
4578 * need not bother with the buffer header spinlock. Even if someone else
4579 * changes the buffer header state while we're doing this, the state is
4580 * changed atomically, so we'll read the old value or the new value, but
4581 * not random garbage.
4582 */
4583 bufHdr = GetBufferDescriptor(buffer - 1);
4584 return (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT) != 0;
4585}

References Assert(), BM_PERMANENT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), GetBufferDescriptor(), pg_atomic_read_u32(), and BufferDesc::state.

Referenced by SetHintBits().

◆ BufferSync()

static void BufferSync ( int  flags)
static

Definition at line 3437 of file bufmgr.c.

3438{
3439 uint32 buf_state;
3440 int buf_id;
3441 int num_to_scan;
3442 int num_spaces;
3443 int num_processed;
3444 int num_written;
3445 CkptTsStatus *per_ts_stat = NULL;
3446 Oid last_tsid;
3447 binaryheap *ts_heap;
3448 int i;
3449 uint32 mask = BM_DIRTY;
3450 WritebackContext wb_context;
3451
3452 /*
3453 * Unless this is a shutdown checkpoint or we have been explicitly told,
3454 * we write only permanent, dirty buffers. But at shutdown or end of
3455 * recovery, we write all dirty buffers.
3456 */
3457 if (!((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
3458 CHECKPOINT_FLUSH_UNLOGGED))))
3459 mask |= BM_PERMANENT;
3460
3461 /*
3462 * Loop over all buffers, and mark the ones that need to be written with
3463 * BM_CHECKPOINT_NEEDED. Count them as we go (num_to_scan), so that we
3464 * can estimate how much work needs to be done.
3465 *
3466 * This allows us to write only those pages that were dirty when the
3467 * checkpoint began, and not those that get dirtied while it proceeds.
3468 * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
3469 * later in this function, or by normal backends or the bgwriter cleaning
3470 * scan, the flag is cleared. Any buffer dirtied after this point won't
3471 * have the flag set.
3472 *
3473 * Note that if we fail to write some buffer, we may leave buffers with
3474 * BM_CHECKPOINT_NEEDED still set. This is OK since any such buffer would
3475 * certainly need to be written for the next checkpoint attempt, too.
3476 */
3477 num_to_scan = 0;
3478 for (buf_id = 0; buf_id < NBuffers; buf_id++)
3479 {
3480 BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
3481 uint32 set_bits = 0;
3482
3483 /*
3484 * Header spinlock is enough to examine BM_DIRTY, see comment in
3485 * SyncOneBuffer.
3486 */
3487 buf_state = LockBufHdr(bufHdr);
3488
3489 if ((buf_state & mask) == mask)
3490 {
3491 CkptSortItem *item;
3492
3493 set_bits = BM_CHECKPOINT_NEEDED;
3494
3495 item = &CkptBufferIds[num_to_scan++];
3496 item->buf_id = buf_id;
3497 item->tsId = bufHdr->tag.spcOid;
3498 item->relNumber = BufTagGetRelNumber(&bufHdr->tag);
3499 item->forkNum = BufTagGetForkNum(&bufHdr->tag);
3500 item->blockNum = bufHdr->tag.blockNum;
3501 }
3502
3503 UnlockBufHdrExt(bufHdr, buf_state,
3504 set_bits, 0,
3505 0);
3506
3507 /* Check for barrier events in case NBuffers is large. */
3508 if (ProcSignalBarrierPending)
3509 ProcessProcSignalBarrier();
3510 }
3511
3512 if (num_to_scan == 0)
3513 return; /* nothing to do */
3514
3515 WritebackContextInit(&wb_context, &checkpoint_flush_after);
3516
3517 TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
3518
3519 /*
3520 * Sort buffers that need to be written to reduce the likelihood of random
3521 * IO. The sorting is also important for the implementation of balancing
3522 * writes between tablespaces. Without balancing writes we'd potentially
3523 * end up writing to the tablespaces one-by-one; possibly overloading the
3524 * underlying system.
3525 */
3526 sort_checkpoint_bufferids(CkptBufferIds, num_to_scan);
3527
3528 num_spaces = 0;
3529
3530 /*
3531 * Allocate progress status for each tablespace with buffers that need to
3532 * be flushed. This requires the to-be-flushed array to be sorted.
3533 */
3534 last_tsid = InvalidOid;
3535 for (i = 0; i < num_to_scan; i++)
3536 {
3537 CkptTsStatus *s;
3538 Oid cur_tsid;
3539
3540 cur_tsid = CkptBufferIds[i].tsId;
3541
3542 /*
3543 * Grow array of per-tablespace status structs, every time a new
3544 * tablespace is found.
3545 */
3546 if (last_tsid == InvalidOid || last_tsid != cur_tsid)
3547 {
3548 Size sz;
3549
3550 num_spaces++;
3551
3552 /*
3553 * Not worth adding grow-by-power-of-2 logic here - even with a
3554 * few hundred tablespaces this should be fine.
3555 */
3556 sz = sizeof(CkptTsStatus) * num_spaces;
3557
3558 if (per_ts_stat == NULL)
3559 per_ts_stat = (CkptTsStatus *) palloc(sz);
3560 else
3561 per_ts_stat = (CkptTsStatus *) repalloc(per_ts_stat, sz);
3562
3563 s = &per_ts_stat[num_spaces - 1];
3564 memset(s, 0, sizeof(*s));
3565 s->tsId = cur_tsid;
3566
3567 /*
3568 * The first buffer in this tablespace. As CkptBufferIds is sorted
3569 * by tablespace all (s->num_to_scan) buffers in this tablespace
3570 * will follow afterwards.
3571 */
3572 s->index = i;
3573
3574 /*
3575 * progress_slice will be determined once we know how many buffers
3576 * are in each tablespace, i.e. after this loop.
3577 */
3578
3579 last_tsid = cur_tsid;
3580 }
3581 else
3582 {
3583 s = &per_ts_stat[num_spaces - 1];
3584 }
3585
3586 s->num_to_scan++;
3587
3588 /* Check for barrier events. */
3589 if (ProcSignalBarrierPending)
3590 ProcessProcSignalBarrier();
3591 }
3592
3593 Assert(num_spaces > 0);
3594
3595 /*
3596 * Build a min-heap over the write-progress in the individual tablespaces,
3597 * and compute how large a portion of the total progress a single
3598 * processed buffer is.
3599 */
3600 ts_heap = binaryheap_allocate(num_spaces,
3601 ts_ckpt_progress_comparator,
3602 NULL);
3603
3604 for (i = 0; i < num_spaces; i++)
3605 {
3606 CkptTsStatus *ts_stat = &per_ts_stat[i];
3607
3608 ts_stat->progress_slice = (float8) num_to_scan / ts_stat->num_to_scan;
3609
3610 binaryheap_add_unordered(ts_heap, PointerGetDatum(ts_stat));
3611 }
3612
3613 binaryheap_build(ts_heap);
3614
3615 /*
3616 * Iterate through to-be-checkpointed buffers and write the ones (still)
3617 * marked with BM_CHECKPOINT_NEEDED. The writes are balanced between
3618 * tablespaces; otherwise the sorting would lead to only one tablespace
3619 * receiving writes at a time, making inefficient use of the hardware.
3620 */
3621 num_processed = 0;
3622 num_written = 0;
3623 while (!binaryheap_empty(ts_heap))
3624 {
3625 BufferDesc *bufHdr = NULL;
3626 CkptTsStatus *ts_stat = (CkptTsStatus *)
3627 DatumGetPointer(binaryheap_first(ts_heap));
3628
3629 buf_id = CkptBufferIds[ts_stat->index].buf_id;
3630 Assert(buf_id != -1);
3631
3632 bufHdr = GetBufferDescriptor(buf_id);
3633
3634 num_processed++;
3635
3636 /*
3637 * We don't need to acquire the lock here, because we're only looking
3638 * at a single bit. It's possible that someone else writes the buffer
3639 * and clears the flag right after we check, but that doesn't matter
3640 * since SyncOneBuffer will then do nothing. However, there is a
3641 * further race condition: it's conceivable that between the time we
3642 * examine the bit here and the time SyncOneBuffer acquires the lock,
3643 * someone else not only wrote the buffer but replaced it with another
3644 * page and dirtied it. In that improbable case, SyncOneBuffer will
3645 * write the buffer though we didn't need to. It doesn't seem worth
3646 * guarding against this, though.
3647 */
3648 if (pg_atomic_read_u32(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
3649 {
3650 if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
3651 {
3652 TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
3653 PendingCheckpointerStats.buffers_written++;
3654 num_written++;
3655 }
3656 }
3657
3658 /*
3659 * Measure progress independent of actually having to flush the buffer
3660 * - otherwise writing becomes unbalanced.
3661 */
3662 ts_stat->progress += ts_stat->progress_slice;
3663 ts_stat->num_scanned++;
3664 ts_stat->index++;
3665
3666 /* Have all the buffers from the tablespace been processed? */
3667 if (ts_stat->num_scanned == ts_stat->num_to_scan)
3668 {
3669 binaryheap_remove_first(ts_heap);
3670 }
3671 else
3672 {
3673 /* update heap with the new progress */
3674 binaryheap_replace_first(ts_heap, PointerGetDatum(ts_stat));
3675 }
3676
3677 /*
3678 * Sleep to throttle our I/O rate.
3679 *
3680 * (This will check for barrier events even if it doesn't sleep.)
3681 */
3682 CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
3683 }
3684
3685 /*
3686 * Issue all pending flushes. Only checkpointer calls BufferSync(), so
3687 * IOContext will always be IOCONTEXT_NORMAL.
3688 */
3689 IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);
3690
3691 pfree(per_ts_stat);
3692 per_ts_stat = NULL;
3693 binaryheap_free(ts_heap);
3694
3695 /*
3696 * Update checkpoint statistics. As noted above, this doesn't include
3697 * buffers written by other backends or bgwriter scan.
3698 */
3699 CheckpointStats.ckpt_bufs_written += num_written;
3700
3701 TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
3702}
void binaryheap_build(binaryheap *heap)
Definition: binaryheap.c:138
void binaryheap_replace_first(binaryheap *heap, bh_node_type d)
Definition: binaryheap.c:255
bh_node_type binaryheap_first(binaryheap *heap)
Definition: binaryheap.c:177
bh_node_type binaryheap_remove_first(binaryheap *heap)
Definition: binaryheap.c:192
void binaryheap_free(binaryheap *heap)
Definition: binaryheap.c:75
void binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
Definition: binaryheap.c:116
binaryheap * binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
Definition: binaryheap.c:39
#define binaryheap_empty(h)
Definition: binaryheap.h:65
CkptSortItem * CkptBufferIds
Definition: buf_init.c:25
static RelFileNumber BufTagGetRelNumber(const BufferTag *tag)
#define BM_CHECKPOINT_NEEDED
Definition: buf_internals.h:76
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
Definition: bufmgr.c:6478
int checkpoint_flush_after
Definition: bufmgr.c:200
void WritebackContextInit(WritebackContext *context, int *max_pending)
Definition: bufmgr.c:6501
void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
Definition: bufmgr.c:6563
struct CkptTsStatus CkptTsStatus
double float8
Definition: c.h:649
size_t Size
Definition: c.h:624
void CheckpointWriteDelay(int flags, double progress)
Definition: checkpointer.c:785
volatile sig_atomic_t ProcSignalBarrierPending
Definition: globals.c:40
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1610
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc(Size size)
Definition: mcxt.c:1365
PgStat_CheckpointerStats PendingCheckpointerStats
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:332
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:322
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
void ProcessProcSignalBarrier(void)
Definition: procsignal.c:499
int ckpt_bufs_written
Definition: xlog.h:167
ForkNumber forkNum
RelFileNumber relNumber
BlockNumber blockNum
float8 progress_slice
Definition: bufmgr.c:141
int index
Definition: bufmgr.c:149
int num_scanned
Definition: bufmgr.c:146
float8 progress
Definition: bufmgr.c:140
int num_to_scan
Definition: bufmgr.c:144
Oid tsId
Definition: bufmgr.c:131
PgStat_Counter buffers_written
Definition: pgstat.h:266
CheckpointStatsData CheckpointStats
Definition: xlog.c:211
#define CHECKPOINT_FLUSH_UNLOGGED
Definition: xlog.h:143
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:140
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:139

References Assert(), binaryheap_add_unordered(), binaryheap_allocate(), binaryheap_build(), binaryheap_empty, binaryheap_first(), binaryheap_free(), binaryheap_remove_first(), binaryheap_replace_first(), buftag::blockNum, CkptSortItem::blockNum, BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_PERMANENT, CkptSortItem::buf_id, BUF_WRITTEN, PgStat_CheckpointerStats::buffers_written, BufTagGetForkNum(), BufTagGetRelNumber(), CHECKPOINT_END_OF_RECOVERY, checkpoint_flush_after, CHECKPOINT_FLUSH_UNLOGGED, CHECKPOINT_IS_SHUTDOWN, CheckpointStats, CheckpointWriteDelay(), CheckpointStatsData::ckpt_bufs_written, CkptBufferIds, DatumGetPointer(), CkptSortItem::forkNum, GetBufferDescriptor(), i, CkptTsStatus::index, InvalidOid, IOCONTEXT_NORMAL, IssuePendingWritebacks(), LockBufHdr(), NBuffers, CkptTsStatus::num_scanned, CkptTsStatus::num_to_scan, palloc(), PendingCheckpointerStats, pfree(), pg_atomic_read_u32(), PointerGetDatum(), ProcessProcSignalBarrier(), ProcSignalBarrierPending, CkptTsStatus::progress, CkptTsStatus::progress_slice, CkptSortItem::relNumber, repalloc(), buftag::spcOid, BufferDesc::state, SyncOneBuffer(), BufferDesc::tag, ts_ckpt_progress_comparator(), CkptTsStatus::tsId, CkptSortItem::tsId, UnlockBufHdrExt(), and WritebackContextInit().

Referenced by CheckPointBuffers().
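
A worked example of the balancing arithmetic above, with illustrative numbers: suppose num_to_scan = 400 dirty buffers in total, tablespace A holding 300 of them and tablespace B the other 100. Then progress_slice is 400/300 ~= 1.33 for A and 400/100 = 4 for B. Each processed buffer advances its tablespace's progress by its slice, and the min-heap always yields the tablespace with the least progress, so writes interleave at roughly 3:1 (A:B). After k writes in tablespace i the progress is k * num_to_scan / num_to_scan_i, which reaches the common finish line num_to_scan exactly when k = num_to_scan_i, i.e. both tablespaces complete together instead of one-by-one.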

◆ buffertag_comparator()

static int buffertag_comparator ( const BufferTag *  ba,
const BufferTag *  bb 
)
inline static

Definition at line 6413 of file bufmgr.c.

6414{
6415 int ret;
6416 RelFileLocator rlocatora;
6417 RelFileLocator rlocatorb;
6418
6419 rlocatora = BufTagGetRelFileLocator(ba);
6420 rlocatorb = BufTagGetRelFileLocator(bb);
6421
6422 ret = rlocator_comparator(&rlocatora, &rlocatorb);
6423
6424 if (ret != 0)
6425 return ret;
6426
6427 if (BufTagGetForkNum(ba) < BufTagGetForkNum(bb))
6428 return -1;
6429 if (BufTagGetForkNum(ba) > BufTagGetForkNum(bb))
6430 return 1;
6431
6432 if (ba->blockNum < bb->blockNum)
6433 return -1;
6434 if (ba->blockNum > bb->blockNum)
6435 return 1;
6436
6437 return 0;
6438}
static int rlocator_comparator(const void *p1, const void *p2)
Definition: bufmgr.c:6332

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), and rlocator_comparator().

◆ CheckBufferIsPinnedOnce()

void CheckBufferIsPinnedOnce ( Buffer  buffer)

Definition at line 5746 of file bufmgr.c.

5747{
5748 if (BufferIsLocal(buffer))
5749 {
5750 if (LocalRefCount[-buffer - 1] != 1)
5751 elog(ERROR, "incorrect local pin count: %d",
5752 LocalRefCount[-buffer - 1]);
5753 }
5754 else
5755 {
5756 if (GetPrivateRefCount(buffer) != 1)
5757 elog(ERROR, "incorrect local pin count: %d",
5758 GetPrivateRefCount(buffer));
5759 }
5760}
#define ERROR
Definition: elog.h:39

References PrivateRefCountEntry::buffer, BufferIsLocal, elog, ERROR, GetPrivateRefCount(), and LocalRefCount.

Referenced by GetVictimBuffer(), lazy_scan_heap(), and LockBufferForCleanup().

◆ CheckForBufferLeaks()

static void CheckForBufferLeaks ( void  )
static

Definition at line 4154 of file bufmgr.c.

4155{
4156#ifdef USE_ASSERT_CHECKING
4157 int RefCountErrors = 0;
4158 PrivateRefCountEntry *res;
4159 int i;
4160 char *s;
4161
4162 /* check the array */
4163 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
4164 {
4165 if (PrivateRefCountArrayKeys[i] != InvalidBuffer)
4166 {
4167 res = &PrivateRefCountArray[i];
4168
4169 s = DebugPrintBufferRefcount(res->buffer);
4170 elog(WARNING, "buffer refcount leak: %s", s);
4171 pfree(s);
4172
4173 RefCountErrors++;
4174 }
4175 }
4176
4177 /* if necessary search the hash */
4178 if (PrivateRefCountOverflowed)
4179 {
4180 HASH_SEQ_STATUS hstat;
4181
4182 hash_seq_init(&hstat, PrivateRefCountHash);
4183 while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL)
4184 {
4185 s = DebugPrintBufferRefcount(res->buffer);
4186 elog(WARNING, "buffer refcount leak: %s", s);
4187 pfree(s);
4188 RefCountErrors++;
4189 }
4190 }
4191
4192 Assert(RefCountErrors == 0);
4193#endif
4194}
#define InvalidBuffer
Definition: buf.h:25
static Buffer PrivateRefCountArrayKeys[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:238
char * DebugPrintBufferRefcount(Buffer buffer)
Definition: bufmgr.c:4261
#define REFCOUNT_ARRAY_ENTRIES
Definition: bufmgr.c:122
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]
Definition: bufmgr.c:239
static HTAB * PrivateRefCountHash
Definition: bufmgr.c:240
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1415
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1380

References Assert(), PrivateRefCountEntry::buffer, DebugPrintBufferRefcount(), elog, hash_seq_init(), hash_seq_search(), i, InvalidBuffer, pfree(), PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, and WARNING.

Referenced by AtEOXact_Buffers(), and AtProcExit_Buffers().

◆ CheckPointBuffers()

void CheckPointBuffers ( int  flags)

Definition at line 4304 of file bufmgr.c.

4305{
4306 BufferSync(flags);
4307}
static void BufferSync(int flags)
Definition: bufmgr.c:3437

References BufferSync().

Referenced by CheckPointGuts().

◆ CheckReadBuffersOperation()

static void CheckReadBuffersOperation ( ReadBuffersOperation *  operation,
bool  is_complete 
)
static

Definition at line 1602 of file bufmgr.c.

1603{
1604#ifdef USE_ASSERT_CHECKING
1605 Assert(operation->nblocks_done <= operation->nblocks);
1606 Assert(!is_complete || operation->nblocks == operation->nblocks_done);
1607
1608 for (int i = 0; i < operation->nblocks; i++)
1609 {
1610 Buffer buffer = operation->buffers[i];
1611 BufferDesc *buf_hdr = BufferIsLocal(buffer) ?
1612 GetLocalBufferDescriptor(-buffer - 1) :
1613 GetBufferDescriptor(buffer - 1);
1614
1615 Assert(BufferGetBlockNumber(buffer) == operation->blocknum + i);
1616 Assert(pg_atomic_read_u32(&buf_hdr->state) & BM_TAG_VALID);
1617
1618 if (i < operation->nblocks_done)
1619 Assert(pg_atomic_read_u32(&buf_hdr->state) & BM_VALID);
1620 }
1621#endif
1622}

References Assert(), ReadBuffersOperation::blocknum, BM_TAG_VALID, BM_VALID, PrivateRefCountEntry::buffer, BufferGetBlockNumber(), BufferIsLocal, ReadBuffersOperation::buffers, GetBufferDescriptor(), GetLocalBufferDescriptor(), i, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, pg_atomic_read_u32(), and BufferDesc::state.

Referenced by StartReadBuffersImpl(), and WaitReadBuffers().

◆ ckpt_buforder_comparator()

static int ckpt_buforder_comparator ( const CkptSortItem *  a,
const CkptSortItem *  b 
)
inline static

Definition at line 6447 of file bufmgr.c.

6448{
6449 /* compare tablespace */
6450 if (a->tsId < b->tsId)
6451 return -1;
6452 else if (a->tsId > b->tsId)
6453 return 1;
6454 /* compare relation */
6455 if (a->relNumber < b->relNumber)
6456 return -1;
6457 else if (a->relNumber > b->relNumber)
6458 return 1;
6459 /* compare fork */
6460 else if (a->forkNum < b->forkNum)
6461 return -1;
6462 else if (a->forkNum > b->forkNum)
6463 return 1;
6464 /* compare block number */
6465 else if (a->blockNum < b->blockNum)
6466 return -1;
6467 else if (a->blockNum > b->blockNum)
6468 return 1;
6469 /* equal page IDs are unlikely, but not impossible */
6470 return 0;
6471}
int b
Definition: isn.c:74
int a
Definition: isn.c:73

References a, and b.

◆ ConditionalLockBuffer()

bool ConditionalLockBuffer ( Buffer  buffer)

◆ ConditionalLockBufferForCleanup()

bool ConditionalLockBufferForCleanup ( Buffer  buffer)

Definition at line 5952 of file bufmgr.c.

5953{
5954 BufferDesc *bufHdr;
5955 uint32 buf_state,
5956 refcount;
5957
5958 Assert(BufferIsValid(buffer));
5959
5960 /* see AIO related comment in LockBufferForCleanup() */
5961
5962 if (BufferIsLocal(buffer))
5963 {
5964 refcount = LocalRefCount[-buffer - 1];
5965 /* There should be exactly one pin */
5966 Assert(refcount > 0);
5967 if (refcount != 1)
5968 return false;
5969 /* Nobody else to wait for */
5970 return true;
5971 }
5972
5973 /* There should be exactly one local pin */
5974 refcount = GetPrivateRefCount(buffer);
5975 Assert(refcount);
5976 if (refcount != 1)
5977 return false;
5978
5979 /* Try to acquire lock */
5980 if (!ConditionalLockBuffer(buffer))
5981 return false;
5982
5983 bufHdr = GetBufferDescriptor(buffer - 1);
5984 buf_state = LockBufHdr(bufHdr);
5985 refcount = BUF_STATE_GET_REFCOUNT(buf_state);
5986
5987 Assert(refcount > 0);
5988 if (refcount == 1)
5989 {
5990 /* Successfully acquired exclusive lock with pincount 1 */
5991 UnlockBufHdr(bufHdr);
5992 return true;
5993 }
5994
5995 /* Failed, so release the lock */
5996 UnlockBufHdr(bufHdr);
5997 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5998 return false;
5999}
void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition: bufmgr.c:5699
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:5725
@ BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:205

References Assert(), BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsValid(), ConditionalLockBuffer(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBuffer(), LockBufHdr(), and UnlockBufHdr().

Referenced by _hash_finish_split(), _hash_getbuf_with_condlock_cleanup(), heap_page_prune_opt(), and lazy_scan_heap().
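
A hedged usage sketch of the opportunistic pattern those callers follow; rel, blkno, and strategy are assumed to exist in the caller's context:

    Buffer buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, strategy);

    if (ConditionalLockBufferForCleanup(buf))
    {
        /* exclusive lock held and we are the sole pinner: safe to prune */
        /* ... page cleanup work ... */
        UnlockReleaseBuffer(buf);
    }
    else
        ReleaseBuffer(buf);     /* contended: skip the page rather than wait */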

◆ CreateAndCopyRelationData()

void CreateAndCopyRelationData ( RelFileLocator  src_rlocator,
RelFileLocator  dst_rlocator,
bool  permanent 
)

Definition at line 5337 of file bufmgr.c.

5339{
5340 char relpersistence;
5341 SMgrRelation src_rel;
5342 SMgrRelation dst_rel;
5343
5344 /* Set the relpersistence. */
5345 relpersistence = permanent ?
5346 RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
5347
5348 src_rel = smgropen(src_rlocator, INVALID_PROC_NUMBER);
5349 dst_rel = smgropen(dst_rlocator, INVALID_PROC_NUMBER);
5350
5351 /*
5352 * Create and copy all forks of the relation. During create database we
5353 * have a separate cleanup mechanism which deletes complete database
5354 * directory. Therefore, each individual relation doesn't need to be
5355 * registered for cleanup.
5356 */
5357 RelationCreateStorage(dst_rlocator, relpersistence, false);
5358
5359 /* copy main fork. */
5360 RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, MAIN_FORKNUM,
5361 permanent);
5362
5363 /* copy those extra forks that exist */
5364 for (ForkNumber forkNum = MAIN_FORKNUM + 1;
5365 forkNum <= MAX_FORKNUM; forkNum++)
5366 {
5367 if (smgrexists(src_rel, forkNum))
5368 {
5369 smgrcreate(dst_rel, forkNum, false);
5370
5371 /*
5372 * WAL log creation if the relation is persistent, or this is the
5373 * init fork of an unlogged relation.
5374 */
5375 if (permanent || forkNum == INIT_FORKNUM)
5376 log_smgrcreate(&dst_rlocator, forkNum);
5377
5378 /* Copy a fork's data, block by block. */
5379 RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, forkNum,
5380 permanent);
5381 }
5382 }
5383}
static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent)
Definition: bufmgr.c:5223
@ MAIN_FORKNUM
Definition: relpath.h:58
#define MAX_FORKNUM
Definition: relpath.h:70
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
Definition: smgr.c:240
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:481
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:462
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition: storage.c:122
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:187

References INIT_FORKNUM, INVALID_PROC_NUMBER, log_smgrcreate(), MAIN_FORKNUM, MAX_FORKNUM, RelationCopyStorageUsingBuffer(), RelationCreateStorage(), smgrcreate(), smgrexists(), and smgropen().

Referenced by CreateDatabaseUsingWalLog().

◆ DebugPrintBufferRefcount()

char * DebugPrintBufferRefcount ( Buffer  buffer)

Definition at line 4261 of file bufmgr.c.

4262{
4263 BufferDesc *buf;
4264 int32 loccount;
4265 char *result;
4266 ProcNumber backend;
4267 uint32 buf_state;
4268
4269 Assert(BufferIsValid(buffer));
4270 if (BufferIsLocal(buffer))
4271 {
4272 buf = GetLocalBufferDescriptor(-buffer - 1);
4273 loccount = LocalRefCount[-buffer - 1];
4274 backend = MyProcNumber;
4275 }
4276 else
4277 {
4278 buf = GetBufferDescriptor(buffer - 1);
4279 loccount = GetPrivateRefCount(buffer);
4280 backend = INVALID_PROC_NUMBER;
4281 }
4282
4283 /* theoretically we should lock the bufhdr here */
4284 buf_state = pg_atomic_read_u32(&buf->state);
4285
4286 result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)",
4287 buffer,
4288 relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend,
4289 BufTagGetForkNum(&buf->tag)).str,
4290 buf->tag.blockNum, buf_state & BUF_FLAG_MASK,
4291 BUF_STATE_GET_REFCOUNT(buf_state), loccount);
4292 return result;
4293}
#define BUF_FLAG_MASK
Definition: buf_internals.h:56
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43

References Assert(), buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), BufTagGetForkNum(), BufTagGetRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), GetPrivateRefCount(), INVALID_PROC_NUMBER, LocalRefCount, MyProcNumber, pg_atomic_read_u32(), psprintf(), and relpathbackend.

Referenced by buffer_call_start_io(), buffer_call_terminate_io(), CheckForBufferLeaks(), CheckForLocalBufferLeaks(), and ResOwnerPrintBufferPin().

◆ DropDatabaseBuffers()

void DropDatabaseBuffers ( Oid  dbid)

Definition at line 4990 of file bufmgr.c.

4991{
4992 int i;
4993
4994 /*
4995 * We needn't consider local buffers, since by assumption the target
4996 * database isn't our own.
4997 */
4998
4999 for (i = 0; i < NBuffers; i++)
5000 {
5001 BufferDesc *bufHdr = GetBufferDescriptor(i);
5002
5003 /*
5004 * As in DropRelationBuffers, an unlocked precheck should be safe and
5005 * saves some cycles.
5006 */
5007 if (bufHdr->tag.dbOid != dbid)
5008 continue;
5009
5010 LockBufHdr(bufHdr);
5011 if (bufHdr->tag.dbOid == dbid)
5012 InvalidateBuffer(bufHdr); /* releases spinlock */
5013 else
5014 UnlockBufHdr(bufHdr);
5015 }
5016}
static void InvalidateBuffer(BufferDesc *buf)
Definition: bufmgr.c:2248

References buftag::dbOid, GetBufferDescriptor(), i, InvalidateBuffer(), LockBufHdr(), NBuffers, BufferDesc::tag, and UnlockBufHdr().

Referenced by createdb_failure_callback(), dbase_redo(), dropdb(), and movedb().

◆ DropRelationBuffers()

void DropRelationBuffers ( SMgrRelation  smgr_reln,
ForkNumber *  forkNum,
int  nforks,
BlockNumber *  firstDelBlock 
)

Definition at line 4640 of file bufmgr.c.

4642{
4643 int i;
4644 int j;
4645 RelFileLocatorBackend rlocator;
4646 BlockNumber nForkBlock[MAX_FORKNUM];
4647 uint64 nBlocksToInvalidate = 0;
4648
4649 rlocator = smgr_reln->smgr_rlocator;
4650
4651 /* If it's a local relation, it's localbuf.c's problem. */
4652 if (RelFileLocatorBackendIsTemp(rlocator))
4653 {
4654 if (rlocator.backend == MyProcNumber)
4655 DropRelationLocalBuffers(rlocator.locator, forkNum, nforks,
4656 firstDelBlock);
4657
4658 return;
4659 }
4660
4661 /*
4662 * To remove all the pages of the specified relation forks from the buffer
4663 * pool, we need to scan the entire buffer pool but we can optimize it by
4664 * finding the buffers from BufMapping table provided we know the exact
4665 * size of each fork of the relation. The exact size is required to ensure
4666 * that we don't leave any buffer for the relation being dropped as
4667 * otherwise the background writer or checkpointer can lead to a PANIC
4668 * error while flushing buffers corresponding to files that don't exist.
4669 *
4670 * To know the exact size, we rely on the size cached for each fork by us
4671 * during recovery which limits the optimization to recovery and on
4672 * standbys but we can easily extend it once we have shared cache for
4673 * relation size.
4674 *
4675 * In recovery, we cache the value returned by the first lseek(SEEK_END)
4676 * and the future writes keeps the cached value up-to-date. See
4677 * smgrextend. It is possible that the value of the first lseek is smaller
4678 * than the actual number of existing blocks in the file due to buggy
4679 * Linux kernels that might not have accounted for the recent write. But
4680 * that should be fine because there must not be any buffers after that
4681 * file size.
4682 */
4683 for (i = 0; i < nforks; i++)
4684 {
4685 /* Get the number of blocks for a relation's fork */
4686 nForkBlock[i] = smgrnblocks_cached(smgr_reln, forkNum[i]);
4687
4688 if (nForkBlock[i] == InvalidBlockNumber)
4689 {
4690 nBlocksToInvalidate = InvalidBlockNumber;
4691 break;
4692 }
4693
4694 /* calculate the number of blocks to be invalidated */
4695 nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
4696 }
4697
4698 /*
4699 * We apply the optimization iff the total number of blocks to invalidate
4700 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4701 */
4702 if (BlockNumberIsValid(nBlocksToInvalidate) &&
4703 nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4704 {
4705 for (j = 0; j < nforks; j++)
4706 FindAndDropRelationBuffers(rlocator.locator, forkNum[j],
4707 nForkBlock[j], firstDelBlock[j]);
4708 return;
4709 }
4710
4711 for (i = 0; i < NBuffers; i++)
4712 {
4713 BufferDesc *bufHdr = GetBufferDescriptor(i);
4714
4715 /*
4716 * We can make this a tad faster by prechecking the buffer tag before
4717 * we attempt to lock the buffer; this saves a lot of lock
4718 * acquisitions in typical cases. It should be safe because the
4719 * caller must have AccessExclusiveLock on the relation, or some other
4720 * reason to be certain that no one is loading new pages of the rel
4721 * into the buffer pool. (Otherwise we might well miss such pages
4722 * entirely.) Therefore, while the tag might be changing while we
4723 * look at it, it can't be changing *to* a value we care about, only
4724 * *away* from such a value. So false negatives are impossible, and
4725 * false positives are safe because we'll recheck after getting the
4726 * buffer lock.
4727 *
4728 * We could check forkNum and blockNum as well as the rlocator, but
4729 * the incremental win from doing so seems small.
4730 */
4731 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator))
4732 continue;
4733
4734 LockBufHdr(bufHdr);
4735
4736 for (j = 0; j < nforks; j++)
4737 {
4738 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator.locator) &&
4739 BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
4740 bufHdr->tag.blockNum >= firstDelBlock[j])
4741 {
4742 InvalidateBuffer(bufHdr); /* releases spinlock */
4743 break;
4744 }
4745 }
4746 if (j >= nforks)
4747 UnlockBufHdr(bufHdr);
4748 }
4749}
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BUF_DROP_FULL_SCAN_THRESHOLD
Definition: bufmgr.c:91
static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, BlockNumber firstDelBlock)
Definition: bufmgr.c:4930
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
Definition: localbuf.c:665
#define RelFileLocatorBackendIsTemp(rlocator)
BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:847

References RelFileLocatorBackend::backend, buftag::blockNum, BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetForkNum(), BufTagMatchesRelFileLocator(), DropRelationLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, InvalidateBuffer(), InvalidBlockNumber, j, RelFileLocatorBackend::locator, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, RelFileLocatorBackendIsTemp, SMgrRelationData::smgr_rlocator, smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrtruncate().
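
The threshold decision above is easiest to see with concrete numbers. Below is a standalone sketch (not PostgreSQL code; the values are hypothetical): with 1 GiB of shared_buffers (131072 pages of 8 kB), BUF_DROP_FULL_SCAN_THRESHOLD works out to 4096 blocks, so a small truncation does a handful of buffer-table lookups instead of touching every buffer header.

    #include <stdint.h>
    #include <stdio.h>

    static int NBuffers = 131072;   /* hypothetical: 1 GiB of 8 kB pages */

    int
    main(void)
    {
        uint64_t threshold = (uint64_t) (NBuffers / 32);    /* 4096 */
        uint64_t nBlocksToInvalidate = 1000;    /* summed cached fork sizes */

        if (nBlocksToInvalidate < threshold)
            printf("targeted drop: %llu buffer-table lookups\n",
                   (unsigned long long) nBlocksToInvalidate);
        else
            printf("full scan: %d buffer headers\n", NBuffers);
        return 0;
    }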

◆ DropRelationsAllBuffers()

void DropRelationsAllBuffers ( SMgrRelation *  smgr_reln,
int  nlocators 
)

Definition at line 4760 of file bufmgr.c.

4761{
4762 int i;
4763 int n = 0;
4764 SMgrRelation *rels;
4765 BlockNumber (*block)[MAX_FORKNUM + 1];
4766 uint64 nBlocksToInvalidate = 0;
4767 RelFileLocator *locators;
4768 bool cached = true;
4769 bool use_bsearch;
4770
4771 if (nlocators == 0)
4772 return;
4773
4774 rels = palloc_array(SMgrRelation, nlocators); /* non-local relations */
4775
4776 /* If it's a local relation, it's localbuf.c's problem. */
4777 for (i = 0; i < nlocators; i++)
4778 {
4779 if (RelFileLocatorBackendIsTemp(smgr_reln[i]->smgr_rlocator))
4780 {
4781 if (smgr_reln[i]->smgr_rlocator.backend == MyProcNumber)
4782 DropRelationAllLocalBuffers(smgr_reln[i]->smgr_rlocator.locator);
4783 }
4784 else
4785 rels[n++] = smgr_reln[i];
4786 }
4787
4788 /*
4789 * If there are no non-local relations, then we're done. Release the
4790 * memory and return.
4791 */
4792 if (n == 0)
4793 {
4794 pfree(rels);
4795 return;
4796 }
4797
4798 /*
4799 * This is used to remember the number of blocks for all the relations'
4800 * forks.
4801 */
4802 block = (BlockNumber (*)[MAX_FORKNUM + 1])
4803 palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
4804
4805 /*
4806 * We can avoid scanning the entire buffer pool if we know the exact size
4807 * of each of the given relation forks. See DropRelationBuffers.
4808 */
4809 for (i = 0; i < n && cached; i++)
4810 {
4811 for (int j = 0; j <= MAX_FORKNUM; j++)
4812 {
4813 /* Get the number of blocks for a relation's fork. */
4814 block[i][j] = smgrnblocks_cached(rels[i], j);
4815
4816 /* We only need to consider the relation forks that exist. */
4817 if (block[i][j] == InvalidBlockNumber)
4818 {
4819 if (!smgrexists(rels[i], j))
4820 continue;
4821 cached = false;
4822 break;
4823 }
4824
4825 /* calculate the total number of blocks to be invalidated */
4826 nBlocksToInvalidate += block[i][j];
4827 }
4828 }
4829
4830 /*
4831 * We apply the optimization iff the total number of blocks to invalidate
4832 * is below the BUF_DROP_FULL_SCAN_THRESHOLD.
4833 */
4834 if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
4835 {
4836 for (i = 0; i < n; i++)
4837 {
4838 for (int j = 0; j <= MAX_FORKNUM; j++)
4839 {
4840 /* ignore relation forks that don't exist */
4841 if (!BlockNumberIsValid(block[i][j]))
4842 continue;
4843
4844 /* drop all the buffers for a particular relation fork */
4845 FindAndDropRelationBuffers(rels[i]->smgr_rlocator.locator,
4846 j, block[i][j], 0);
4847 }
4848 }
4849
4850 pfree(block);
4851 pfree(rels);
4852 return;
4853 }
4854
4855 pfree(block);
4856 locators = palloc_array(RelFileLocator, n); /* non-local relations */
4857 for (i = 0; i < n; i++)
4858 locators[i] = rels[i]->smgr_rlocator.locator;
4859
4860 /*
4861 * For a small number of relations to drop, just use a simple walk-through
4862 * to save the bsearch overhead. The threshold is more a guess than an
4863 * exactly determined value, as it depends on many factors (CPU and RAM
4864 * speeds, amount of shared buffers, etc.).
4865 */
4866 use_bsearch = n > RELS_BSEARCH_THRESHOLD;
4867
4868 /* sort the list of rlocators if necessary */
4869 if (use_bsearch)
4870 qsort(locators, n, sizeof(RelFileLocator), rlocator_comparator);
4871
4872 for (i = 0; i < NBuffers; i++)
4873 {
4874 RelFileLocator *rlocator = NULL;
4875 BufferDesc *bufHdr = GetBufferDescriptor(i);
4876
4877 /*
4878 * As in DropRelationBuffers, an unlocked precheck should be safe and
4879 * saves some cycles.
4880 */
4881
4882 if (!use_bsearch)
4883 {
4884 int j;
4885
4886 for (j = 0; j < n; j++)
4887 {
4888 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &locators[j]))
4889 {
4890 rlocator = &locators[j];
4891 break;
4892 }
4893 }
4894 }
4895 else
4896 {
4897 RelFileLocator locator;
4898
4899 locator = BufTagGetRelFileLocator(&bufHdr->tag);
4900 rlocator = bsearch(&locator,
4901 locators, n, sizeof(RelFileLocator),
4902 rlocator_comparator);
4903 }
4904
4905 /* buffer doesn't belong to any of the given relfilelocators; skip it */
4906 if (rlocator == NULL)
4907 continue;
4908
4909 LockBufHdr(bufHdr);
4910 if (BufTagMatchesRelFileLocator(&bufHdr->tag, rlocator))
4911 InvalidateBuffer(bufHdr); /* releases spinlock */
4912 else
4913 UnlockBufHdr(bufHdr);
4914 }
4915
4916 pfree(locators);
4917 pfree(rels);
4918}
#define RELS_BSEARCH_THRESHOLD
Definition: bufmgr.c:83
#define palloc_array(type, count)
Definition: fe_memutils.h:76
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
Definition: localbuf.c:702
#define qsort(a, b, c, d)
Definition: port.h:499

References BlockNumberIsValid(), BUF_DROP_FULL_SCAN_THRESHOLD, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), DropRelationAllLocalBuffers(), FindAndDropRelationBuffers(), GetBufferDescriptor(), i, if(), InvalidateBuffer(), InvalidBlockNumber, j, LockBufHdr(), MAX_FORKNUM, MyProcNumber, NBuffers, palloc(), palloc_array, pfree(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, rlocator_comparator(), smgrexists(), smgrnblocks_cached(), BufferDesc::tag, and UnlockBufHdr().

Referenced by smgrdounlinkall().
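
The linear-scan versus bsearch choice above can be isolated into a small standalone sketch (hypothetical names; the cutoff mirrors the bufmgr.c constant). Each of the NBuffers tags is tested against all n keys, so sorting pays off only once n is large enough:

    #include <stdlib.h>

    #define RELS_BSEARCH_THRESHOLD 20   /* same cutoff as bufmgr.c */

    static int
    cmp_int(const void *a, const void *b)
    {
        int ia = *(const int *) a;
        int ib = *(const int *) b;

        return (ia > ib) - (ia < ib);
    }

    /* keys must already be sorted with qsort(keys, n, sizeof(int), cmp_int)
     * when n exceeds the threshold, as DropRelationsAllBuffers() does. */
    static int *
    find_key(int *keys, int n, int probe)
    {
        if (n <= RELS_BSEARCH_THRESHOLD)
        {
            for (int j = 0; j < n; j++)
                if (keys[j] == probe)
                    return &keys[j];
            return NULL;
        }
        return bsearch(&probe, keys, n, sizeof(int), cmp_int);
    }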

◆ EvictAllUnpinnedBuffers()

void EvictAllUnpinnedBuffers ( int32 *  buffers_evicted,
int32 *  buffers_flushed,
int32 *  buffers_skipped 
)

Definition at line 6778 of file bufmgr.c.

6780{
6781 *buffers_evicted = 0;
6782 *buffers_skipped = 0;
6783 *buffers_flushed = 0;
6784
6785 for (int buf = 1; buf <= NBuffers; buf++)
6786 {
6787 BufferDesc *desc = GetBufferDescriptor(buf - 1);
6788 uint32 buf_state;
6789 bool buffer_flushed;
6790
6791 CHECK_FOR_INTERRUPTS();
6792
6793 buf_state = pg_atomic_read_u32(&desc->state);
6794 if (!(buf_state & BM_VALID))
6795 continue;
6796
6797 ResourceOwnerEnlarge(CurrentResourceOwner);
6798 ReservePrivateRefCountEntry();
6799
6800 LockBufHdr(desc);
6801
6802 if (EvictUnpinnedBufferInternal(desc, &buffer_flushed))
6803 (*buffers_evicted)++;
6804 else
6805 (*buffers_skipped)++;
6806
6807 if (buffer_flushed)
6808 (*buffers_flushed)++;
6809 }
6810}
static bool EvictUnpinnedBufferInternal(BufferDesc *desc, bool *buffer_flushed)
Definition: bufmgr.c:6687
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123

References BM_VALID, buf, CHECK_FOR_INTERRUPTS, CurrentResourceOwner, EvictUnpinnedBufferInternal(), GetBufferDescriptor(), LockBufHdr(), NBuffers, pg_atomic_read_u32(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), and BufferDesc::state.

Referenced by pg_buffercache_evict_all().
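
A minimal caller sketch, modeled on pg_buffercache_evict_all(); the wrapper name evict_everything is hypothetical:

    #include "postgres.h"
    #include "storage/bufmgr.h"

    static void
    evict_everything(void)
    {
        int32 evicted;
        int32 flushed;
        int32 skipped;

        EvictAllUnpinnedBuffers(&evicted, &flushed, &skipped);
        elog(LOG, "evicted=%d flushed=%d skipped=%d",
             evicted, flushed, skipped);
    }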

◆ EvictRelUnpinnedBuffers()

void EvictRelUnpinnedBuffers ( Relation  rel,
int32 *  buffers_evicted,
int32 *  buffers_flushed,
int32 *  buffers_skipped 
)

Definition at line 6828 of file bufmgr.c.

6830{
6831 Assert(!RelationUsesLocalBuffers(rel));
6832
6833 *buffers_skipped = 0;
6834 *buffers_evicted = 0;
6835 *buffers_flushed = 0;
6836
6837 for (int buf = 1; buf <= NBuffers; buf++)
6838 {
6839 BufferDesc *desc = GetBufferDescriptor(buf - 1);
6840 uint32 buf_state = pg_atomic_read_u32(&(desc->state));
6841 bool buffer_flushed;
6842
6843 CHECK_FOR_INTERRUPTS();
6844
6845 /* An unlocked precheck should be safe and saves some cycles. */
6846 if ((buf_state & BM_VALID) == 0 ||
6847 !BufTagMatchesRelFileLocator(&desc->tag, &rel->rd_locator))
6848 continue;
6849
6850 /* Make sure we can pin the buffer. */
6851 ResourceOwnerEnlarge(CurrentResourceOwner);
6852 ReservePrivateRefCountEntry();
6853
6854 buf_state = LockBufHdr(desc);
6855
6856 /* recheck, could have changed without the lock */
6857 if ((buf_state & BM_VALID) == 0 ||
6858 !BufTagMatchesRelFileLocator(&desc->tag, &rel->rd_locator))
6859 {
6860 UnlockBufHdr(desc);
6861 continue;
6862 }
6863
6864 if (EvictUnpinnedBufferInternal(desc, &buffer_flushed))
6865 (*buffers_evicted)++;
6866 else
6867 (*buffers_skipped)++;
6868
6869 if (buffer_flushed)
6870 (*buffers_flushed)++;
6871 }
6872}
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:647
RelFileLocator rd_locator
Definition: rel.h:57

References Assert(), BM_VALID, buf, BufTagMatchesRelFileLocator(), CHECK_FOR_INTERRUPTS, CurrentResourceOwner, EvictUnpinnedBufferInternal(), GetBufferDescriptor(), LockBufHdr(), NBuffers, pg_atomic_read_u32(), RelationData::rd_locator, RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by pg_buffercache_evict_relation().
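
A caller sketch modeled on pg_buffercache_evict_relation(); evict_rel is a hypothetical name, and the relation must not use local buffers (see the Assert at line 6831):

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    static void
    evict_rel(Relation rel)
    {
        int32 evicted;
        int32 flushed;
        int32 skipped;

        Assert(!RelationUsesLocalBuffers(rel));
        EvictRelUnpinnedBuffers(rel, &evicted, &flushed, &skipped);
    }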

◆ EvictUnpinnedBuffer()

bool EvictUnpinnedBuffer ( Buffer  buf,
bool *  buffer_flushed 
)

Definition at line 6749 of file bufmgr.c.

6750{
6751 BufferDesc *desc;
6752
6753 Assert(BufferIsValid(buf) && !BufferIsLocal(buf));
6754
6755 /* Make sure we can pin the buffer. */
6756 ResourceOwnerEnlarge(CurrentResourceOwner);
6757 ReservePrivateRefCountEntry();
6758
6759 desc = GetBufferDescriptor(buf - 1);
6760 LockBufHdr(desc);
6761
6762 return EvictUnpinnedBufferInternal(desc, buffer_flushed);
6763}

References Assert(), buf, BufferIsLocal, BufferIsValid(), CurrentResourceOwner, EvictUnpinnedBufferInternal(), GetBufferDescriptor(), LockBufHdr(), ReservePrivateRefCountEntry(), and ResourceOwnerEnlarge().

Referenced by invalidate_rel_block(), modify_rel_block(), and pg_buffercache_evict().
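
A single-buffer sketch in the style of pg_buffercache_evict(); try_evict is a hypothetical name, and buf must be a valid shared (non-local) buffer:

    #include "postgres.h"
    #include "storage/bufmgr.h"

    static bool
    try_evict(Buffer buf)
    {
        bool buffer_flushed;
        bool ok = EvictUnpinnedBuffer(buf, &buffer_flushed);

        elog(DEBUG1, "buffer %d: evicted=%d flushed=%d",
             buf, ok, buffer_flushed);
        return ok;
    }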

◆ EvictUnpinnedBufferInternal()

static bool EvictUnpinnedBufferInternal ( BufferDesc *  desc,
bool *  buffer_flushed 
)
static

Definition at line 6687 of file bufmgr.c.

6688{
6689 uint32 buf_state;
6690 bool result;
6691
6692 *buffer_flushed = false;
6693
6694 buf_state = pg_atomic_read_u32(&(desc->state));
6695 Assert(buf_state & BM_LOCKED);
6696
6697 if ((buf_state & BM_VALID) == 0)
6698 {
6699 UnlockBufHdr(desc);
6700 return false;
6701 }
6702
6703 /* Check that it's not pinned already. */
6704 if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
6705 {
6706 UnlockBufHdr(desc);
6707 return false;
6708 }
6709
6710 PinBuffer_Locked(desc); /* releases spinlock */
6711
6712 /* If it was dirty, try to clean it once. */
6713 if (buf_state & BM_DIRTY)
6714 {
6715 FlushUnlockedBuffer(desc, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
6716 *buffer_flushed = true;
6717 }
6718
6719 /* This will return false if it becomes dirty or someone else pins it. */
6720 result = InvalidateVictimBuffer(desc);
6721
6722 UnpinBuffer(desc);
6723
6724 return result;
6725}
#define BM_LOCKED
Definition: buf_internals.h:68
static void FlushUnlockedBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition: bufmgr.c:4515
static void PinBuffer_Locked(BufferDesc *buf)
Definition: bufmgr.c:3273
static bool InvalidateVictimBuffer(BufferDesc *buf_hdr)
Definition: bufmgr.c:2343

References Assert(), BM_DIRTY, BM_LOCKED, BM_VALID, BUF_STATE_GET_REFCOUNT, FlushUnlockedBuffer(), InvalidateVictimBuffer(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, pg_atomic_read_u32(), PinBuffer_Locked(), BufferDesc::state, UnlockBufHdr(), and UnpinBuffer().

Referenced by EvictAllUnpinnedBuffers(), EvictRelUnpinnedBuffers(), and EvictUnpinnedBuffer().

◆ ExtendBufferedRel()

Buffer ExtendBufferedRel ( BufferManagerRelation  bmr,
ForkNumber  forkNum,
BufferAccessStrategy  strategy,
uint32  flags 
)

Definition at line 939 of file bufmgr.c.

943{
944 Buffer buf;
945 uint32 extend_by = 1;
946
947 ExtendBufferedRelBy(bmr, forkNum, strategy, flags, extend_by,
948 &buf, &extend_by);
949
950 return buf;
951}
BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:971

References buf, and ExtendBufferedRelBy().

Referenced by _bt_allocbuf(), _hash_getnewbuf(), BloomNewBuffer(), brinbuild(), brinbuildempty(), fill_seq_fork_with_data(), ginbuildempty(), GinNewBuffer(), gistbuildempty(), gistNewBuffer(), ReadBuffer_common(), revmap_physical_extend(), and SpGistNewBuffer().
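
A typical call, sketched after index AM callers such as _bt_allocbuf(); the wrapper name add_new_page is hypothetical. EB_LOCK_FIRST returns the single new page already exclusively locked:

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    static Buffer
    add_new_page(Relation rel)
    {
        /* one zero-filled page appended to the main fork, pinned + locked */
        return ExtendBufferedRel(BMR_REL(rel), MAIN_FORKNUM, NULL,
                                 EB_LOCK_FIRST);
    }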

◆ ExtendBufferedRelBy()

BlockNumber ExtendBufferedRelBy ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
Buffer *  buffers,
uint32 *  extended_by 
)

Definition at line 971 of file bufmgr.c.

978{
979 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
980 Assert(bmr.smgr == NULL || bmr.relpersistence != '\0');
981 Assert(extend_by > 0);
982
983 if (bmr.relpersistence == '\0')
984 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
985
986 return ExtendBufferedRelCommon(bmr, fork, strategy, flags,
987 extend_by, InvalidBlockNumber,
988 buffers, extended_by);
989}
static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:2625
SMgrRelation smgr
Definition: bufmgr.h:110
Form_pg_class rd_rel
Definition: rel.h:111

References Assert(), ExtendBufferedRelCommon(), InvalidBlockNumber, RelationData::rd_rel, BufferManagerRelation::rel, BufferManagerRelation::relpersistence, and BufferManagerRelation::smgr.

Referenced by ExtendBufferedRel(), grow_rel(), and RelationAddBlocks().
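
A bulk-extension sketch in the spirit of RelationAddBlocks(); EXTEND_CHUNK and bulk_extend are hypothetical. The function may extend by fewer blocks than requested (extended_by reports the actual count), and every returned buffer comes back pinned:

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    #define EXTEND_CHUNK 64         /* hypothetical batch size */

    static BlockNumber
    bulk_extend(Relation rel)
    {
        Buffer      buffers[EXTEND_CHUNK];
        uint32      extended_by = 0;
        BlockNumber first_block;

        first_block = ExtendBufferedRelBy(BMR_REL(rel), MAIN_FORKNUM, NULL,
                                          0, EXTEND_CHUNK,
                                          buffers, &extended_by);
        for (uint32 i = 0; i < extended_by; i++)
            ReleaseBuffer(buffers[i]);
        return first_block;
    }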

◆ ExtendBufferedRelCommon()

static BlockNumber ExtendBufferedRelCommon ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer *  buffers,
uint32 *  extended_by 
)
static

Definition at line 2625 of file bufmgr.c.

2633{
2634 BlockNumber first_block;
2635
2636 TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork,
2637 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.spcOid,
2638 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.dbOid,
2639 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.relNumber,
2640 BMR_GET_SMGR(bmr)->smgr_rlocator.backend,
2641 extend_by);
2642
2643 if (bmr.relpersistence == RELPERSISTENCE_TEMP)
2644 first_block = ExtendBufferedRelLocal(bmr, fork, flags,
2645 extend_by, extend_upto,
2646 buffers, &extend_by);
2647 else
2648 first_block = ExtendBufferedRelShared(bmr, fork, strategy, flags,
2649 extend_by, extend_upto,
2650 buffers, &extend_by);
2651 *extended_by = extend_by;
2652
2653 TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork,
2654 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.spcOid,
2655 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.dbOid,
2656 BMR_GET_SMGR(bmr)->smgr_rlocator.locator.relNumber,
2657 BMR_GET_SMGR(bmr)->smgr_rlocator.backend,
2658 *extended_by,
2659 first_block);
2660
2661 return first_block;
2662}
static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, ForkNumber fork, BufferAccessStrategy strategy, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: bufmgr.c:2669
#define BMR_GET_SMGR(bmr)
Definition: bufmgr.h:118
BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr, ForkNumber fork, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Definition: localbuf.c:346

References BMR_GET_SMGR, ExtendBufferedRelLocal(), ExtendBufferedRelShared(), and BufferManagerRelation::relpersistence.

Referenced by ExtendBufferedRelBy(), and ExtendBufferedRelTo().

◆ ExtendBufferedRelShared()

static BlockNumber ExtendBufferedRelShared ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
uint32  extend_by,
BlockNumber  extend_upto,
Buffer *  buffers,
uint32 *  extended_by 
)
static

Definition at line 2669 of file bufmgr.c.

2677{
2678 BlockNumber first_block;
2679 IOContext io_context = IOContextForStrategy(strategy);
2680 instr_time io_start;
2681
2682 LimitAdditionalPins(&extend_by);
2683
2684 /*
2685 * Acquire victim buffers for extension without holding extension lock.
2686 * Writing out victim buffers is the most expensive part of extending the
2687 * relation, particularly when doing so requires WAL flushes. Zeroing out
2688 * the buffers is also quite expensive, so do that before holding the
2689 * extension lock as well.
2690 *
2691 * These pages are pinned by us and not valid. While we hold the pin they
2692 * can't be acquired as victim buffers by another backend.
2693 */
2694 for (uint32 i = 0; i < extend_by; i++)
2695 {
2696 Block buf_block;
2697
2698 buffers[i] = GetVictimBuffer(strategy, io_context);
2699 buf_block = BufHdrGetBlock(GetBufferDescriptor(buffers[i] - 1));
2700
2701 /* new buffers are zero-filled */
2702 MemSet(buf_block, 0, BLCKSZ);
2703 }
2704
2705 /*
2706 * Lock relation against concurrent extensions, unless requested not to.
2707 *
2708 * We use the same extension lock for all forks. That's unnecessarily
2709 * restrictive, but currently extensions for forks don't happen often
2710 * enough to make it worth locking more granularly.
2711 *
2712 * Note that another backend might have extended the relation by the time
2713 * we get the lock.
2714 */
2715 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2716 LockRelationForExtension(bmr.rel, ExclusiveLock);
2717
2718 /*
2719 * If requested, invalidate size cache, so that smgrnblocks asks the
2720 * kernel.
2721 */
2722 if (flags & EB_CLEAR_SIZE_CACHE)
2723 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] = InvalidBlockNumber;
2724
2725 first_block = smgrnblocks(BMR_GET_SMGR(bmr), fork);
2726
2727 /*
2728 * Now that we have the accurate relation size, check if the caller wants
2729 * us to extend to only up to a specific size. If there were concurrent
2730 * extensions, we might have acquired too many buffers and need to release
2731 * them.
2732 */
2733 if (extend_upto != InvalidBlockNumber)
2734 {
2735 uint32 orig_extend_by = extend_by;
2736
2737 if (first_block > extend_upto)
2738 extend_by = 0;
2739 else if ((uint64) first_block + extend_by > extend_upto)
2740 extend_by = extend_upto - first_block;
2741
2742 for (uint32 i = extend_by; i < orig_extend_by; i++)
2743 {
2744 BufferDesc *buf_hdr = GetBufferDescriptor(buffers[i] - 1);
2745
2746 UnpinBuffer(buf_hdr);
2747 }
2748
2749 if (extend_by == 0)
2750 {
2751 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2752 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
2753 *extended_by = extend_by;
2754 return first_block;
2755 }
2756 }
2757
2758 /* Fail if relation is already at maximum possible length */
2759 if ((uint64) first_block + extend_by >= MaxBlockNumber)
2760 ereport(ERROR,
2761 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
2762 errmsg("cannot extend relation %s beyond %u blocks",
2763 relpath(BMR_GET_SMGR(bmr)->smgr_rlocator, fork).str,
2764 MaxBlockNumber)));
2765
2766 /*
2767 * Insert buffers into buffer table, mark as IO_IN_PROGRESS.
2768 *
2769 * This needs to happen before we extend the relation, because as soon as
2770 * we do, other backends can start to read in those pages.
2771 */
2772 for (uint32 i = 0; i < extend_by; i++)
2773 {
2774 Buffer victim_buf = buffers[i];
2775 BufferDesc *victim_buf_hdr = GetBufferDescriptor(victim_buf - 1);
2776 BufferTag tag;
2777 uint32 hash;
2778 LWLock *partition_lock;
2779 int existing_id;
2780
2781 /* in case we need to pin an existing buffer below */
2782 ResourceOwnerEnlarge(CurrentResourceOwner);
2783 ReservePrivateRefCountEntry();
2784
2785 InitBufferTag(&tag, &BMR_GET_SMGR(bmr)->smgr_rlocator.locator, fork,
2786 first_block + i);
2787 hash = BufTableHashCode(&tag);
2788 partition_lock = BufMappingPartitionLock(hash);
2789
2790 LWLockAcquire(partition_lock, LW_EXCLUSIVE);
2791
2792 existing_id = BufTableInsert(&tag, hash, victim_buf_hdr->buf_id);
2793
2794 /*
2795 * We get here only in the corner case where we are trying to extend
2796 * the relation but we found a pre-existing buffer. This can happen
2797 * because a prior attempt at extending the relation failed, and
2798 * because mdread doesn't complain about reads beyond EOF (when
2799 * zero_damaged_pages is ON) and so a previous attempt to read a block
2800 * beyond EOF could have left a "valid" zero-filled buffer.
2801 *
2802 * This has also been observed when the relation was overwritten by an
2803 * external process. Since the legitimate cases should always have
2804 * left a zero-filled buffer, complain if not PageIsNew.
2805 */
2806 if (existing_id >= 0)
2807 {
2808 BufferDesc *existing_hdr = GetBufferDescriptor(existing_id);
2809 Block buf_block;
2810 bool valid;
2811
2812 /*
2813 * Pin the existing buffer before releasing the partition lock,
2814 * preventing it from being evicted.
2815 */
2816 valid = PinBuffer(existing_hdr, strategy, false);
2817
2818 LWLockRelease(partition_lock);
2819 UnpinBuffer(victim_buf_hdr);
2820
2821 buffers[i] = BufferDescriptorGetBuffer(existing_hdr);
2822 buf_block = BufHdrGetBlock(existing_hdr);
2823
2824 if (valid && !PageIsNew((Page) buf_block))
2825 ereport(ERROR,
2826 (errmsg("unexpected data beyond EOF in block %u of relation \"%s\"",
2827 existing_hdr->tag.blockNum,
2828 relpath(BMR_GET_SMGR(bmr)->smgr_rlocator, fork).str)));
2829
2830 /*
2831 * We *must* do smgr[zero]extend before succeeding, else the page
2832 * will not be reserved by the kernel, and the next P_NEW call
2833 * will decide to return the same page. Clear the BM_VALID bit,
2834 * do StartBufferIO() and proceed.
2835 *
2836 * Loop to handle the very small possibility that someone re-sets
2837 * BM_VALID between our clearing it and StartBufferIO inspecting
2838 * it.
2839 */
2840 do
2841 {
2842 pg_atomic_fetch_and_u32(&existing_hdr->state, ~BM_VALID);
2843 } while (!StartBufferIO(existing_hdr, true, false));
2844 }
2845 else
2846 {
2847 uint32 buf_state;
2848 uint32 set_bits = 0;
2849
2850 buf_state = LockBufHdr(victim_buf_hdr);
2851
2852 /* some sanity checks while we hold the buffer header lock */
2853 Assert(!(buf_state & (BM_VALID | BM_TAG_VALID | BM_DIRTY | BM_JUST_DIRTIED)));
2854 Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 1);
2855
2856 victim_buf_hdr->tag = tag;
2857
2858 set_bits |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
2859 if (bmr.relpersistence == RELPERSISTENCE_PERMANENT || fork == INIT_FORKNUM)
2860 set_bits |= BM_PERMANENT;
2861
2862 UnlockBufHdrExt(victim_buf_hdr, buf_state,
2863 set_bits, 0,
2864 0);
2865
2866 LWLockRelease(partition_lock);
2867
2868 /* XXX: could combine the locked operations in it with the above */
2869 StartBufferIO(victim_buf_hdr, true, false);
2870 }
2871 }
2872
2873 io_start = pgstat_prepare_io_time(track_io_timing);
2874
2875 /*
2876 * Note: if smgrzeroextend fails, we will end up with buffers that are
2877 * allocated but not marked BM_VALID. The next relation extension will
2878 * still select the same block number (because the relation didn't get any
2879 * longer on disk) and so future attempts to extend the relation will find
2880 * the same buffers (if they have not been recycled) but come right back
2881 * here to try smgrzeroextend again.
2882 *
2883 * We don't need to set checksum for all-zero pages.
2884 */
2885 smgrzeroextend(BMR_GET_SMGR(bmr), fork, first_block, extend_by, false);
2886
2887 /*
2888 * Release the file-extension lock; it's now OK for someone else to extend
2889 * the relation some more.
2890 *
2891 * We remove IO_IN_PROGRESS after this, as waking up waiting backends can
2892 * take noticeable time.
2893 */
2894 if (!(flags & EB_SKIP_EXTENSION_LOCK))
2895 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
2896
2897 pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
2898 io_start, 1, extend_by * BLCKSZ);
2899
2900 /* Set BM_VALID, terminate IO, and wake up any waiters */
2901 for (uint32 i = 0; i < extend_by; i++)
2902 {
2903 Buffer buf = buffers[i];
2904 BufferDesc *buf_hdr = GetBufferDescriptor(buf - 1);
2905 bool lock = false;
2906
2907 if (flags & EB_LOCK_FIRST && i == 0)
2908 lock = true;
2909 else if (flags & EB_LOCK_TARGET)
2910 {
2911 Assert(extend_upto != InvalidBlockNumber);
2912 if (first_block + i + 1 == extend_upto)
2913 lock = true;
2914 }
2915
2916 if (lock)
2917 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2918
2919 TerminateBufferIO(buf_hdr, false, BM_VALID, true, false);
2920 }
2921
2922 pgBufferUsage.shared_blks_written += extend_by;
2923
2924 *extended_by = extend_by;
2925
2926 return first_block;
2927}
static uint32 pg_atomic_fetch_and_u32(volatile pg_atomic_uint32 *ptr, uint32 and_)
Definition: atomics.h:396
#define MaxBlockNumber
Definition: block.h:35
#define BM_JUST_DIRTIED
Definition: buf_internals.h:74
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
#define BufHdrGetBlock(bufHdr)
Definition: bufmgr.c:72
void LimitAdditionalPins(uint32 *additional_pins)
Definition: bufmgr.c:2607
bool StartBufferIO(BufferDesc *buf, bool forInput, bool nowait)
Definition: bufmgr.c:6141
void * Block
Definition: bufmgr.h:26
@ EB_LOCK_TARGET
Definition: bufmgr.h:93
@ EB_CLEAR_SIZE_CACHE
Definition: bufmgr.h:90
@ EB_SKIP_EXTENSION_LOCK
Definition: bufmgr.h:75
@ EB_LOCK_FIRST
Definition: bufmgr.h:87
static bool PageIsNew(const PageData *page)
Definition: bufpage.h:233
#define MemSet(start, val, len)
Definition: c.h:1011
const char * str
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:424
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:474
#define ExclusiveLock
Definition: lockdefs.h:42
@ IOOP_EXTEND
Definition: pgstat.h:314
#define relpath(rlocator, forknum)
Definition: relpath.h:150
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:819
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
Definition: smgr.c:649
int64 shared_blks_written
Definition: instrument.h:29

References Assert(), buftag::blockNum, BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BM_TAG_VALID, BM_VALID, BMR_GET_SMGR, buf, BufferDesc::buf_id, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_ONE, BUFFER_LOCK_EXCLUSIVE, BufferDescriptorGetBuffer(), BufHdrGetBlock, BufMappingPartitionLock(), BufTableHashCode(), BufTableInsert(), CurrentResourceOwner, EB_CLEAR_SIZE_CACHE, EB_LOCK_FIRST, EB_LOCK_TARGET, EB_SKIP_EXTENSION_LOCK, ereport, errcode(), errmsg(), ERROR, ExclusiveLock, GetBufferDescriptor(), GetVictimBuffer(), hash(), i, INIT_FORKNUM, InitBufferTag(), InvalidBlockNumber, IOContextForStrategy(), IOOBJECT_RELATION, IOOP_EXTEND, LimitAdditionalPins(), LockBuffer(), LockBufHdr(), LockRelationForExtension(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MaxBlockNumber, MemSet, PageIsNew(), pg_atomic_fetch_and_u32(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), PinBuffer(), BufferManagerRelation::rel, relpath, BufferManagerRelation::relpersistence, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_written, smgrnblocks(), smgrzeroextend(), StartBufferIO(), BufferDesc::state, str, BufferDesc::tag, TerminateBufferIO(), track_io_timing, UnlockBufHdrExt(), UnlockRelationForExtension(), and UnpinBuffer().

Referenced by ExtendBufferedRelCommon().
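
The extend_upto clamp near the top of ExtendBufferedRelShared() is worth a worked example. A standalone sketch with hypothetical numbers: we pinned 16 victim buffers, but by the time we hold the extension lock the fork is already 100 blocks long and the caller only wants it to reach block 108, so 8 surplus pins get released:

    #include <stdint.h>

    typedef uint32_t BlockNumber;   /* stand-in for PostgreSQL's typedef */

    static uint32_t
    clamp_extend_by(void)
    {
        BlockNumber first_block = 100;  /* smgrnblocks() under the lock */
        BlockNumber extend_upto = 108;
        uint32_t    extend_by = 16;

        if (first_block > extend_upto)
            extend_by = 0;              /* target already reached */
        else if ((uint64_t) first_block + extend_by > extend_upto)
            extend_by = extend_upto - first_block;  /* clamped to 8 */

        return extend_by;   /* buffers[8..15] are unpinned by the caller */
    }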

◆ ExtendBufferedRelTo()

Buffer ExtendBufferedRelTo ( BufferManagerRelation  bmr,
ForkNumber  fork,
BufferAccessStrategy  strategy,
uint32  flags,
BlockNumber  extend_to,
ReadBufferMode  mode 
)

Definition at line 1000 of file bufmgr.c.

1006{
1007 BlockNumber current_size;
1008 uint32 extended_by = 0;
1009 Buffer buffer = InvalidBuffer;
1010 Buffer buffers[64];
1011
1012 Assert((bmr.rel != NULL) != (bmr.smgr != NULL));
1013 Assert(bmr.smgr == NULL || bmr.relpersistence != '\0');
1014 Assert(extend_to != InvalidBlockNumber && extend_to > 0);
1015
1016 if (bmr.relpersistence == '\0')
1017 bmr.relpersistence = bmr.rel->rd_rel->relpersistence;
1018
1019 /*
1020 * If desired, create the file if it doesn't exist. If
1021 * smgr_cached_nblocks[fork] is positive then it must exist, no need for
1022 * an smgrexists call.
1023 */
1024 if ((flags & EB_CREATE_FORK_IF_NEEDED) &&
1025 (BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] == 0 ||
1026 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] == InvalidBlockNumber) &&
1027 !smgrexists(BMR_GET_SMGR(bmr), fork))
1028 {
1029 LockRelationForExtension(bmr.rel, ExclusiveLock);
1030
1031 /* recheck, fork might have been created concurrently */
1032 if (!smgrexists(BMR_GET_SMGR(bmr), fork))
1033 smgrcreate(BMR_GET_SMGR(bmr), fork, flags & EB_PERFORMING_RECOVERY);
1034
1035 UnlockRelationForExtension(bmr.rel, ExclusiveLock);
1036 }
1037
1038 /*
1039 * If requested, invalidate size cache, so that smgrnblocks asks the
1040 * kernel.
1041 */
1042 if (flags & EB_CLEAR_SIZE_CACHE)
1043 BMR_GET_SMGR(bmr)->smgr_cached_nblocks[fork] = InvalidBlockNumber;
1044
1045 /*
1046 * Estimate how many pages we'll need to extend by. This avoids acquiring
1047 * unnecessarily many victim buffers.
1048 */
1049 current_size = smgrnblocks(BMR_GET_SMGR(bmr), fork);
1050
1051 /*
1052 * Since no-one else can be looking at the page contents yet, there is no
1053 * difference between an exclusive lock and a cleanup-strength lock. Note
1054 * that we pass the original mode to ReadBuffer_common() below, in case we
1055 * fall back to reading the buffer after a concurrent relation extension.
1056 */
1057 if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
1058 flags |= EB_LOCK_TARGET;
1059
1060 while (current_size < extend_to)
1061 {
1062 uint32 num_pages = lengthof(buffers);
1063 BlockNumber first_block;
1064
1065 if ((uint64) current_size + num_pages > extend_to)
1066 num_pages = extend_to - current_size;
1067
1068 first_block = ExtendBufferedRelCommon(bmr, fork, strategy, flags,
1069 num_pages, extend_to,
1070 buffers, &extended_by);
1071
1072 current_size = first_block + extended_by;
1073 Assert(num_pages != 0 || current_size >= extend_to);
1074
1075 for (uint32 i = 0; i < extended_by; i++)
1076 {
1077 if (first_block + i != extend_to - 1)
1078 ReleaseBuffer(buffers[i]);
1079 else
1080 buffer = buffers[i];
1081 }
1082 }
1083
1084 /*
1085 * It's possible that another backend concurrently extended the relation.
1086 * In that case read the buffer.
1087 *
1088 * XXX: Should we control this via a flag?
1089 */
1090 if (buffer == InvalidBuffer)
1091 {
1092 Assert(extended_by == 0);
1093 buffer = ReadBuffer_common(bmr.rel, BMR_GET_SMGR(bmr), bmr.relpersistence,
1094 fork, extend_to - 1, mode, strategy);
1095 }
1096
1097 return buffer;
1098}
static Buffer ReadBuffer_common(Relation rel, SMgrRelation smgr, char smgr_persistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:1268
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5461
@ EB_PERFORMING_RECOVERY
Definition: bufmgr.h:78
@ EB_CREATE_FORK_IF_NEEDED
Definition: bufmgr.h:84
@ RBM_ZERO_AND_CLEANUP_LOCK
Definition: bufmgr.h:49
@ RBM_ZERO_AND_LOCK
Definition: bufmgr.h:47
#define lengthof(array)
Definition: c.h:801

References Assert(), BMR_GET_SMGR, PrivateRefCountEntry::buffer, current_size, EB_CLEAR_SIZE_CACHE, EB_CREATE_FORK_IF_NEEDED, EB_LOCK_TARGET, EB_PERFORMING_RECOVERY, ExclusiveLock, ExtendBufferedRelCommon(), i, InvalidBlockNumber, InvalidBuffer, lengthof, LockRelationForExtension(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RelationData::rd_rel, ReadBuffer_common(), BufferManagerRelation::rel, ReleaseBuffer(), BufferManagerRelation::relpersistence, BufferManagerRelation::smgr, smgrcreate(), smgrexists(), smgrnblocks(), and UnlockRelationForExtension().

Referenced by fsm_extend(), vm_extend(), and XLogReadBufferExtended().
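
A caller sketch loosely modeled on vm_extend(); ensure_block is a hypothetical name, and RBM_ZERO_ON_ERROR is one plausible mode for the read fallback:

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    /* Grow `fork` until block `blkno` exists, creating the fork if needed. */
    static Buffer
    ensure_block(Relation rel, ForkNumber fork, BlockNumber blkno)
    {
        return ExtendBufferedRelTo(BMR_REL(rel), fork, NULL,
                                   EB_CREATE_FORK_IF_NEEDED |
                                   EB_CLEAR_SIZE_CACHE,
                                   blkno + 1, RBM_ZERO_ON_ERROR);
    }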

◆ FindAndDropRelationBuffers()

static void FindAndDropRelationBuffers ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  nForkBlock,
BlockNumber  firstDelBlock 
)
static

Definition at line 4930 of file bufmgr.c.

4933{
4934 BlockNumber curBlock;
4935
4936 for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
4937 {
4938 uint32 bufHash; /* hash value for tag */
4939 BufferTag bufTag; /* identity of requested block */
4940 LWLock *bufPartitionLock; /* buffer partition lock for it */
4941 int buf_id;
4942 BufferDesc *bufHdr;
4943
4944 /* create a tag so we can lookup the buffer */
4945 InitBufferTag(&bufTag, &rlocator, forkNum, curBlock);
4946
4947 /* determine its hash code and partition lock ID */
4948 bufHash = BufTableHashCode(&bufTag);
4949 bufPartitionLock = BufMappingPartitionLock(bufHash);
4950
4951 /* Check that it is in the buffer pool. If not, do nothing. */
4952 LWLockAcquire(bufPartitionLock, LW_SHARED);
4953 buf_id = BufTableLookup(&bufTag, bufHash);
4954 LWLockRelease(bufPartitionLock);
4955
4956 if (buf_id < 0)
4957 continue;
4958
4959 bufHdr = GetBufferDescriptor(buf_id);
4960
4961 /*
4962 * We need to lock the buffer header and recheck if the buffer is
4963 * still associated with the same block because the buffer could be
4964 * evicted by some other backend loading blocks for a different
4965 * relation after we release lock on the BufMapping table.
4966 */
4967 LockBufHdr(bufHdr);
4968
4969 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
4970 BufTagGetForkNum(&bufHdr->tag) == forkNum &&
4971 bufHdr->tag.blockNum >= firstDelBlock)
4972 InvalidateBuffer(bufHdr); /* releases spinlock */
4973 else
4974 UnlockBufHdr(bufHdr);
4975 }
4976}

References buftag::blockNum, BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), GetBufferDescriptor(), InitBufferTag(), InvalidateBuffer(), LockBufHdr(), LW_SHARED, LWLockAcquire(), LWLockRelease(), BufferDesc::tag, and UnlockBufHdr().

Referenced by DropRelationBuffers(), and DropRelationsAllBuffers().

◆ FlushBuffer()

static void FlushBuffer ( BufferDesc *  buf,
SMgrRelation  reln,
IOObject  io_object,
IOContext  io_context 
)
static

Definition at line 4378 of file bufmgr.c.

4380{
4381 XLogRecPtr recptr;
4382 ErrorContextCallback errcallback;
4383 instr_time io_start;
4384 Block bufBlock;
4385 char *bufToWrite;
4386 uint32 buf_state;
4387
4388 /*
4389 * Try to start an I/O operation. If StartBufferIO returns false, then
4390 * someone else flushed the buffer before we could, so we need not do
4391 * anything.
4392 */
4393 if (!StartBufferIO(buf, false, false))
4394 return;
4395
4396 /* Setup error traceback support for ereport() */
4397 errcallback.callback = shared_buffer_write_error_callback;
4398 errcallback.arg = buf;
4399 errcallback.previous = error_context_stack;
4400 error_context_stack = &errcallback;
4401
4402 /* Find smgr relation for buffer */
4403 if (reln == NULL)
4404 reln = smgropen(BufTagGetRelFileLocator(&buf->tag), INVALID_PROC_NUMBER);
4405
4406 TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag),
4407 buf->tag.blockNum,
4408 reln->smgr_rlocator.locator.spcOid,
4409 reln->smgr_rlocator.locator.dbOid,
4410 reln->smgr_rlocator.locator.relNumber);
4411
4412 buf_state = LockBufHdr(buf);
4413
4414 /*
4415 * Run PageGetLSN while holding header lock, since we don't have the
4416 * buffer locked exclusively in all cases.
4417 */
4418 recptr = BufferGetLSN(buf);
4419
4420 /* To check if block content changes while flushing. - vadim 01/17/97 */
4421 UnlockBufHdrExt(buf, buf_state,
4422 0, BM_JUST_DIRTIED,
4423 0);
4424
4425 /*
4426 * Force XLOG flush up to buffer's LSN. This implements the basic WAL
4427 * rule that log updates must hit disk before any of the data-file changes
4428 * they describe do.
4429 *
4430 * However, this rule does not apply to unlogged relations, which will be
4431 * lost after a crash anyway. Most unlogged relation pages do not bear
4432 * LSNs since we never emit WAL records for them, and therefore flushing
4433 * up through the buffer LSN would be useless, but harmless. However,
4434 * GiST indexes use LSNs internally to track page-splits, and therefore
4435 * unlogged GiST pages bear "fake" LSNs generated by
4436 * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
4437 * LSN counter could advance past the WAL insertion point; and if it did
4438 * happen, attempting to flush WAL through that location would fail, with
4439 * disastrous system-wide consequences. To make sure that can't happen,
4440 * skip the flush if the buffer isn't permanent.
4441 */
4442 if (buf_state & BM_PERMANENT)
4443 XLogFlush(recptr);
4444
4445 /*
4446 * Now it's safe to write the buffer to disk. Note that no one else should
4447 * have been able to write it, while we were busy with log flushing,
4448 * because we got the exclusive right to perform I/O by setting the
4449 * BM_IO_IN_PROGRESS bit.
4450 */
4451 bufBlock = BufHdrGetBlock(buf);
4452
4453 /*
4454 * Update page checksum if desired. Since we have only shared lock on the
4455 * buffer, other processes might be updating hint bits in it, so we must
4456 * copy the page to private storage if we do checksumming.
4457 */
4458 bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
4459
4460 io_start = pgstat_prepare_io_time(track_io_timing);
4461
4462 /*
4463 * bufToWrite is either the shared buffer or a copy, as appropriate.
4464 */
4465 smgrwrite(reln,
4466 BufTagGetForkNum(&buf->tag),
4467 buf->tag.blockNum,
4468 bufToWrite,
4469 false);
4470
4471 /*
4472 * When a strategy is in use, only flushes of dirty buffers already in the
4473 * strategy ring are counted as strategy writes (IOCONTEXT
4474 * [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the purpose of IO
4475 * statistics tracking.
4476 *
4477 * If a shared buffer initially added to the ring must be flushed before
4478 * being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
4479 *
4480 * If a shared buffer which was added to the ring later because the
4481 * current strategy buffer is pinned or in use or because all strategy
4482 * buffers were dirty and rejected (for BAS_BULKREAD operations only)
4483 * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
4484 * (from_ring will be false).
4485 *
4486 * When a strategy is not in use, the write can only be a "regular" write
4487 * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
4488 */
4489 pgstat_count_io_op_time(io_object, io_context,
4490 IOOP_WRITE, io_start, 1, BLCKSZ);
4491
4492 pgBufferUsage.shared_blks_written++;
4493
4494 /*
4495 * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
4496 * end the BM_IO_IN_PROGRESS state.
4497 */
4498 TerminateBufferIO(buf, true, 0, true, false);
4499
4500 TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(BufTagGetForkNum(&buf->tag),
4501 buf->tag.blockNum,
4502 reln->smgr_rlocator.locator.spcOid,
4503 reln->smgr_rlocator.locator.dbOid,
4504 reln->smgr_rlocator.locator.relNumber);
4505
4506 /* Pop the error context stack */
4507 error_context_stack = errcallback.previous;
4508}
#define BufferGetLSN(bufHdr)
Definition: bufmgr.c:73
static void shared_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:6300
char * PageSetChecksumCopy(Page page, BlockNumber blkno)
Definition: bufpage.c:1509
ErrorContextCallback * error_context_stack
Definition: elog.c:95
@ IOOP_WRITE
Definition: pgstat.h:316
static void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.h:131
struct ErrorContextCallback * previous
Definition: elog.h:297
void(* callback)(void *arg)
Definition: elog.h:298
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2783

References ErrorContextCallback::arg, BM_JUST_DIRTIED, BM_PERMANENT, buf, BufferGetLSN, BufHdrGetBlock, BufTagGetForkNum(), BufTagGetRelFileLocator(), ErrorContextCallback::callback, RelFileLocator::dbOid, error_context_stack, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITE, RelFileLocatorBackend::locator, LockBufHdr(), PageSetChecksumCopy(), pgBufferUsage, pgstat_count_io_op_time(), pgstat_prepare_io_time(), ErrorContextCallback::previous, RelFileLocator::relNumber, BufferUsage::shared_blks_written, shared_buffer_write_error_callback(), SMgrRelationData::smgr_rlocator, smgropen(), smgrwrite(), RelFileLocator::spcOid, StartBufferIO(), TerminateBufferIO(), track_io_timing, UnlockBufHdrExt(), and XLogFlush().

Referenced by FlushOneBuffer(), FlushUnlockedBuffer(), and GetVictimBuffer().
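
The WAL-before-data rule that FlushBuffer() enforces can be reduced to a tiny standalone toy (not PostgreSQL code; names and values are hypothetical): a page image must never reach disk while the WAL describing its last change is still only in memory.

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t wal_flushed_upto = 0;

    static void
    xlog_flush(uint64_t lsn)
    {
        if (lsn > wal_flushed_upto)
            wal_flushed_upto = lsn;     /* stands in for flushing WAL */
    }

    static void
    write_page(uint64_t page_lsn)
    {
        xlog_flush(page_lsn);           /* 1: WAL up to the page's LSN */
        printf("data write OK: page LSN %llu <= WAL %llu\n",
               (unsigned long long) page_lsn,
               (unsigned long long) wal_flushed_upto);  /* 2: then data */
    }

    int
    main(void)
    {
        write_page(42);
        return 0;
    }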

◆ FlushDatabaseBuffers()

void FlushDatabaseBuffers ( Oid  dbid)

Definition at line 5401 of file bufmgr.c.

5402{
5403 int i;
5404 BufferDesc *bufHdr;
5405
5406 for (i = 0; i < NBuffers; i++)
5407 {
5408 uint32 buf_state;
5409
5410 bufHdr = GetBufferDescriptor(i);
5411
5412 /*
5413 * As in DropRelationBuffers, an unlocked precheck should be safe and
5414 * saves some cycles.
5415 */
5416 if (bufHdr->tag.dbOid != dbid)
5417 continue;
5418
5419 /* Make sure we can handle the pin */
5420 ResourceOwnerEnlarge(CurrentResourceOwner);
5421 ReservePrivateRefCountEntry();
5422
5423 buf_state = LockBufHdr(bufHdr);
5424 if (bufHdr->tag.dbOid == dbid &&
5425 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5426 {
5427 PinBuffer_Locked(bufHdr);
5428 FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5429 UnpinBuffer(bufHdr);
5430 }
5431 else
5432 UnlockBufHdr(bufHdr);
5433 }
5434}

References BM_DIRTY, BM_VALID, CurrentResourceOwner, buftag::dbOid, FlushUnlockedBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), NBuffers, PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by dbase_redo().

◆ FlushOneBuffer()

void FlushOneBuffer ( Buffer  buffer)

Definition at line 5441 of file bufmgr.c.

5442{
5443 BufferDesc *bufHdr;
5444
5445 /* currently not needed, but no fundamental reason not to support */
5446 Assert(!BufferIsLocal(buffer));
5447
5448 Assert(BufferIsPinned(buffer));
5449
5450 bufHdr = GetBufferDescriptor(buffer - 1);
5451
5452 Assert(BufferIsLockedByMe(buffer));
5453
5454 FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5455}
static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context)
Definition: bufmgr.c:4378
bool BufferIsLockedByMe(Buffer buffer)
Definition: bufmgr.c:2937

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsLockedByMe(), BufferIsPinned, FlushBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, and IOOBJECT_RELATION.

Referenced by hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), invalidate_rel_block(), and XLogReadBufferForRedoExtended().
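
A redo-style usage sketch (cf. hash_xlog_init_meta_page()); write_page_now is a hypothetical name, and the caller must already hold a pin and the content lock:

    #include "postgres.h"
    #include "storage/bufmgr.h"

    static void
    write_page_now(Buffer buf)
    {
        MarkBufferDirty(buf);   /* page was just rebuilt by redo */
        FlushOneBuffer(buf);    /* push it to storage immediately */
    }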

◆ FlushRelationBuffers()

void FlushRelationBuffers ( Relation  rel)

Definition at line 5037 of file bufmgr.c.

5038{
5039 int i;
5040 BufferDesc *bufHdr;
5041 SMgrRelation srel = RelationGetSmgr(rel);
5042
5043 if (RelationUsesLocalBuffers(rel))
5044 {
5045 for (i = 0; i < NLocBuffer; i++)
5046 {
5047 uint32 buf_state;
5048
5049 bufHdr = GetLocalBufferDescriptor(i);
5050 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
5051 ((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
5052 (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5053 {
5054 ErrorContextCallback errcallback;
5055
5056 /* Setup error traceback support for ereport() */
5057 errcallback.callback = local_buffer_write_error_callback;
5058 errcallback.arg = bufHdr;
5059 errcallback.previous = error_context_stack;
5060 error_context_stack = &errcallback;
5061
5062 /* Make sure we can handle the pin */
5063 ResourceOwnerEnlarge(CurrentResourceOwner);
5064 ReservePrivateRefCountEntry();
5065
5066 /*
5067 * Pin/unpin mostly to make valgrind work, but it also seems
5068 * like the right thing to do.
5069 */
5070 PinLocalBuffer(bufHdr, false);
5071
5072
5073 FlushLocalBuffer(bufHdr, srel);
5074
5075 UnpinLocalBuffer(BufferDescriptorGetBuffer(bufHdr));
5076
5077 /* Pop the error context stack */
5078 error_context_stack = errcallback.previous;
5079 }
5080 }
5081
5082 return;
5083 }
5084
5085 for (i = 0; i < NBuffers; i++)
5086 {
5087 uint32 buf_state;
5088
5089 bufHdr = GetBufferDescriptor(i);
5090
5091 /*
5092 * As in DropRelationBuffers, an unlocked precheck should be safe and
5093 * saves some cycles.
5094 */
5095 if (!BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator))
5096 continue;
5097
5098 /* Make sure we can handle the pin */
5099 ResourceOwnerEnlarge(CurrentResourceOwner);
5100 ReservePrivateRefCountEntry();
5101
5102 buf_state = LockBufHdr(bufHdr);
5103 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
5104 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5105 {
5106 PinBuffer_Locked(bufHdr);
5107 FlushUnlockedBuffer(bufHdr, srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5108 UnpinBuffer(bufHdr);
5109 }
5110 else
5111 UnlockBufHdr(bufHdr);
5112 }
5113}
static void local_buffer_write_error_callback(void *arg)
Definition: bufmgr.c:6316
void FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln)
Definition: localbuf.c:183
void UnpinLocalBuffer(Buffer buffer)
Definition: localbuf.c:841
bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
Definition: localbuf.c:805
int NLocBuffer
Definition: localbuf.c:45
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:577

References ErrorContextCallback::arg, BM_DIRTY, BM_VALID, BufferDescriptorGetBuffer(), BufTagMatchesRelFileLocator(), ErrorContextCallback::callback, CurrentResourceOwner, error_context_stack, FlushLocalBuffer(), FlushUnlockedBuffer(), GetBufferDescriptor(), GetLocalBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, local_buffer_write_error_callback(), LockBufHdr(), NBuffers, NLocBuffer, pg_atomic_read_u32(), PinBuffer_Locked(), PinLocalBuffer(), ErrorContextCallback::previous, RelationData::rd_locator, RelationGetSmgr(), RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, UnlockBufHdr(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by fill_seq_with_data(), heapam_relation_copy_data(), and index_copy_data().
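
A sketch of the typical caller pattern (cf. index_copy_data()); prepare_for_copy is a hypothetical name:

    #include "postgres.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"

    static void
    prepare_for_copy(Relation rel)
    {
        /* push all of the relation's dirty buffers to storage first */
        FlushRelationBuffers(rel);
        /* ... now it is safe to copy the underlying files directly ... */
    }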

◆ FlushRelationsAllBuffers()

void FlushRelationsAllBuffers ( SMgrRelation *  smgrs,
int  nrels 
)

Definition at line 5125 of file bufmgr.c.

5126{
5127 int i;
5128 SMgrSortArray *srels;
5129 bool use_bsearch;
5130
5131 if (nrels == 0)
5132 return;
5133
5134 /* fill-in array for qsort */
5135 srels = palloc_array(SMgrSortArray, nrels);
5136
5137 for (i = 0; i < nrels; i++)
5138 {
5139 Assert(!RelFileLocatorBackendIsTemp(smgrs[i]->smgr_rlocator));
5140
5141 srels[i].rlocator = smgrs[i]->smgr_rlocator.locator;
5142 srels[i].srel = smgrs[i];
5143 }
5144
5145 /*
5146 * Save the bsearch overhead for a small number of relations to sync. See
5147 * DropRelationsAllBuffers for details.
5148 */
5149 use_bsearch = nrels > RELS_BSEARCH_THRESHOLD;
5150
5151 /* sort the list of SMgrRelations if necessary */
5152 if (use_bsearch)
5153 qsort(srels, nrels, sizeof(SMgrSortArray), rlocator_comparator);
5154
5155 for (i = 0; i < NBuffers; i++)
5156 {
5157 SMgrSortArray *srelent = NULL;
5158 BufferDesc *bufHdr = GetBufferDescriptor(i);
5159 uint32 buf_state;
5160
5161 /*
5162 * As in DropRelationBuffers, an unlocked precheck should be safe and
5163 * saves some cycles.
5164 */
5165
5166 if (!use_bsearch)
5167 {
5168 int j;
5169
5170 for (j = 0; j < nrels; j++)
5171 {
5172 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srels[j].rlocator))
5173 {
5174 srelent = &srels[j];
5175 break;
5176 }
5177 }
5178 }
5179 else
5180 {
5181 RelFileLocator rlocator;
5182
5183 rlocator = BufTagGetRelFileLocator(&bufHdr->tag);
5184 srelent = bsearch(&rlocator,
5185 srels, nrels, sizeof(SMgrSortArray),
5186 rlocator_comparator);
5187 }
5188
5189 /* buffer doesn't belong to any of the given relfilelocators; skip it */
5190 if (srelent == NULL)
5191 continue;
5192
5193 /* Make sure we can handle the pin */
5194 ResourceOwnerEnlarge(CurrentResourceOwner);
5195 ReservePrivateRefCountEntry();
5196
5197 buf_state = LockBufHdr(bufHdr);
5198 if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) &&
5199 (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
5200 {
5201 PinBuffer_Locked(bufHdr);
5202 FlushUnlockedBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
5203 UnpinBuffer(bufHdr);
5204 }
5205 else
5206 UnlockBufHdr(bufHdr);
5207 }
5208
5209 pfree(srels);
5210}
SMgrRelation srel
Definition: bufmgr.c:162
RelFileLocator rlocator
Definition: bufmgr.c:161

References Assert(), BM_DIRTY, BM_VALID, BufTagGetRelFileLocator(), BufTagMatchesRelFileLocator(), CurrentResourceOwner, FlushUnlockedBuffer(), GetBufferDescriptor(), i, IOCONTEXT_NORMAL, IOOBJECT_RELATION, j, RelFileLocatorBackend::locator, LockBufHdr(), NBuffers, palloc_array, pfree(), PinBuffer_Locked(), qsort, RelFileLocatorBackendIsTemp, RELS_BSEARCH_THRESHOLD, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), SMgrSortArray::rlocator, rlocator_comparator(), SMgrRelationData::smgr_rlocator, SMgrSortArray::srel, BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by smgrdosyncall().

◆ FlushUnlockedBuffer()

◆ ForgetPrivateRefCountEntry()

static void ForgetPrivateRefCountEntry ( PrivateRefCountEntry *  ref)
static

Definition at line 540 of file bufmgr.c.

541{
542 Assert(ref->data.refcount == 0);
543
544 if (ref >= &PrivateRefCountArray[0] &&
545 ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
546 {
547 ref->buffer = InvalidBuffer;
548 PrivateRefCountArrayKeys[ref - PrivateRefCountArray] = InvalidBuffer;
549
550
551 /*
552 * Mark the just used entry as reserved - in many scenarios that
553 * allows us to avoid ever having to search the array/hash for free
554 * entries.
555 */
556 ReservedRefCountSlot = ref - PrivateRefCountArray;
557 }
558 else
559 {
560 bool found;
561 Buffer buffer = ref->buffer;
562
563 hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
564 Assert(found);
565 Assert(PrivateRefCountOverflowed > 0);
566 PrivateRefCountOverflowed--;
567 }
568}
static int ReservedRefCountSlot
Definition: bufmgr.c:243
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:952
@ HASH_REMOVE
Definition: hsearch.h:115
PrivateRefCountData data
Definition: bufmgr.c:118

References Assert(), PrivateRefCountEntry::buffer, PrivateRefCountEntry::data, HASH_REMOVE, hash_search(), InvalidBuffer, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountHash, PrivateRefCountOverflowed, PrivateRefCountData::refcount, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountSlot.

Referenced by UnpinBufferNoOwner().

◆ GetAdditionalPinLimit()

uint32 GetAdditionalPinLimit ( void  )

Definition at line 2581 of file bufmgr.c.

2582{
2583 uint32 estimated_pins_held;
2584
2585 /*
2586 * We get the number of "overflowed" pins for free, but don't know the
2587 * number of pins in PrivateRefCountArray. The cost of calculating that
2588 * exactly doesn't seem worth it, so just assume the max.
2589 */
2590 estimated_pins_held = PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES;
2591
2592 /* Is this backend already holding more than its fair share? */
2593 if (estimated_pins_held > MaxProportionalPins)
2594 return 0;
2595
2596 return MaxProportionalPins - estimated_pins_held;
2597}
static uint32 MaxProportionalPins
Definition: bufmgr.c:246

References MaxProportionalPins, PrivateRefCountOverflowed, and REFCOUNT_ARRAY_ENTRIES.

Referenced by LimitAdditionalPins(), and read_stream_start_pending_read().
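
A worked example of the estimate, as a standalone sketch with hypothetical numbers (16384 shared buffers shared fairly across 64 backend slots):

    #include <stdint.h>

    #define REFCOUNT_ARRAY_ENTRIES 8    /* same constant as bufmgr.c */

    static uint32_t
    additional_pin_limit_example(void)
    {
        uint32_t max_proportional = 16384 / 64;     /* fair share: 256 */
        uint32_t overflowed = 10;       /* pins spilled into the hash */
        uint32_t estimated = overflowed + REFCOUNT_ARRAY_ENTRIES;  /* 18 */

        if (estimated > max_proportional)
            return 0;                   /* already over the fair share */
        return max_proportional - estimated;        /* 238 more pins */
    }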

◆ GetPinLimit()

uint32 GetPinLimit ( void  )

Definition at line 2569 of file bufmgr.c.

2570{
2571 return MaxProportionalPins;
2572}

References MaxProportionalPins.

Referenced by GetAccessStrategy(), and read_stream_begin_impl().

◆ GetPrivateRefCount()

static int32 GetPrivateRefCount ( Buffer  buffer)
inlinestatic

Definition at line 517 of file bufmgr.c.

518{
519 PrivateRefCountEntry *ref;
520
521 Assert(BufferIsValid(buffer));
522 Assert(!BufferIsLocal(buffer));
523
524 /*
525 * Not moving the entry - that's ok for the current users, but we might
526 * want to change this one day.
527 */
528 ref = GetPrivateRefCountEntry(buffer, false);
529
530 if (ref == NULL)
531 return 0;
532 return ref->data.refcount;
533}
static PrivateRefCountEntry * GetPrivateRefCountEntry(Buffer buffer, bool do_move)
Definition: bufmgr.c:482

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), PrivateRefCountEntry::data, GetPrivateRefCountEntry(), and PrivateRefCountData::refcount.

Referenced by CheckBufferIsPinnedOnce(), ConditionalLockBufferForCleanup(), DebugPrintBufferRefcount(), HoldingBufferPinThatDelaysRecovery(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), and MarkBufferDirtyHint().

◆ GetPrivateRefCountEntry()

static PrivateRefCountEntry * GetPrivateRefCountEntry ( Buffer  buffer,
bool  do_move 
)
inlinestatic

Definition at line 482 of file bufmgr.c.

483{
484 Assert(BufferIsValid(buffer));
485 Assert(!BufferIsLocal(buffer));
486
487 /*
488 * It's very common to look up the same buffer repeatedly. To make that
489 * fast, we have a one-entry cache.
490 *
491 * In contrast to the loop in GetPrivateRefCountEntrySlow(), here it is
492 * faster to check PrivateRefCountArray[].buffer, as in the case of a hit
493 * fewer addresses are computed and fewer cachelines are accessed. Whereas
494 * in GetPrivateRefCountEntrySlow()'s case, checking
495 * PrivateRefCountArrayKeys saves a lot of memory accesses.
496 */
497 if (likely(PrivateRefCountEntryLast != -1) &&
498 PrivateRefCountArray[PrivateRefCountEntryLast].buffer == buffer)
499 {
500 return &PrivateRefCountArray[PrivateRefCountEntryLast];
501 }
502
503 /*
504 * The code for the cached lookup is small enough to be worth inlining
505 * into the caller. In the miss case however, that empirically doesn't
506 * seem worth it.
507 */
508 return GetPrivateRefCountEntrySlow(buffer, do_move);
509}
static pg_noinline PrivateRefCountEntry * GetPrivateRefCountEntrySlow(Buffer buffer, bool do_move)
Definition: bufmgr.c:393
static int PrivateRefCountEntryLast
Definition: bufmgr.c:244
#define likely(x)
Definition: c.h:417

References Assert(), PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), GetPrivateRefCountEntrySlow(), likely, PrivateRefCountArray, and PrivateRefCountEntryLast.

Referenced by GetPrivateRefCount(), IncrBufferRefCount(), PinBuffer(), PinBuffer_Locked(), and UnpinBufferNoOwner().
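
The one-entry cache pattern, reduced to a self-contained sketch (Entry, table, and the names below are stand-ins, not the bufmgr structures):

    typedef struct
    {
        int key;
        int value;
    } Entry;

    static Entry table[8];
    static int   last_hit = -1;     /* plays the role of PrivateRefCountEntryLast */

    static Entry *
    lookup_slow(int key)
    {
        for (int i = 0; i < 8; i++)
        {
            if (table[i].key == key)
            {
                last_hit = i;       /* remember for the next lookup */
                return &table[i];
            }
        }
        return NULL;
    }

    static Entry *
    lookup(int key)
    {
        /* cached hit costs one comparison and touches one cacheline */
        if (last_hit != -1 && table[last_hit].key == key)
            return &table[last_hit];
        return lookup_slow(key);
    }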

◆ GetPrivateRefCountEntrySlow()

static pg_noinline PrivateRefCountEntry * GetPrivateRefCountEntrySlow ( Buffer  buffer,
bool  do_move 
)
static

Definition at line 393 of file bufmgr.c.

394{
395 PrivateRefCountEntry *res;
396 int match = -1;
397 int i;
398
399 /*
400 * First search for references in the array; that'll be sufficient in the
401 * majority of cases.
402 */
403 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
404 {
405 if (PrivateRefCountArrayKeys[i] == buffer)
406 {
407 match = i;
408 /* see ReservePrivateRefCountEntry() for why we don't return */
409 }
410 }
411
412 if (likely(match != -1))
413 {
414 /* update cache for the next lookup */
415 PrivateRefCountEntryLast = match;
416
417 return &PrivateRefCountArray[match];
418 }
419
420 /*
421 * By here we know that the buffer, if already pinned, isn't residing in
422 * the array.
423 *
424 * Only look up the buffer in the hashtable if we've previously overflowed
425 * into it.
426 */
427 if (PrivateRefCountOverflowed == 0)
428 return NULL;
429
430 res = hash_search(PrivateRefCountHash, &buffer, HASH_FIND, NULL);
431
432 if (res == NULL)
433 return NULL;
434 else if (!do_move)
435 {
436 /* caller doesn't want us to move the hash entry into the array */
437 return res;
438 }
439 else
440 {
441 /* move buffer from hashtable into the free array slot */
442 bool found;
443 PrivateRefCountEntry *free;
444
445 /* Ensure there's a free array slot */
446 ReservePrivateRefCountEntry();
447
448 /* Use up the reserved slot */
449 Assert(ReservedRefCountSlot != -1);
450 free = &PrivateRefCountArray[ReservedRefCountSlot];
451 ReservedRefCountSlot = -1;
452 Assert(free->buffer == InvalidBuffer);
453
454 /* and fill it */
455 free->buffer = buffer;
456 free->data = res->data;
457 PrivateRefCountArrayKeys[free - PrivateRefCountArray] = buffer;
458 /* update cache for the next lookup */
459 PrivateRefCountEntryLast = free - PrivateRefCountArray;
460
462
463
464 /* delete from hashtable */
465 hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found);
466 Assert(found);
467 Assert(PrivateRefCountOverflowed > 0);
468 PrivateRefCountOverflowed--;
469
470 return free;
471 }
472}
#define free(a)
Definition: header.h:65
@ HASH_FIND
Definition: hsearch.h:113

References Assert(), PrivateRefCountEntry::buffer, PrivateRefCountEntry::data, free, HASH_FIND, HASH_REMOVE, hash_search(), i, InvalidBuffer, likely, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountEntryLast, PrivateRefCountHash, PrivateRefCountOverflowed, REFCOUNT_ARRAY_ENTRIES, ReservedRefCountSlot, and ReservePrivateRefCountEntry().

Referenced by GetPrivateRefCountEntry().

◆ GetVictimBuffer()

static Buffer GetVictimBuffer ( BufferAccessStrategy  strategy,
IOContext  io_context 
)
static

Definition at line 2414 of file bufmgr.c.

2415{
2416 BufferDesc *buf_hdr;
2417 Buffer buf;
2418 uint32 buf_state;
2419 bool from_ring;
2420
2421 /*
2422 * Ensure, before we pin a victim buffer, that there's a free refcount
2423 * entry and resource owner slot for the pin.
2424 */
2425 ReservePrivateRefCountEntry();
2426 ResourceOwnerEnlarge(CurrentResourceOwner);
2427
2428 /* we return here if a prospective victim buffer gets used concurrently */
2429again:
2430
2431 /*
2432 * Select a victim buffer. The buffer is returned pinned and owned by
2433 * this backend.
2434 */
2435 buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
2436 buf = BufferDescriptorGetBuffer(buf_hdr);
2437
2438 /*
2439 * We shouldn't have any other pins for this buffer.
2440 */
2441 CheckBufferIsPinnedOnce(buf);
2442
2443 /*
2444 * If the buffer was dirty, try to write it out. There is a race
2445 * condition here, in that someone might dirty it after we released the
2446 * buffer header lock above, or even while we are writing it out (since
2447 * our share-lock won't prevent hint-bit updates). We will recheck the
2448 * dirty bit after re-locking the buffer header.
2449 */
2450 if (buf_state & BM_DIRTY)
2451 {
2452 LWLock *content_lock;
2453
2454 Assert(buf_state & BM_TAG_VALID);
2455 Assert(buf_state & BM_VALID);
2456
2457 /*
2458 * We need a share-lock on the buffer contents to write it out (else
2459 * we might write invalid data, eg because someone else is compacting
2460 * the page contents while we write). We must use a conditional lock
2461 * acquisition here to avoid deadlock. Even though the buffer was not
2462 * pinned (and therefore surely not locked) when StrategyGetBuffer
2463 * returned it, someone else could have pinned and exclusive-locked it
2464 * by the time we get here. If we try to get the lock unconditionally,
2465 * we'd block waiting for them; if they later block waiting for us,
2466 * deadlock ensues. (This has been observed to happen when two
2467 * backends are both trying to split btree index pages, and the second
2468 * one just happens to be trying to split the page the first one got
2469 * from StrategyGetBuffer.)
2470 */
2471 content_lock = BufferDescriptorGetContentLock(buf_hdr);
2472 if (!LWLockConditionalAcquire(content_lock, LW_SHARED))
2473 {
2474 /*
2475 * Someone else has locked the buffer, so give it up and loop back
2476 * to get another one.
2477 */
2478 UnpinBuffer(buf_hdr);
2479 goto again;
2480 }
2481
2482 /*
2483 * If using a nondefault strategy, and writing the buffer would
2484 * require a WAL flush, let the strategy decide whether to go ahead
2485 * and write/reuse the buffer or to choose another victim. We need a
2486 * lock to inspect the page LSN, so this can't be done inside
2487 * StrategyGetBuffer.
2488 */
2489 if (strategy != NULL)
2490 {
2491 XLogRecPtr lsn;
2492
2493 /* Read the LSN while holding buffer header lock */
2494 buf_state = LockBufHdr(buf_hdr);
2495 lsn = BufferGetLSN(buf_hdr);
2496 UnlockBufHdr(buf_hdr);
2497
2498 if (XLogNeedsFlush(lsn)
2499 && StrategyRejectBuffer(strategy, buf_hdr, from_ring))
2500 {
2501 LWLockRelease(content_lock);
2502 UnpinBuffer(buf_hdr);
2503 goto again;
2504 }
2505 }
2506
2507 /* OK, do the I/O */
2508 FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context);
2509 LWLockRelease(content_lock);
2510
 2511 ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
 2512 &buf_hdr->tag);
2513 }
2514
2515
2516 if (buf_state & BM_VALID)
2517 {
2518 /*
2519 * When a BufferAccessStrategy is in use, blocks evicted from shared
2520 * buffers are counted as IOOP_EVICT in the corresponding context
2521 * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
2522 * strategy in two cases: 1) while initially claiming buffers for the
2523 * strategy ring 2) to replace an existing strategy ring buffer
2524 * because it is pinned or in use and cannot be reused.
2525 *
2526 * Blocks evicted from buffers already in the strategy ring are
2527 * counted as IOOP_REUSE in the corresponding strategy context.
2528 *
2529 * At this point, we can accurately count evictions and reuses,
2530 * because we have successfully claimed the valid buffer. Previously,
2531 * we may have been forced to release the buffer due to concurrent
2532 * pinners or erroring out.
2533 */
 2534 pgstat_count_io_op(IOOBJECT_RELATION, io_context,
 2535 from_ring ? IOOP_REUSE : IOOP_EVICT, 1, 0);
2536 }
2537
2538 /*
2539 * If the buffer has an entry in the buffer mapping table, delete it. This
2540 * can fail because another backend could have pinned or dirtied the
2541 * buffer.
2542 */
2543 if ((buf_state & BM_TAG_VALID) && !InvalidateVictimBuffer(buf_hdr))
2544 {
2545 UnpinBuffer(buf_hdr);
2546 goto again;
2547 }
2548
2549 /* a final set of sanity checks */
2550#ifdef USE_ASSERT_CHECKING
2551 buf_state = pg_atomic_read_u32(&buf_hdr->state);
2552
2553 Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 1);
2554 Assert(!(buf_state & (BM_TAG_VALID | BM_VALID | BM_DIRTY)));
2555
 2556 CheckBufferIsPinnedOnce(buf);
 2557#endif
2558
2559 return buf;
2560}
WritebackContext BackendWritebackContext
Definition: buf_init.c:24
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition: bufmgr.c:5746
void ScheduleBufferTagForWriteback(WritebackContext *wb_context, IOContext io_context, BufferTag *tag)
Definition: bufmgr.c:6513
BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
Definition: freelist.c:174
bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
Definition: freelist.c:787
@ IOOP_EVICT
Definition: pgstat.h:307
@ IOOP_REUSE
Definition: pgstat.h:310
bool XLogNeedsFlush(XLogRecPtr record)
Definition: xlog.c:3129

References Assert(), BackendWritebackContext, BM_DIRTY, BM_TAG_VALID, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufferGetLSN, CheckBufferIsPinnedOnce(), CurrentResourceOwner, FlushBuffer(), InvalidateVictimBuffer(), IOOBJECT_RELATION, IOOP_EVICT, IOOP_REUSE, LockBufHdr(), LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), pg_atomic_read_u32(), pgstat_count_io_op(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), BufferDesc::state, StrategyGetBuffer(), StrategyRejectBuffer(), BufferDesc::tag, UnlockBufHdr(), UnpinBuffer(), and XLogNeedsFlush().

Referenced by BufferAlloc(), and ExtendBufferedRelShared().
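The conditional lock acquisition described above is the general idiom for taking a buffer content lock in a context where blocking could deadlock. A minimal sketch of that idiom, assuming the caller already holds a pin on the buffer (the helper name is illustrative, not part of bufmgr.c):

    static bool
    TryShareLockForFlush(BufferDesc *buf_hdr)
    {
        LWLock *content_lock = BufferDescriptorGetContentLock(buf_hdr);

        /*
         * Never block: if another backend already holds the lock, report
         * failure so the caller can unpin this buffer and pick a different
         * victim instead of risking a deadlock.
         */
        if (!LWLockConditionalAcquire(content_lock, LW_SHARED))
            return false;

        /* Caller now holds a share lock and must LWLockRelease() it. */
        return true;
    }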

◆ HoldingBufferPinThatDelaysRecovery()

bool HoldingBufferPinThatDelaysRecovery ( void  )

Definition at line 5926 of file bufmgr.c.

5927{
5928 int bufid = GetStartupBufferPinWaitBufId();
5929
5930 /*
5931 * If we get woken slowly then it's possible that the Startup process was
5932 * already woken by other backends before we got here. Also possible that
5933 * we get here by multiple interrupts or interrupts at inappropriate
5934 * times, so make sure we do nothing if the bufid is not set.
5935 */
5936 if (bufid < 0)
5937 return false;
5938
5939 if (GetPrivateRefCount(bufid + 1) > 0)
5940 return true;
5941
5942 return false;
5943}
int GetStartupBufferPinWaitBufId(void)
Definition: proc.c:771

References GetPrivateRefCount(), and GetStartupBufferPinWaitBufId().

Referenced by CheckRecoveryConflictDeadlock(), and ProcessRecoveryConflictInterrupt().
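The "bufid + 1" above bridges the two numbering schemes used in this file: shared buffer descriptors are indexed from 0, while Buffer values (as accepted by GetPrivateRefCount()) are numbered from 1, with 0 reserved for InvalidBuffer. A two-line sketch of the mapping, with illustrative variable names:

    Buffer buffer = (Buffer) (buf_id + 1);   /* descriptor index -> Buffer */
    int    idx    = buffer - 1;              /* Buffer -> descriptor index */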

◆ IncrBufferRefCount()

void IncrBufferRefCount ( Buffer  buffer)

◆ InitBufferManagerAccess()

void InitBufferManagerAccess ( void  )

Definition at line 4101 of file bufmgr.c.

4102{
4103 HASHCTL hash_ctl;
4104
4105 /*
4106 * An advisory limit on the number of pins each backend should hold, based
4107 * on shared_buffers and the maximum number of connections possible.
4108 * That's very pessimistic, but outside toy-sized shared_buffers it should
4109 * allow plenty of pins. LimitAdditionalPins() and
4110 * GetAdditionalPinLimit() can be used to check the remaining balance.
4111 */
 4112 MaxProportionalPins = NBuffers / (MaxBackends + NUM_AUXILIARY_PROCS);
 4113
4114 memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
4116
4117 hash_ctl.keysize = sizeof(Buffer);
4118 hash_ctl.entrysize = sizeof(PrivateRefCountEntry);
4119
4120 PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl,
 4121 HASH_ELEM | HASH_BLOBS);
 4122
4123 /*
4124 * AtProcExit_Buffers needs LWLock access, and thereby has to be called at
4125 * the corresponding phase of backend shutdown.
4126 */
4127 Assert(MyProc != NULL);
 4128 on_shmem_exit(AtProcExit_Buffers, 0);
 4129}
static void AtProcExit_Buffers(int code, Datum arg)
Definition: bufmgr.c:4136
struct PrivateRefCountEntry PrivateRefCountEntry
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:358
int MaxBackends
Definition: globals.c:146
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
#define NUM_AUXILIARY_PROCS
Definition: proc.h:463
PGPROC * MyProc
Definition: proc.c:67
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76

References Assert(), AtProcExit_Buffers(), HASHCTL::entrysize, HASH_BLOBS, hash_create(), HASH_ELEM, HASHCTL::keysize, MaxBackends, MaxProportionalPins, MyProc, NBuffers, NUM_AUXILIARY_PROCS, on_shmem_exit(), PrivateRefCountArray, PrivateRefCountArrayKeys, and PrivateRefCountHash.

Referenced by BaseInit().
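The hash_create() call above is the standard dynahash recipe for a backend-local table: set keysize and entrysize in a HASHCTL, then pass HASH_ELEM | HASH_BLOBS so the fixed-size binary key is hashed with the built-in function. A minimal sketch of the same recipe, with illustrative names that are not part of bufmgr.c:

    #include "utils/hsearch.h"

    typedef struct DemoEntry
    {
        Buffer      buffer;         /* hash key; must be the first field */
        int32       count;
    } DemoEntry;

    static HTAB *DemoHash;

    static void
    create_demo_hash(void)
    {
        HASHCTL     hash_ctl;

        hash_ctl.keysize = sizeof(Buffer);
        hash_ctl.entrysize = sizeof(DemoEntry);

        /* 100 is only an initial size hint; the table grows as needed */
        DemoHash = hash_create("DemoHash", 100, &hash_ctl,
                               HASH_ELEM | HASH_BLOBS);
    }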

◆ InvalidateBuffer()

static void InvalidateBuffer ( BufferDesc buf)
static

Definition at line 2248 of file bufmgr.c.

2249{
2250 BufferTag oldTag;
2251 uint32 oldHash; /* hash value for oldTag */
2252 LWLock *oldPartitionLock; /* buffer partition lock for it */
2253 uint32 oldFlags;
2254 uint32 buf_state;
2255
2256 /* Save the original buffer tag before dropping the spinlock */
2257 oldTag = buf->tag;
2258
 2259 UnlockBufHdr(buf);
 2260
2261 /*
2262 * Need to compute the old tag's hashcode and partition lock ID. XXX is it
2263 * worth storing the hashcode in BufferDesc so we need not recompute it
2264 * here? Probably not.
2265 */
2266 oldHash = BufTableHashCode(&oldTag);
2267 oldPartitionLock = BufMappingPartitionLock(oldHash);
2268
2269retry:
2270
2271 /*
2272 * Acquire exclusive mapping lock in preparation for changing the buffer's
2273 * association.
2274 */
2275 LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
2276
2277 /* Re-lock the buffer header */
2278 buf_state = LockBufHdr(buf);
2279
2280 /* If it's changed while we were waiting for lock, do nothing */
2281 if (!BufferTagsEqual(&buf->tag, &oldTag))
2282 {
 2283 UnlockBufHdr(buf);
 2284 LWLockRelease(oldPartitionLock);
2285 return;
2286 }
2287
2288 /*
2289 * We assume the reason for it to be pinned is that either we were
2290 * asynchronously reading the page in before erroring out or someone else
2291 * is flushing the page out. Wait for the IO to finish. (This could be
2292 * an infinite loop if the refcount is messed up... it would be nice to
 2293 * time out after a while, but there seems no way to be sure how many loops
2294 * may be needed. Note that if the other guy has pinned the buffer but
2295 * not yet done StartBufferIO, WaitIO will fall through and we'll
2296 * effectively be busy-looping here.)
2297 */
2298 if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
2299 {
 2300 UnlockBufHdr(buf);
 2301 LWLockRelease(oldPartitionLock);
2302 /* safety check: should definitely not be our *own* pin */
 2303 if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0)
 2304 elog(ERROR, "buffer is pinned in InvalidateBuffer");
2305 WaitIO(buf);
2306 goto retry;
2307 }
2308
2309 /*
2310 * Clear out the buffer's tag and flags. We must do this to ensure that
2311 * linear scans of the buffer array don't think the buffer is valid.
2312 */
2313 oldFlags = buf_state & BUF_FLAG_MASK;
2314 ClearBufferTag(&buf->tag);
2315
2316 UnlockBufHdrExt(buf, buf_state,
2317 0,
 2318 BUF_FLAG_MASK | BUF_USAGECOUNT_MASK,
 2319 0);
2320
2321 /*
2322 * Remove the buffer from the lookup hashtable, if it was in there.
2323 */
2324 if (oldFlags & BM_TAG_VALID)
2325 BufTableDelete(&oldTag, oldHash);
2326
2327 /*
2328 * Done with mapping lock.
2329 */
2330 LWLockRelease(oldPartitionLock);
2331}
#define BUF_USAGECOUNT_MASK
Definition: buf_internals.h:53
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
static void ClearBufferTag(BufferTag *tag)
void BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
Definition: buf_table.c:148
static void WaitIO(BufferDesc *buf)
Definition: bufmgr.c:6062

References BM_TAG_VALID, buf, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), elog, ERROR, GetPrivateRefCount(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), UnlockBufHdr(), UnlockBufHdrExt(), and WaitIO().

Referenced by DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), and FindAndDropRelationBuffers().

◆ InvalidateVictimBuffer()

static bool InvalidateVictimBuffer ( BufferDesc buf_hdr)
static

Definition at line 2343 of file bufmgr.c.

2344{
2345 uint32 buf_state;
2346 uint32 hash;
2347 LWLock *partition_lock;
2348 BufferTag tag;
2349
 2350 Assert(GetPrivateRefCount(BufferDescriptorGetBuffer(buf_hdr)) == 1);
 2351
2352 /* have buffer pinned, so it's safe to read tag without lock */
2353 tag = buf_hdr->tag;
2354
2355 hash = BufTableHashCode(&tag);
2356 partition_lock = BufMappingPartitionLock(hash);
2357
2358 LWLockAcquire(partition_lock, LW_EXCLUSIVE);
2359
2360 /* lock the buffer header */
2361 buf_state = LockBufHdr(buf_hdr);
2362
2363 /*
 2364 * We have the buffer pinned, so nobody else should have been able to unset
2365 * this concurrently.
2366 */
2367 Assert(buf_state & BM_TAG_VALID);
2368 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
2369 Assert(BufferTagsEqual(&buf_hdr->tag, &tag));
2370
2371 /*
2372 * If somebody else pinned the buffer since, or even worse, dirtied it,
2373 * give up on this buffer: It's clearly in use.
2374 */
2375 if (BUF_STATE_GET_REFCOUNT(buf_state) != 1 || (buf_state & BM_DIRTY))
2376 {
2377 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
2378
2379 UnlockBufHdr(buf_hdr);
2380 LWLockRelease(partition_lock);
2381
2382 return false;
2383 }
2384
2385 /*
2386 * Clear out the buffer's tag and flags and usagecount. This is not
2387 * strictly required, as BM_TAG_VALID/BM_VALID needs to be checked before
2388 * doing anything with the buffer. But currently it's beneficial, as the
 2389 * cheaper pre-checks for several linear scans of shared buffers use the
2390 * tag (see e.g. FlushDatabaseBuffers()).
2391 */
2392 ClearBufferTag(&buf_hdr->tag);
2393 UnlockBufHdrExt(buf_hdr, buf_state,
2394 0,
 2395 BUF_FLAG_MASK | BUF_USAGECOUNT_MASK,
 2396 0);
2397
2398 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
2399
2400 /* finally delete buffer from the buffer mapping table */
2401 BufTableDelete(&tag, hash);
2402
2403 LWLockRelease(partition_lock);
2404
2405 buf_state = pg_atomic_read_u32(&buf_hdr->state);
2406 Assert(!(buf_state & (BM_DIRTY | BM_VALID | BM_TAG_VALID)));
2407 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
2409
2410 return true;
2411}

References Assert(), BM_DIRTY, BM_TAG_VALID, BM_VALID, BUF_FLAG_MASK, BUF_STATE_GET_REFCOUNT, BUF_USAGECOUNT_MASK, BufferDescriptorGetBuffer(), BufferTagsEqual(), BufMappingPartitionLock(), BufTableDelete(), BufTableHashCode(), ClearBufferTag(), GetPrivateRefCount(), hash(), LockBufHdr(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), pg_atomic_read_u32(), BufferDesc::state, BufferDesc::tag, UnlockBufHdr(), and UnlockBufHdrExt().

Referenced by EvictUnpinnedBufferInternal(), and GetVictimBuffer().

◆ IsBufferCleanupOK()

bool IsBufferCleanupOK ( Buffer  buffer)

Definition at line 6010 of file bufmgr.c.

6011{
6012 BufferDesc *bufHdr;
6013 uint32 buf_state;
6014
6015 Assert(BufferIsValid(buffer));
6016
6017 /* see AIO related comment in LockBufferForCleanup() */
6018
6019 if (BufferIsLocal(buffer))
6020 {
6021 /* There should be exactly one pin */
6022 if (LocalRefCount[-buffer - 1] != 1)
6023 return false;
6024 /* Nobody else to wait for */
6025 return true;
6026 }
6027
6028 /* There should be exactly one local pin */
6029 if (GetPrivateRefCount(buffer) != 1)
6030 return false;
6031
6032 bufHdr = GetBufferDescriptor(buffer - 1);
6033
6034 /* caller must hold exclusive lock on buffer */
 6035 Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE));
 6036
6037 buf_state = LockBufHdr(bufHdr);
6038
6039 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
6040 if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
6041 {
6042 /* pincount is OK. */
6043 UnlockBufHdr(bufHdr);
6044 return true;
6045 }
6046
6047 UnlockBufHdr(bufHdr);
6048 return false;
6049}

References Assert(), BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsValid(), GetBufferDescriptor(), GetPrivateRefCount(), LocalRefCount, LockBufHdr(), and UnlockBufHdr().

Referenced by _hash_doinsert(), _hash_expandtable(), _hash_splitbucket(), and hashbucketcleanup().
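A sketch of the intended calling pattern, modeled on the hash index callers listed above: the backend already holds a pin and an exclusive content lock, and upgrades its plans only when it learns it holds the sole pin:

    /* caller holds a pin and BUFFER_LOCK_EXCLUSIVE on buf */
    if (IsBufferCleanupOK(buf))
    {
        /*
         * Sole pin: no other backend can hold a pointer into this page,
         * so tuples may safely be moved around within it.
         */
    }
    else
    {
        /* fall back to work that is safe under a plain exclusive lock */
    }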

◆ IssuePendingWritebacks()

void IssuePendingWritebacks ( WritebackContext wb_context,
IOContext  io_context 
)

Definition at line 6563 of file bufmgr.c.

6564{
6565 instr_time io_start;
6566 int i;
6567
6568 if (wb_context->nr_pending == 0)
6569 return;
6570
6571 /*
 6572 * Executing the writes in order can make them a lot faster, and allows
 6573 * merging writeback requests to consecutive blocks into larger writebacks.
6574 */
6575 sort_pending_writebacks(wb_context->pending_writebacks,
6576 wb_context->nr_pending);
6577
 6578 io_start = pgstat_prepare_io_time(track_io_timing);
 6579
6580 /*
6581 * Coalesce neighbouring writes, but nothing else. For that we iterate
 6582 * through the now-sorted array of pending flushes, and look forward to
6583 * find all neighbouring (or identical) writes.
6584 */
6585 for (i = 0; i < wb_context->nr_pending; i++)
6586 {
 6587 PendingWriteback *cur;
 6588 PendingWriteback *next;
 6589 SMgrRelation reln;
6590 int ahead;
6591 BufferTag tag;
6592 RelFileLocator currlocator;
6593 Size nblocks = 1;
6594
6595 cur = &wb_context->pending_writebacks[i];
6596 tag = cur->tag;
6597 currlocator = BufTagGetRelFileLocator(&tag);
6598
6599 /*
6600 * Peek ahead, into following writeback requests, to see if they can
6601 * be combined with the current one.
6602 */
6603 for (ahead = 0; i + ahead + 1 < wb_context->nr_pending; ahead++)
6604 {
6605
6606 next = &wb_context->pending_writebacks[i + ahead + 1];
6607
6608 /* different file, stop */
6609 if (!RelFileLocatorEquals(currlocator,
6610 BufTagGetRelFileLocator(&next->tag)) ||
6611 BufTagGetForkNum(&cur->tag) != BufTagGetForkNum(&next->tag))
6612 break;
6613
6614 /* ok, block queued twice, skip */
6615 if (cur->tag.blockNum == next->tag.blockNum)
6616 continue;
6617
6618 /* only merge consecutive writes */
6619 if (cur->tag.blockNum + 1 != next->tag.blockNum)
6620 break;
6621
6622 nblocks++;
6623 cur = next;
6624 }
6625
6626 i += ahead;
6627
6628 /* and finally tell the kernel to write the data to storage */
6629 reln = smgropen(currlocator, INVALID_PROC_NUMBER);
6630 smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
6631 }
6632
6633 /*
6634 * Assume that writeback requests are only issued for buffers containing
6635 * blocks of permanent relations.
6636 */
 6637 pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
 6638 IOOP_WRITEBACK, io_start, wb_context->nr_pending, 0);
6639
6640 wb_context->nr_pending = 0;
6641}
static int32 next
Definition: blutils.c:224
struct cursor * cur
Definition: ecpg.c:29
@ IOOP_WRITEBACK
Definition: pgstat.h:311
#define RelFileLocatorEquals(locator1, locator2)
void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks)
Definition: smgr.c:805
PendingWriteback pending_writebacks[WRITEBACK_MAX_PENDING_FLUSHES]

References buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), cur, i, INVALID_PROC_NUMBER, IOOBJECT_RELATION, IOOP_WRITEBACK, next, WritebackContext::nr_pending, WritebackContext::pending_writebacks, pgstat_count_io_op_time(), pgstat_prepare_io_time(), RelFileLocatorEquals, smgropen(), smgrwriteback(), and track_io_timing.

Referenced by BufferSync(), and ScheduleBufferTagForWriteback().
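As a worked example of the coalescing rules in the loop above, suppose the sorted pending array holds blocks 7, 8, 8, 9, 12 of the same relation fork. Block 7 starts a run (nblocks = 1); 8 is consecutive (nblocks = 2); the duplicate 8 is skipped; 9 is consecutive (nblocks = 3); 12 is not adjacent, so the run ends and a single smgrwriteback() covering blocks 7 through 9 is issued, after which a new run starts at block 12.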

◆ LimitAdditionalPins()

void LimitAdditionalPins ( uint32 additional_pins)

Definition at line 2607 of file bufmgr.c.

2608{
2609 uint32 limit;
2610
2611 if (*additional_pins <= 1)
2612 return;
2613
2614 limit = GetAdditionalPinLimit();
2615 limit = Max(limit, 1);
2616 if (limit < *additional_pins)
2617 *additional_pins = limit;
2618}
uint32 GetAdditionalPinLimit(void)
Definition: bufmgr.c:2581
#define Max(x, y)
Definition: c.h:989

References GetAdditionalPinLimit(), and Max.

Referenced by ExtendBufferedRelShared().
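A sketch of how a bulk-extension caller applies this clamp before pinning a batch of new buffers (desired_blocks is an assumed input, not a bufmgr.c variable):

    uint32      extend_by = desired_blocks;   /* buffers we would like to pin */

    LimitAdditionalPins(&extend_by);          /* may shrink, but never below 1 */
    /* ... pin and initialize extend_by buffers ... */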

◆ local_buffer_readv_complete()

static PgAioResult local_buffer_readv_complete ( PgAioHandle ioh,
PgAioResult  prior_result,
uint8  cb_data 
)
static

Definition at line 7724 of file bufmgr.c.

7726{
7727 return buffer_readv_complete(ioh, prior_result, cb_data, true);
7728}
static pg_attribute_always_inline PgAioResult buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data, bool is_temp)
Definition: bufmgr.c:7469

References buffer_readv_complete().

◆ local_buffer_readv_stage()

static void local_buffer_readv_stage ( PgAioHandle ioh,
uint8  cb_data 
)
static

Definition at line 7718 of file bufmgr.c.

7719{
7720 buffer_stage_common(ioh, false, true);
7721}
static pg_attribute_always_inline void buffer_stage_common(PgAioHandle *ioh, bool is_write, bool is_temp)
Definition: bufmgr.c:7077

References buffer_stage_common().

◆ local_buffer_write_error_callback()

static void local_buffer_write_error_callback ( void *  arg)
static

Definition at line 6316 of file bufmgr.c.

6317{
6318 BufferDesc *bufHdr = (BufferDesc *) arg;
6319
6320 if (bufHdr != NULL)
6321 errcontext("writing block %u of relation \"%s\"",
6322 bufHdr->tag.blockNum,
6325 BufTagGetForkNum(&bufHdr->tag)).str);
6326}
#define errcontext
Definition: elog.h:198
void * arg

References arg, buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, MyProcNumber, relpathbackend, and BufferDesc::tag.

Referenced by FlushRelationBuffers().

◆ LockBuffer()

void LockBuffer ( Buffer  buffer,
BufferLockMode  mode 
)

Definition at line 5699 of file bufmgr.c.

5700{
5701 BufferDesc *buf;
5702
5703 Assert(BufferIsPinned(buffer));
5704 if (BufferIsLocal(buffer))
5705 return; /* local buffers need no lock */
5706
5707 buf = GetBufferDescriptor(buffer - 1);
5708
 5709 if (mode == BUFFER_LOCK_UNLOCK)
 5710 LWLockRelease(BufferDescriptorGetContentLock(buf));
 5711 else if (mode == BUFFER_LOCK_SHARE)
 5712 LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
 5713 else if (mode == BUFFER_LOCK_EXCLUSIVE)
 5714 LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
 5715 else
5716 elog(ERROR, "unrecognized buffer lock mode: %d", mode);
5717}

References Assert(), buf, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferDescriptorGetContentLock(), BufferIsLocal, BufferIsPinned, elog, ERROR, GetBufferDescriptor(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), and mode.

Referenced by _bt_lockbuf(), _bt_unlockbuf(), _bt_upgradelockbufcleanup(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_finish_split(), _hash_first(), _hash_freeovflpage(), _hash_getbuf(), _hash_getbuf_with_strategy(), _hash_getcachedmetap(), _hash_init(), _hash_kill_items(), _hash_readnext(), _hash_readpage(), _hash_readprev(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), BitmapHeapScanNextBlock(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), BloomNewBuffer(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_page_cleanup(), bringetbitmap(), brinGetStats(), brinGetTupleForHeapBlock(), brininsert(), brinLockRevmapPageForUpdate(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), brinsummarize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), collect_corrupt_items(), collect_visibility_data(), collectMatchBitmap(), ConditionalLockBufferForCleanup(), count_nondeletable_pages(), create_toy_buffer(), entryLoadMoreItems(), ExtendBufferedRelShared(), FreeSpaceMapPrepareTruncateRel(), fsm_readbuf(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), get_raw_page_internal(), GetVisibilityMapPins(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), gin_refind_parent(), ginbulkdelete(), ginEntryInsert(), ginFindLeafPage(), ginFindParents(), ginFinishOldSplit(), ginFinishSplit(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginInsertValue(), GinNewBuffer(), ginScanToDelete(), ginStepRight(), ginTraverseLock(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTreeLeaves(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfinishsplit(), gistfixsplit(), gistformdownlink(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_inplace_lock(), heap_inplace_unlock(), heap_inplace_update_and_unlock(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_opt(), heap_prepare_pagescan(), heap_update(), heap_xlog_visible(), heapam_index_build_range_scan(), heapam_index_fetch_tuple(), heapam_index_validate_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_block(), heapam_scan_sample_next_tuple(), heapam_tuple_satisfies_snapshot(), heapgettup(), initBloomState(), invalidate_rel_block(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), LockBufferForCleanup(), log_newpage_range(), modify_rel_block(), palloc_btree_page(), pg_visibility(), pgrowlocks(), pgstat_btree_page(), pgstat_gist_page(), pgstat_hash_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), pgstatindex_impl(), read_seq_tuple(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), ScanSourceDatabasePgClass(), shiftList(), spgdoinsert(), spgGetCache(), SpGistNewBuffer(), spgprocesspending(), spgvacuumpage(), spgWalk(), startScanEntry(), statapprox_heap(), summarize_range(), UnlockReleaseBuffer(), 
update_most_recent_deletion_info(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), vm_readbuf(), XLogReadBufferForRedoExtended(), XLogRecordPageWithFreeSpace(), and ZeroAndLockBuffer().
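A minimal sketch of the canonical pin-then-lock protocol built on LockBuffer(); see src/backend/access/transam/README for the authoritative rules:

    Buffer      buf = ReadBuffer(rel, blkno);   /* acquires a pin */
    Page        page;

    LockBuffer(buf, BUFFER_LOCK_SHARE);         /* content lock for reading */
    page = BufferGetPage(buf);
    /* ... examine tuples on "page" ... */
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);        /* drop the lock, keep the pin */
    ReleaseBuffer(buf);                         /* drop the pin */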

◆ LockBufferForCleanup()

void LockBufferForCleanup ( Buffer  buffer)

Definition at line 5779 of file bufmgr.c.

5780{
5781 BufferDesc *bufHdr;
5782 TimestampTz waitStart = 0;
5783 bool waiting = false;
5784 bool logged_recovery_conflict = false;
5785
5786 Assert(BufferIsPinned(buffer));
5787 Assert(PinCountWaitBuf == NULL);
5788
 5789 CheckBufferIsPinnedOnce(buffer);
 5790
5791 /*
5792 * We do not yet need to be worried about in-progress AIOs holding a pin,
5793 * as we, so far, only support doing reads via AIO and this function can
5794 * only be called once the buffer is valid (i.e. no read can be in
5795 * flight).
5796 */
5797
5798 /* Nobody else to wait for */
5799 if (BufferIsLocal(buffer))
5800 return;
5801
5802 bufHdr = GetBufferDescriptor(buffer - 1);
5803
5804 for (;;)
5805 {
5806 uint32 buf_state;
5807 uint32 unset_bits = 0;
5808
5809 /* Try to acquire lock */
 5810 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 5811 buf_state = LockBufHdr(bufHdr);
5812
5813 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5814 if (BUF_STATE_GET_REFCOUNT(buf_state) == 1)
5815 {
5816 /* Successfully acquired exclusive lock with pincount 1 */
5817 UnlockBufHdr(bufHdr);
5818
5819 /*
5820 * Emit the log message if recovery conflict on buffer pin was
5821 * resolved but the startup process waited longer than
5822 * deadlock_timeout for it.
5823 */
5824 if (logged_recovery_conflict)
 5825 LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
 5826 waitStart, GetCurrentTimestamp(),
5827 NULL, false);
5828
5829 if (waiting)
5830 {
5831 /* reset ps display to remove the suffix if we added one */
 5832 set_ps_display_remove_suffix();
 5833 waiting = false;
5834 }
5835 return;
5836 }
5837 /* Failed, so mark myself as waiting for pincount 1 */
5838 if (buf_state & BM_PIN_COUNT_WAITER)
5839 {
5840 UnlockBufHdr(bufHdr);
 5841 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 5842 elog(ERROR, "multiple backends attempting to wait for pincount 1");
5843 }
 5844 bufHdr->wait_backend_pgprocno = MyProcNumber;
 5845 PinCountWaitBuf = bufHdr;
5846 UnlockBufHdrExt(bufHdr, buf_state,
 5847 BM_PIN_COUNT_WAITER, 0,
 5848 0);
5850
5851 /* Wait to be signaled by UnpinBuffer() */
5852 if (InHotStandby)
5853 {
5854 if (!waiting)
5855 {
5856 /* adjust the process title to indicate that it's waiting */
5857 set_ps_display_suffix("waiting");
5858 waiting = true;
5859 }
5860
5861 /*
5862 * Emit the log message if the startup process is waiting longer
5863 * than deadlock_timeout for recovery conflict on buffer pin.
5864 *
5865 * Skip this if first time through because the startup process has
5866 * not started waiting yet in this case. So, the wait start
5867 * timestamp is set after this logic.
5868 */
5869 if (waitStart != 0 && !logged_recovery_conflict)
5870 {
 5871 TimestampTz now = GetCurrentTimestamp();
 5872
5873 if (TimestampDifferenceExceeds(waitStart, now,
 5874 DeadlockTimeout))
 5875 {
 5876 LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
 5877 waitStart, now, NULL, true);
5878 logged_recovery_conflict = true;
5879 }
5880 }
5881
5882 /*
5883 * Set the wait start timestamp if logging is enabled and first
5884 * time through.
5885 */
5886 if (log_recovery_conflict_waits && waitStart == 0)
5887 waitStart = GetCurrentTimestamp();
5888
5889 /* Publish the bufid that Startup process waits on */
5890 SetStartupBufferPinWaitBufId(buffer - 1);
5891 /* Set alarm and then wait to be signaled by UnpinBuffer() */
 5892 ResolveRecoveryConflictWithBufferPin();
 5893 /* Reset the published bufid */
 5894 SetStartupBufferPinWaitBufId(-1);
 5895 }
5896 else
5897 ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP);
5898
5899 /*
5900 * Remove flag marking us as waiter. Normally this will not be set
5901 * anymore, but ProcWaitForSignal() can return for other signals as
5902 * well. We take care to only reset the flag if we're the waiter, as
5903 * theoretically another backend could have started waiting. That's
5904 * impossible with the current usages due to table level locking, but
5905 * better be safe.
5906 */
5907 buf_state = LockBufHdr(bufHdr);
5908 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
 5909 bufHdr->wait_backend_pgprocno == MyProcNumber)
 5910 unset_bits |= BM_PIN_COUNT_WAITER;
5911
5912 UnlockBufHdrExt(bufHdr, buf_state,
5913 0, unset_bits,
5914 0);
5915
5916 PinCountWaitBuf = NULL;
5917 /* Loop back and try again */
5918 }
5919}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609
#define BM_PIN_COUNT_WAITER
Definition: buf_internals.h:75
static BufferDesc * PinCountWaitBuf
Definition: bufmgr.c:205
int64 TimestampTz
Definition: timestamp.h:39
@ PROCSIG_RECOVERY_CONFLICT_BUFFERPIN
Definition: procsignal.h:47
void set_ps_display_remove_suffix(void)
Definition: ps_status.c:439
void set_ps_display_suffix(const char *suffix)
Definition: ps_status.c:387
int DeadlockTimeout
Definition: proc.c:58
void SetStartupBufferPinWaitBufId(int bufid)
Definition: proc.c:759
void ProcWaitForSignal(uint32 wait_event_info)
Definition: proc.c:1984
void ResolveRecoveryConflictWithBufferPin(void)
Definition: standby.c:793
bool log_recovery_conflict_waits
Definition: standby.c:42
void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting)
Definition: standby.c:274
int wait_backend_pgprocno
static volatile sig_atomic_t waiting
Definition: waiteventset.c:171
#define InHotStandby
Definition: xlogutils.h:60

References Assert(), BM_PIN_COUNT_WAITER, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsLocal, BufferIsPinned, CheckBufferIsPinnedOnce(), DeadlockTimeout, elog, ERROR, GetBufferDescriptor(), GetCurrentTimestamp(), InHotStandby, LockBuffer(), LockBufHdr(), log_recovery_conflict_waits, LogRecoveryConflict(), MyProcNumber, now(), PinCountWaitBuf, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, ProcWaitForSignal(), ResolveRecoveryConflictWithBufferPin(), set_ps_display_remove_suffix(), set_ps_display_suffix(), SetStartupBufferPinWaitBufId(), TimestampDifferenceExceeds(), UnlockBufHdr(), UnlockBufHdrExt(), BufferDesc::wait_backend_pgprocno, and waiting.

Referenced by _bt_upgradelockbufcleanup(), ginVacuumPostingTree(), hashbulkdelete(), heap_force_common(), lazy_scan_heap(), XLogReadBufferForRedoExtended(), and ZeroAndLockBuffer().
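A sketch of typical use, modeled on lazy VACUUM's page pruning; vac_strategy is an assumed caller-provided BufferAccessStrategy:

    Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                         RBM_NORMAL, vac_strategy);

    LockBufferForCleanup(buf);      /* exclusive lock AND sole pin */
    /* ... safe to prune and defragment: no one else references the page ... */
    UnlockReleaseBuffer(buf);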

◆ LockBufHdr()

uint32 LockBufHdr ( BufferDesc desc)

Definition at line 6359 of file bufmgr.c.

6360{
6361 SpinDelayStatus delayStatus;
6362 uint32 old_buf_state;
6363
 6364 Assert(!BufferIsLocal(BufferDescriptorGetBuffer(desc)));
 6365
6366 init_local_spin_delay(&delayStatus);
6367
6368 while (true)
6369 {
6370 /* set BM_LOCKED flag */
6371 old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED);
6372 /* if it wasn't set before we're OK */
6373 if (!(old_buf_state & BM_LOCKED))
6374 break;
6375 perform_spin_delay(&delayStatus);
6376 }
6377 finish_spin_delay(&delayStatus);
6378 return old_buf_state | BM_LOCKED;
6379}
static uint32 pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
Definition: atomics.h:410
void perform_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:126
void finish_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:186
#define init_local_spin_delay(status)
Definition: s_lock.h:733

References Assert(), BM_LOCKED, BufferDescriptorGetBuffer(), BufferIsLocal, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), pg_atomic_fetch_or_u32(), and BufferDesc::state.

Referenced by AbortBufferIO(), apw_dump_now(), buffer_stage_common(), BufferAlloc(), BufferGetLSNAtomic(), BufferSync(), ConditionalLockBufferForCleanup(), create_toy_buffer(), DropDatabaseBuffers(), DropRelationBuffers(), DropRelationsAllBuffers(), EvictAllUnpinnedBuffers(), EvictRelUnpinnedBuffers(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FindAndDropRelationBuffers(), FlushBuffer(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetVictimBuffer(), InvalidateBuffer(), InvalidateVictimBuffer(), IsBufferCleanupOK(), LockBufferForCleanup(), MarkBufferDirtyHint(), MarkDirtyAllUnpinnedBuffers(), MarkDirtyRelUnpinnedBuffers(), MarkDirtyUnpinnedBuffer(), pg_buffercache_os_pages_internal(), pg_buffercache_pages(), StartBufferIO(), SyncOneBuffer(), TerminateBufferIO(), UnlockBuffers(), WaitIO(), and WakePinCountWaiter().
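The return value is the state word as observed when the lock was taken, with BM_LOCKED set. A sketch of the idiom used by the callers listed above; the critical section must stay short, since this is a spinlock:

    uint32      buf_state = LockBufHdr(desc);

    if (buf_state & BM_DIRTY)
    {
        /* ... examine or adjust header fields while the spinlock is held ... */
    }
    UnlockBufHdr(desc);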

◆ MarkBufferDirty()

void MarkBufferDirty ( Buffer  buffer)

Definition at line 3037 of file bufmgr.c.

3038{
3039 BufferDesc *bufHdr;
3040 uint32 buf_state;
3041 uint32 old_buf_state;
3042
3043 if (!BufferIsValid(buffer))
3044 elog(ERROR, "bad buffer ID: %d", buffer);
3045
3046 if (BufferIsLocal(buffer))
3047 {
3048 MarkLocalBufferDirty(buffer);
3049 return;
3050 }
3051
3052 bufHdr = GetBufferDescriptor(buffer - 1);
3053
3054 Assert(BufferIsPinned(buffer));
 3055 Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE));
 3056
3057 /*
3058 * NB: We have to wait for the buffer header spinlock to be not held, as
3059 * TerminateBufferIO() relies on the spinlock.
3060 */
3061 old_buf_state = pg_atomic_read_u32(&bufHdr->state);
3062 for (;;)
3063 {
3064 if (old_buf_state & BM_LOCKED)
3065 old_buf_state = WaitBufHdrUnlocked(bufHdr);
3066
3067 buf_state = old_buf_state;
3068
3069 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
3070 buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
3071
3072 if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
3073 buf_state))
3074 break;
3075 }
3076
3077 /*
3078 * If the buffer was not dirty already, do vacuum accounting.
3079 */
3080 if (!(old_buf_state & BM_DIRTY))
 3081 {
 3082 pgBufferUsage.shared_blks_dirtied++;
 3083 if (VacuumCostActive)
 3084 VacuumCostBalance += VacuumCostPageDirty;
 3085 }
3086}
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:349
pg_noinline uint32 WaitBufHdrUnlocked(BufferDesc *buf)
Definition: bufmgr.c:6389
int VacuumCostPageDirty
Definition: globals.c:153
void MarkLocalBufferDirty(Buffer buffer)
Definition: localbuf.c:491
int64 shared_blks_dirtied
Definition: instrument.h:28

References Assert(), BM_DIRTY, BM_JUST_DIRTIED, BM_LOCKED, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferIsLocal, BufferIsLockedByMeInMode(), BufferIsPinned, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), MarkLocalBufferDirty(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), pgBufferUsage, BufferUsage::shared_blks_dirtied, BufferDesc::state, VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, and WaitBufHdrUnlocked().

Referenced by _bt_clear_incomplete_split(), _bt_dedup_pass(), _bt_delitems_delete(), _bt_delitems_vacuum(), _bt_getroot(), _bt_insertonpg(), _bt_mark_page_halfdead(), _bt_newlevel(), _bt_restore_meta(), _bt_set_cleanup_info(), _bt_split(), _bt_unlink_halfdead_page(), _hash_addovflpage(), _hash_doinsert(), _hash_expandtable(), _hash_freeovflpage(), _hash_init(), _hash_splitbucket(), _hash_squeezebucket(), _hash_vacuum_one_page(), addLeafTuple(), brin_doinsert(), brin_doupdate(), brin_initialize_empty_new_buffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinRevmapDesummarizeRange(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), createPostingTree(), dataExecPlaceToPageInternal(), dataExecPlaceToPageLeaf(), doPickSplit(), entryExecPlaceToPage(), fill_seq_fork_with_data(), FreeSpaceMapPrepareTruncateRel(), generic_redo(), GenericXLogFinish(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginDeletePage(), ginHeapTupleFastInsert(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginUpdateStats(), ginVacuumPostingTreeLeaf(), gistbuild(), gistbuildempty(), gistdeletepage(), gistplacetopage(), gistprunepage(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), hashbucketcleanup(), hashbulkdelete(), heap_abort_speculative(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_inplace_update_and_unlock(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_page_prune_and_freeze(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), lazy_scan_new_or_empty(), lazy_scan_prune(), lazy_vacuum_heap_page(), log_newpage_range(), MarkDirtyUnpinnedBufferInternal(), moveLeafs(), nextval_internal(), RelationAddBlocks(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), revmap_physical_extend(), saveNodeLink(), seq_redo(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), SpGistUpdateMetaPage(), spgRedoAddLeaf(), spgRedoAddNode(), spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), vacuumLeafPage(), vacuumLeafRoot(), vacuumRedirectAndPlaceholder(), visibilitymap_clear(), visibilitymap_prepare_truncate(), visibilitymap_set(), visibilitymap_set_vmbits(), writeListPage(), and XLogReadBufferForRedoExtended().
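MarkBufferDirty() is one step of the standard write-ahead-logging protocol. A condensed sketch following src/backend/access/transam/README; the rmgr id and info flag are placeholders, not real symbols:

    START_CRIT_SECTION();

    /* ... modify the page while holding an exclusive content lock ... */

    MarkBufferDirty(buf);

    if (RelationNeedsWAL(rel))
    {
        XLogRecPtr  recptr;

        XLogBeginInsert();
        XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
        recptr = XLogInsert(RM_FOO_ID, XLOG_FOO_ACTION);    /* placeholders */
        PageSetLSN(BufferGetPage(buf), recptr);
    }

    END_CRIT_SECTION();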

◆ MarkBufferDirtyHint()

void MarkBufferDirtyHint ( Buffer  buffer,
bool  buffer_std 
)

Definition at line 5525 of file bufmgr.c.

5526{
5527 BufferDesc *bufHdr;
5528 Page page = BufferGetPage(buffer);
5529
5530 if (!BufferIsValid(buffer))
5531 elog(ERROR, "bad buffer ID: %d", buffer);
5532
5533 if (BufferIsLocal(buffer))
5534 {
5535 MarkLocalBufferDirty(buffer);
5536 return;
5537 }
5538
5539 bufHdr = GetBufferDescriptor(buffer - 1);
5540
5541 Assert(GetPrivateRefCount(buffer) > 0);
5542 /* here, either share or exclusive lock is OK */
5543 Assert(BufferIsLockedByMe(buffer));
5544
5545 /*
5546 * This routine might get called many times on the same page, if we are
5547 * making the first scan after commit of an xact that added/deleted many
5548 * tuples. So, be as quick as we can if the buffer is already dirty. We
5549 * do this by not acquiring spinlock if it looks like the status bits are
5550 * already set. Since we make this test unlocked, there's a chance we
 5551 * might fail to notice that the flags have just been cleared, and fail
5552 * to reset them, due to memory-ordering issues. But since this function
5553 * is only intended to be used in cases where failing to write out the
5554 * data would be harmless anyway, it doesn't really matter.
5555 */
5556 if ((pg_atomic_read_u32(&bufHdr->state) & (BM_DIRTY | BM_JUST_DIRTIED)) !=
 5557 (BM_DIRTY | BM_JUST_DIRTIED))
 5558 {
 5559 XLogRecPtr lsn = InvalidXLogRecPtr;
5560 bool dirtied = false;
5561 bool delayChkptFlags = false;
5562 uint32 buf_state;
5563
5564 /*
5565 * If we need to protect hint bit updates from torn writes, WAL-log a
5566 * full page image of the page. This full page image is only necessary
5567 * if the hint bit update is the first change to the page since the
5568 * last checkpoint.
5569 *
5570 * We don't check full_page_writes here because that logic is included
5571 * when we call XLogInsert() since the value changes dynamically.
5572 */
5573 if (XLogHintBitIsNeeded() &&
 5574 (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT))
 5575 {
5576 /*
5577 * If we must not write WAL, due to a relfilelocator-specific
5578 * condition or being in recovery, don't dirty the page. We can
5579 * set the hint, just not dirty the page as a result so the hint
5580 * is lost when we evict the page or shutdown.
5581 *
5582 * See src/backend/storage/page/README for longer discussion.
5583 */
5584 if (RecoveryInProgress() ||
 5585 RelFileLocatorSkippingWAL(BufTagGetRelFileLocator(&bufHdr->tag)))
 5586 return;
5587
5588 /*
5589 * If the block is already dirty because we either made a change
5590 * or set a hint already, then we don't need to write a full page
5591 * image. Note that aggressive cleaning of blocks dirtied by hint
5592 * bit setting would increase the call rate. Bulk setting of hint
5593 * bits would reduce the call rate...
5594 *
5595 * We must issue the WAL record before we mark the buffer dirty.
5596 * Otherwise we might write the page before we write the WAL. That
5597 * causes a race condition, since a checkpoint might occur between
5598 * writing the WAL record and marking the buffer dirty. We solve
5599 * that with a kluge, but one that is already in use during
5600 * transaction commit to prevent race conditions. Basically, we
5601 * simply prevent the checkpoint WAL record from being written
5602 * until we have marked the buffer dirty. We don't start the
5603 * checkpoint flush until we have marked dirty, so our checkpoint
5604 * must flush the change to disk successfully or the checkpoint
5605 * never gets written, so crash recovery will fix.
5606 *
5607 * It's possible we may enter here without an xid, so it is
5608 * essential that CreateCheckPoint waits for virtual transactions
5609 * rather than full transactionids.
5610 */
 5611 Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
 5612 MyProc->delayChkptFlags |= DELAY_CHKPT_START;
 5613 delayChkptFlags = true;
5614 lsn = XLogSaveBufferForHint(buffer, buffer_std);
5615 }
5616
5617 buf_state = LockBufHdr(bufHdr);
5618
5619 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
5620
5621 if (!(buf_state & BM_DIRTY))
5622 {
5623 dirtied = true; /* Means "will be dirtied by this action" */
5624
5625 /*
5626 * Set the page LSN if we wrote a backup block. We aren't supposed
5627 * to set this when only holding a share lock but as long as we
5628 * serialise it somehow we're OK. We choose to set LSN while
5629 * holding the buffer header lock, which causes any reader of an
5630 * LSN who holds only a share lock to also obtain a buffer header
5631 * lock before using PageGetLSN(), which is enforced in
5632 * BufferGetLSNAtomic().
5633 *
5634 * If checksums are enabled, you might think we should reset the
5635 * checksum here. That will happen when the page is written
5636 * sometime later in this checkpoint cycle.
5637 */
5638 if (XLogRecPtrIsValid(lsn))
5639 PageSetLSN(page, lsn);
5640 }
5641
5642 UnlockBufHdrExt(bufHdr, buf_state,
 5643 BM_DIRTY | BM_JUST_DIRTIED,
 5644 0, 0);
5645
5646 if (delayChkptFlags)
5647 MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
5648
5649 if (dirtied)
5650 {
 5651 pgBufferUsage.shared_blks_dirtied++;
 5652 if (VacuumCostActive)
 5653 VacuumCostBalance += VacuumCostPageDirty;
 5654 }
5655 }
5656}
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:390
#define DELAY_CHKPT_START
Definition: proc.h:135
bool RelFileLocatorSkippingWAL(RelFileLocator rlocator)
Definition: storage.c:573
int delayChkptFlags
Definition: proc.h:257
bool RecoveryInProgress(void)
Definition: xlog.c:6404
#define XLogRecPtrIsValid(r)
Definition: xlogdefs.h:29
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
Definition: xloginsert.c:1087

References Assert(), BM_DIRTY, BM_JUST_DIRTIED, BM_PERMANENT, BUF_STATE_GET_REFCOUNT, PrivateRefCountEntry::buffer, BufferGetPage(), BufferIsLocal, BufferIsLockedByMe(), BufferIsValid(), BufTagGetRelFileLocator(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, ERROR, GetBufferDescriptor(), GetPrivateRefCount(), InvalidXLogRecPtr, LockBufHdr(), MarkLocalBufferDirty(), MyProc, PageSetLSN(), pg_atomic_read_u32(), pgBufferUsage, RecoveryInProgress(), RelFileLocatorSkippingWAL(), BufferUsage::shared_blks_dirtied, BufferDesc::state, BufferDesc::tag, UnlockBufHdrExt(), VacuumCostActive, VacuumCostBalance, VacuumCostPageDirty, XLogHintBitIsNeeded, XLogRecPtrIsValid, and XLogSaveBufferForHint().

Referenced by _bt_check_unique(), _bt_killitems(), _hash_kill_items(), brin_start_evacuating_page(), btvacuumpage(), fsm_search(), fsm_search_avail(), fsm_set_and_search(), fsm_vacuum_page(), gistkillitems(), heap_page_prune_and_freeze(), read_seq_tuple(), SetHintBits(), and XLogRecordPageWithFreeSpace().
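A sketch of a hint-bit caller in the style of SetHintBits(): the page change itself is not WAL-logged by the caller, and MarkBufferDirtyHint() decides whether a full-page image must be emitted first (tuple and buffer are assumed caller-supplied):

    /* record that this tuple's inserting transaction is known committed */
    tuple->t_infomask |= HEAP_XMIN_COMMITTED;
    MarkBufferDirtyHint(buffer, true);      /* true: standard page layout */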

◆ MarkDirtyAllUnpinnedBuffers()

void MarkDirtyAllUnpinnedBuffers ( int32 buffers_dirtied,
int32 buffers_already_dirty,
int32 buffers_skipped 
)

Definition at line 7028 of file bufmgr.c.

7031{
7032 *buffers_dirtied = 0;
7033 *buffers_already_dirty = 0;
7034 *buffers_skipped = 0;
7035
7036 for (int buf = 1; buf <= NBuffers; buf++)
7037 {
7038 BufferDesc *desc = GetBufferDescriptor(buf - 1);
7039 uint32 buf_state;
7040 bool buffer_already_dirty;
7041
 7042 CHECK_FOR_INTERRUPTS();
 7043
7044 buf_state = pg_atomic_read_u32(&desc->state);
7045 if (!(buf_state & BM_VALID))
7046 continue;
7047
 7048 ReservePrivateRefCountEntry();
 7049 ResourceOwnerEnlarge(CurrentResourceOwner);
 7050
7051 LockBufHdr(desc);
7052
7053 if (MarkDirtyUnpinnedBufferInternal(buf, desc, &buffer_already_dirty))
7054 (*buffers_dirtied)++;
7055 else if (buffer_already_dirty)
7056 (*buffers_already_dirty)++;
7057 else
7058 (*buffers_skipped)++;
7059 }
7060}
static bool MarkDirtyUnpinnedBufferInternal(Buffer buf, BufferDesc *desc, bool *buffer_already_dirty)
Definition: bufmgr.c:6879

References BM_VALID, buf, CHECK_FOR_INTERRUPTS, CurrentResourceOwner, GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), NBuffers, pg_atomic_read_u32(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), and BufferDesc::state.

Referenced by pg_buffercache_mark_dirty_all().

◆ MarkDirtyRelUnpinnedBuffers()

void MarkDirtyRelUnpinnedBuffers ( Relation  rel,
int32 buffers_dirtied,
int32 buffers_already_dirty,
int32 buffers_skipped 
)

Definition at line 6971 of file bufmgr.c.

6975{
 6976 Assert(!RelationUsesLocalBuffers(rel));
 6977
6978 *buffers_dirtied = 0;
6979 *buffers_already_dirty = 0;
6980 *buffers_skipped = 0;
6981
6982 for (int buf = 1; buf <= NBuffers; buf++)
6983 {
6984 BufferDesc *desc = GetBufferDescriptor(buf - 1);
6985 uint32 buf_state = pg_atomic_read_u32(&(desc->state));
6986 bool buffer_already_dirty;
6987
 6988 CHECK_FOR_INTERRUPTS();
 6989
6990 /* An unlocked precheck should be safe and saves some cycles. */
6991 if ((buf_state & BM_VALID) == 0 ||
 6992 !BufTagMatchesRelFileLocator(&desc->tag, &rel->rd_locator))
 6993 continue;
6994
6995 /* Make sure we can pin the buffer. */
 6996 ReservePrivateRefCountEntry();
 6997 ResourceOwnerEnlarge(CurrentResourceOwner);
 6998
6999 buf_state = LockBufHdr(desc);
7000
7001 /* recheck, could have changed without the lock */
7002 if ((buf_state & BM_VALID) == 0 ||
 7003 !BufTagMatchesRelFileLocator(&desc->tag, &rel->rd_locator))
 7004 {
7005 UnlockBufHdr(desc);
7006 continue;
7007 }
7008
7009 if (MarkDirtyUnpinnedBufferInternal(buf, desc, &buffer_already_dirty))
7010 (*buffers_dirtied)++;
7011 else if (buffer_already_dirty)
7012 (*buffers_already_dirty)++;
7013 else
7014 (*buffers_skipped)++;
7015 }
7016}

References Assert(), BM_VALID, buf, BufTagMatchesRelFileLocator(), CHECK_FOR_INTERRUPTS, CurrentResourceOwner, GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), NBuffers, pg_atomic_read_u32(), RelationData::rd_locator, RelationUsesLocalBuffers, ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferDesc::state, BufferDesc::tag, and UnlockBufHdr().

Referenced by pg_buffercache_mark_dirty_relation().

◆ MarkDirtyUnpinnedBuffer()

bool MarkDirtyUnpinnedBuffer ( Buffer  buf,
bool *  buffer_already_dirty 
)

Definition at line 6935 of file bufmgr.c.

6936{
6937 BufferDesc *desc;
6938 bool buffer_dirtied = false;
 6939
 6940 Assert(!BufferIsLocal(buf));
 6941
6942 /* Make sure we can pin the buffer. */
 6943 ReservePrivateRefCountEntry();
 6944 ResourceOwnerEnlarge(CurrentResourceOwner);
 6945
6946 desc = GetBufferDescriptor(buf - 1);
6947 LockBufHdr(desc);
6948
6949 buffer_dirtied = MarkDirtyUnpinnedBufferInternal(buf, desc, buffer_already_dirty);
6950 /* Both can not be true at the same time */
6951 Assert(!(buffer_dirtied && *buffer_already_dirty));
6952
6953 return buffer_dirtied;
6954}

References Assert(), buf, BufferIsLocal, CurrentResourceOwner, GetBufferDescriptor(), LockBufHdr(), MarkDirtyUnpinnedBufferInternal(), ReservePrivateRefCountEntry(), and ResourceOwnerEnlarge().

Referenced by pg_buffercache_mark_dirty().

◆ MarkDirtyUnpinnedBufferInternal()

static bool MarkDirtyUnpinnedBufferInternal ( Buffer  buf,
BufferDesc desc,
bool *  buffer_already_dirty 
)
static

Definition at line 6879 of file bufmgr.c.

6881{
6882 uint32 buf_state;
6883 bool result = false;
6884
6885 *buffer_already_dirty = false;
6886
6887 buf_state = pg_atomic_read_u32(&(desc->state));
6888 Assert(buf_state & BM_LOCKED);
6889
6890 if ((buf_state & BM_VALID) == 0)
6891 {
6892 UnlockBufHdr(desc);
6893 return false;
6894 }
6895
6896 /* Check that it's not pinned already. */
6897 if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
6898 {
6899 UnlockBufHdr(desc);
6900 return false;
6901 }
6902
6903 /* Pin the buffer and then release the buffer spinlock */
6904 PinBuffer_Locked(desc);
6905
6906 /* If it was not already dirty, mark it as dirty. */
6907 if (!(buf_state & BM_DIRTY))
6908 {
 6909 LWLockAcquire(BufferDescriptorGetContentLock(desc), LW_EXCLUSIVE);
 6910 MarkBufferDirty(buf);
 6911 result = true;
 6912 LWLockRelease(BufferDescriptorGetContentLock(desc));
 6913 }
6914 else
6915 *buffer_already_dirty = true;
6916
6917 UnpinBuffer(desc);
6918
6919 return result;
6920}
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:3037

References Assert(), BM_DIRTY, BM_LOCKED, BM_VALID, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetContentLock(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MarkBufferDirty(), pg_atomic_read_u32(), PinBuffer_Locked(), BufferDesc::state, UnlockBufHdr(), and UnpinBuffer().

Referenced by MarkDirtyAllUnpinnedBuffers(), MarkDirtyRelUnpinnedBuffers(), and MarkDirtyUnpinnedBuffer().

◆ NewPrivateRefCountEntry()

static PrivateRefCountEntry * NewPrivateRefCountEntry ( Buffer  buffer)
static

Definition at line 363 of file bufmgr.c.

364{
 365 PrivateRefCountEntry *res;
 366
367 /* only allowed to be called when a reservation has been made */
 368 Assert(ReservedRefCountSlot != NULL);
 369
370 /* use up the reserved entry */
372
373 /* and fill it */
375 res->buffer = buffer;
376 res->data.refcount = 0;
377
378 /* update cache for the next lookup */
380
382
383 return res;
384}

References Assert(), PrivateRefCountEntry::buffer, PrivateRefCountEntry::data, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountEntryLast, PrivateRefCountData::refcount, and ReservedRefCountSlot.

Referenced by TrackNewBufferPin().

◆ PinBuffer()

static bool PinBuffer ( BufferDesc buf,
BufferAccessStrategy  strategy,
bool  skip_if_not_valid 
)
static

Definition at line 3162 of file bufmgr.c.

3164{
 3165 Buffer b = BufferDescriptorGetBuffer(buf);
 3166 bool result;
 3167 PrivateRefCountEntry *ref;
 3168
 3169 Assert(!BufferIsLocal(b));
 3170 Assert(ReservedRefCountSlot != NULL);
 3171
3172 ref = GetPrivateRefCountEntry(b, true);
3173
3174 if (ref == NULL)
3175 {
3176 uint32 buf_state;
3177 uint32 old_buf_state;
3178
3179 old_buf_state = pg_atomic_read_u32(&buf->state);
3180 for (;;)
3181 {
3182 if (unlikely(skip_if_not_valid && !(old_buf_state & BM_VALID)))
3183 return false;
3184
3185 /*
3186 * We're not allowed to increase the refcount while the buffer
3187 * header spinlock is held. Wait for the lock to be released.
3188 */
3189 if (old_buf_state & BM_LOCKED)
3190 old_buf_state = WaitBufHdrUnlocked(buf);
3191
3192 buf_state = old_buf_state;
3193
3194 /* increase refcount */
3195 buf_state += BUF_REFCOUNT_ONE;
3196
3197 if (strategy == NULL)
3198 {
3199 /* Default case: increase usagecount unless already max. */
3201 buf_state += BUF_USAGECOUNT_ONE;
3202 }
3203 else
3204 {
3205 /*
 3206 * Ring buffers shouldn't evict others from the pool. Thus we
3207 * don't make usagecount more than 1.
3208 */
3209 if (BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
3210 buf_state += BUF_USAGECOUNT_ONE;
3211 }
3212
3213 if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
3214 buf_state))
3215 {
3216 result = (buf_state & BM_VALID) != 0;
3217
 3218 TrackNewBufferPin(b);
 3219 break;
3220 }
3221 }
3222 }
3223 else
3224 {
3225 /*
3226 * If we previously pinned the buffer, it is likely to be valid, but
3227 * it may not be if StartReadBuffers() was called and
3228 * WaitReadBuffers() hasn't been called yet. We'll check by loading
3229 * the flags without locking. This is racy, but it's OK to return
3230 * false spuriously: when WaitReadBuffers() calls StartBufferIO(),
3231 * it'll see that it's now valid.
3232 *
3233 * Note: We deliberately avoid a Valgrind client request here.
3234 * Individual access methods can optionally superimpose buffer page
3235 * client requests on top of our client requests to enforce that
3236 * buffers are only accessed while locked (and pinned). It's possible
3237 * that the buffer page is legitimately non-accessible here. We
3238 * cannot meddle with that.
3239 */
3240 result = (pg_atomic_read_u32(&buf->state) & BM_VALID) != 0;
3241
3242 Assert(ref->data.refcount > 0);
3243 ref->data.refcount++;
 3244 ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
 3245 }
3246
3247 return result;
3248}
#define BM_MAX_USAGE_COUNT
Definition: buf_internals.h:86
#define BUF_STATE_GET_USAGECOUNT(state)
Definition: buf_internals.h:60
void TrackNewBufferPin(Buffer buf)
Definition: bufmgr.c:3397

References Assert(), b, BM_LOCKED, BM_MAX_USAGE_COUNT, BM_VALID, buf, BUF_REFCOUNT_ONE, BUF_STATE_GET_USAGECOUNT, BUF_USAGECOUNT_ONE, BufferDescriptorGetBuffer(), BufferIsLocal, CurrentResourceOwner, PrivateRefCountEntry::data, GetPrivateRefCountEntry(), pg_atomic_compare_exchange_u32(), pg_atomic_read_u32(), PrivateRefCountData::refcount, ReservedRefCountSlot, ResourceOwnerRememberBuffer(), TrackNewBufferPin(), unlikely, and WaitBufHdrUnlocked().

Referenced by BufferAlloc(), ExtendBufferedRelShared(), and ReadRecentBuffer().

◆ PinBuffer_Locked()

static void PinBuffer_Locked ( BufferDesc buf)
static

Definition at line 3273 of file bufmgr.c.

3274{
3275 uint32 old_buf_state;
3276
3277 /*
 3278 * As explained, we don't expect any preexisting pins. That allows us to
3279 * manipulate the PrivateRefCount after releasing the spinlock
3280 */
 3281 Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL);
 3282
3283 /*
3284 * Since we hold the buffer spinlock, we can update the buffer state and
3285 * release the lock in one operation.
3286 */
3287 old_buf_state = pg_atomic_read_u32(&buf->state);
3288
3289 UnlockBufHdrExt(buf, old_buf_state,
3290 0, 0, 1);
 3291
 3292 TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
3293}

References Assert(), buf, BufferDescriptorGetBuffer(), GetPrivateRefCountEntry(), pg_atomic_read_u32(), TrackNewBufferPin(), and UnlockBufHdrExt().

Referenced by EvictUnpinnedBufferInternal(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), MarkDirtyUnpinnedBufferInternal(), and SyncOneBuffer().

◆ PinBufferForBlock()

static pg_attribute_always_inline Buffer PinBufferForBlock ( Relation  rel,
SMgrRelation  smgr,
char  persistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
BufferAccessStrategy  strategy,
bool *  foundPtr 
)
static

Definition at line 1185 of file bufmgr.c.

1192{
1193 BufferDesc *bufHdr;
1194 IOContext io_context;
1195 IOObject io_object;
1196
1197 Assert(blockNum != P_NEW);
1198
1199 /* Persistence should be set before */
1200 Assert((persistence == RELPERSISTENCE_TEMP ||
1201 persistence == RELPERSISTENCE_PERMANENT ||
1202 persistence == RELPERSISTENCE_UNLOGGED));
1203
1204 if (persistence == RELPERSISTENCE_TEMP)
1205 {
1206 io_context = IOCONTEXT_NORMAL;
1207 io_object = IOOBJECT_TEMP_RELATION;
1208 }
1209 else
1210 {
1211 io_context = IOContextForStrategy(strategy);
1212 io_object = IOOBJECT_RELATION;
1213 }
1214
1215 TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
 1216 smgr->smgr_rlocator.locator.spcOid,
 1217 smgr->smgr_rlocator.locator.dbOid,
 1218 smgr->smgr_rlocator.locator.relNumber,
 1219 smgr->smgr_rlocator.backend);
1220
1221 if (persistence == RELPERSISTENCE_TEMP)
1222 {
1223 bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, foundPtr);
1224 if (*foundPtr)
 1225 pgBufferUsage.local_blks_hit++;
 1226 }
1227 else
1228 {
1229 bufHdr = BufferAlloc(smgr, persistence, forkNum, blockNum,
1230 strategy, foundPtr, io_context);
1231 if (*foundPtr)
 1232 pgBufferUsage.shared_blks_hit++;
 1233 }
1234 if (rel)
1235 {
1236 /*
1237 * While pgBufferUsage's "read" counter isn't bumped unless we reach
1238 * WaitReadBuffers() (so, not for hits, and not for buffers that are
1239 * zeroed instead), the per-relation stats always count them.
1240 */
 1241 pgstat_count_buffer_read(rel);
 1242 if (*foundPtr)
 1243 pgstat_count_buffer_hit(rel);
 1244 }
1245 if (*foundPtr)
1246 {
1247 pgstat_count_io_op(io_object, io_context, IOOP_HIT, 1, 0);
1248 if (VacuumCostActive)
 1249 VacuumCostBalance += VacuumCostPageHit;
 1250
1251 TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
 1252 smgr->smgr_rlocator.locator.spcOid,
 1253 smgr->smgr_rlocator.locator.dbOid,
 1254 smgr->smgr_rlocator.locator.relNumber,
 1255 smgr->smgr_rlocator.backend,
1256 true);
1257 }
1258
1259 return BufferDescriptorGetBuffer(bufHdr);
1260}
static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr, IOContext io_context)
Definition: bufmgr.c:2075
#define P_NEW
Definition: bufmgr.h:198
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
Definition: localbuf.c:119
#define pgstat_count_buffer_read(rel)
Definition: pgstat.h:715

References Assert(), RelFileLocatorBackend::backend, BufferAlloc(), BufferDescriptorGetBuffer(), RelFileLocator::dbOid, IOCONTEXT_NORMAL, IOContextForStrategy(), IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_HIT, BufferUsage::local_blks_hit, LocalBufferAlloc(), RelFileLocatorBackend::locator, P_NEW, pgBufferUsage, pgstat_count_buffer_hit, pgstat_count_buffer_read, pgstat_count_io_op(), RelFileLocator::relNumber, BufferUsage::shared_blks_hit, SMgrRelationData::smgr_rlocator, RelFileLocator::spcOid, VacuumCostActive, VacuumCostBalance, and VacuumCostPageHit.

Referenced by ReadBuffer_common(), and StartReadBuffersImpl().

◆ PrefetchBuffer()

PrefetchBufferResult PrefetchBuffer ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 747 of file bufmgr.c.

748{
749 Assert(RelationIsValid(reln));
750 Assert(BlockNumberIsValid(blockNum));
751
752 if (RelationUsesLocalBuffers(reln))
753 {
754 /* see comments in ReadBufferExtended */
755 if (RELATION_IS_OTHER_TEMP(reln))
757 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
758 errmsg("cannot access temporary tables of other sessions")));
759
760 /* pass it off to localbuf.c */
761 return PrefetchLocalBuffer(RelationGetSmgr(reln), forkNum, blockNum);
762 }
763 else
764 {
765 /* pass it to the shared buffer version */
766 return PrefetchSharedBuffer(RelationGetSmgr(reln), forkNum, blockNum);
767 }
768}
PrefetchBufferResult PrefetchSharedBuffer(SMgrRelation smgr_reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:657
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
Definition: localbuf.c:72
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:668
#define RelationIsValid(relation)
Definition: rel.h:490

References Assert(), BlockNumberIsValid(), ereport, errcode(), errmsg(), ERROR, PrefetchLocalBuffer(), PrefetchSharedBuffer(), RELATION_IS_OTHER_TEMP, RelationGetSmgr(), RelationIsValid, and RelationUsesLocalBuffers.

Referenced by count_nondeletable_pages(), invalidate_rel_block(), and pg_prewarm().
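
A minimal usage sketch (not part of bufmgr.c; rel and blkno are assumed inputs): hint that a block will be needed soon, overlap other work with the read, then pin the block normally.

/* Ask the kernel (or AIO) to start fetching the block. */
PrefetchBufferResult pre = PrefetchBuffer(rel, MAIN_FORKNUM, blkno);

/* ... do unrelated work here to hide the I/O latency ... */

/* pre.initiated_io reports whether a read was actually started;
 * pre.recent_buffer, if valid, is an unpinned location hint. */
Buffer buf = ReadBuffer(rel, blkno);    /* likely a cache hit by now */
ReleaseBuffer(buf);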

◆ PrefetchSharedBuffer()

PrefetchBufferResult PrefetchSharedBuffer ( SMgrRelation  smgr_reln,
ForkNumber  forkNum,
BlockNumber  blockNum 
)

Definition at line 657 of file bufmgr.c.

660{
661 PrefetchBufferResult result = {InvalidBuffer, false};
662 BufferTag newTag; /* identity of requested block */
663 uint32 newHash; /* hash value for newTag */
664 LWLock *newPartitionLock; /* buffer partition lock for it */
665 int buf_id;
666
667 Assert(BlockNumberIsValid(blockNum));
668
669 /* create a tag so we can lookup the buffer */
670 InitBufferTag(&newTag, &smgr_reln->smgr_rlocator.locator,
671 forkNum, blockNum);
672
673 /* determine its hash code and partition lock ID */
674 newHash = BufTableHashCode(&newTag);
675 newPartitionLock = BufMappingPartitionLock(newHash);
676
677 /* see if the block is in the buffer pool already */
678 LWLockAcquire(newPartitionLock, LW_SHARED);
679 buf_id = BufTableLookup(&newTag, newHash);
680 LWLockRelease(newPartitionLock);
681
682 /* If not in buffers, initiate prefetch */
683 if (buf_id < 0)
684 {
685#ifdef USE_PREFETCH
686 /*
687 * Try to initiate an asynchronous read. This returns false in
688 * recovery if the relation file doesn't exist.
689 */
690 if ((io_direct_flags & IO_DIRECT_DATA) == 0 &&
691 smgrprefetch(smgr_reln, forkNum, blockNum, 1))
692 {
693 result.initiated_io = true;
694 }
695#endif /* USE_PREFETCH */
696 }
697 else
698 {
699 /*
700 * Report the buffer it was in at that time. The caller may be able
701 * to avoid a buffer table lookup, but it's not pinned and it must be
702 * rechecked!
703 */
704 result.recent_buffer = buf_id + 1;
705 }
706
707 /*
708 * If the block *is* in buffers, we do nothing. This is not really ideal:
709 * the block might be just about to be evicted, which would be stupid
710 * since we know we are going to need it soon. But the only easy answer
711 * is to bump the usage_count, which does not seem like a great solution:
712 * when the caller does ultimately touch the block, usage_count would get
713 * bumped again, resulting in too much favoritism for blocks that are
714 * involved in a prefetch sequence. A real fix would involve some
715 * additional per-buffer state, and it's not clear that there's enough of
716 * a problem to justify that.
717 */
718
719 return result;
720}
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_DATA
Definition: fd.h:54
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
Definition: smgr.c:678
Buffer recent_buffer
Definition: bufmgr.h:61

References Assert(), BlockNumberIsValid(), BufMappingPartitionLock(), BufTableHashCode(), BufTableLookup(), InitBufferTag(), PrefetchBufferResult::initiated_io, InvalidBuffer, IO_DIRECT_DATA, io_direct_flags, RelFileLocatorBackend::locator, LW_SHARED, LWLockAcquire(), LWLockRelease(), PrefetchBufferResult::recent_buffer, SMgrRelationData::smgr_rlocator, and smgrprefetch().

Referenced by PrefetchBuffer(), and XLogPrefetcherNextBlock().
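
The recent_buffer field returned above pairs naturally with ReadRecentBuffer(). A sketch under assumed inputs (smgr, blkno), not part of bufmgr.c:

PrefetchBufferResult pre = PrefetchSharedBuffer(smgr, MAIN_FORKNUM, blkno);

if (BufferIsValid(pre.recent_buffer))
{
    /* Block was already cached; the unpinned hint must be revalidated. */
    if (ReadRecentBuffer(smgr->smgr_rlocator.locator, MAIN_FORKNUM,
                         blkno, pre.recent_buffer))
    {
        /* same block pinned without a new mapping-table lookup */
    }
}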

◆ ProcessReadBuffersResult()

static void ProcessReadBuffersResult ( ReadBuffersOperation operation)
static

Definition at line 1668 of file bufmgr.c.

1669{
1670 PgAioReturn *aio_ret = &operation->io_return;
1671 PgAioResultStatus rs = aio_ret->result.status;
1672 int newly_read_blocks = 0;
1673
1674 Assert(pgaio_wref_valid(&operation->io_wref));
1675 Assert(aio_ret->result.status != PGAIO_RS_UNKNOWN);
1676
1677 /*
1678 * SMGR reports the number of blocks successfully read as the result of
1679 * the IO operation. Thus we can simply add that to ->nblocks_done.
1680 */
1681
1682 if (likely(rs != PGAIO_RS_ERROR))
1683 newly_read_blocks = aio_ret->result.result;
1684
1685 if (rs == PGAIO_RS_ERROR || rs == PGAIO_RS_WARNING)
1686 pgaio_result_report(aio_ret->result, &aio_ret->target_data,
1687 rs == PGAIO_RS_ERROR ? ERROR : WARNING);
1688 else if (aio_ret->result.status == PGAIO_RS_PARTIAL)
1689 {
1690 /*
1691 * We'll retry, so we just emit a debug message to the server log (or
1692 * not even that in prod scenarios).
1693 */
1694 pgaio_result_report(aio_ret->result, &aio_ret->target_data, DEBUG1);
1695 elog(DEBUG3, "partial read, will retry");
1696 }
1697
1698 Assert(newly_read_blocks > 0);
1699 Assert(newly_read_blocks <= MAX_IO_COMBINE_LIMIT);
1700
1701 operation->nblocks_done += newly_read_blocks;
1702
1703 Assert(operation->nblocks_done <= operation->nblocks);
1704}
bool pgaio_wref_valid(PgAioWaitRef *iow)
Definition: aio.c:971
PgAioResultStatus
Definition: aio_types.h:79
@ PGAIO_RS_UNKNOWN
Definition: aio_types.h:80
@ PGAIO_RS_PARTIAL
Definition: aio_types.h:82
#define DEBUG3
Definition: elog.h:28
PgAioResult result
Definition: aio_types.h:132
PgAioTargetData target_data
Definition: aio_types.h:133

References Assert(), DEBUG1, DEBUG3, elog, ERROR, ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, likely, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, pgaio_result_report(), PGAIO_RS_ERROR, PGAIO_RS_PARTIAL, PGAIO_RS_UNKNOWN, PGAIO_RS_WARNING, pgaio_wref_valid(), PgAioResult::result, PgAioReturn::result, PgAioResult::status, PgAioReturn::target_data, and WARNING.

Referenced by WaitReadBuffers().

◆ ReadBuffer()

Buffer ReadBuffer ( Relation  reln,
BlockNumber  blockNum 
)

Definition at line 839 of file bufmgr.c.

840{
841 return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
842}
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:886
@ RBM_NORMAL
Definition: bufmgr.h:46

References MAIN_FORKNUM, RBM_NORMAL, and ReadBufferExtended().

Referenced by _bt_allocbuf(), _bt_getbuf(), _bt_search_insert(), _hash_getbuf(), _hash_getbuf_with_condlock_cleanup(), blbulkdelete(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brinGetStats(), brinGetTupleForHeapBlock(), brinRevmapDesummarizeRange(), brinRevmapInitialize(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), ginFindLeafPage(), ginFindParents(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), GinNewBuffer(), ginStepRight(), ginUpdateStats(), gistBufferingFindCorrectParent(), gistbufferinginserttuples(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistfixsplit(), gistGetMaxLevel(), gistkillitems(), gistNewBuffer(), gistProcessItup(), gistScanPage(), heap_abort_speculative(), heap_delete(), heap_fetch(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_lock_tuple(), heap_update(), initBloomState(), pg_visibility(), pgstatginindex_internal(), read_seq_tuple(), RelationGetBufferForTuple(), ReleaseAndReadBuffer(), revmap_get_buffer(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), shiftList(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), and spgWalk().
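
A sketch of the canonical access pattern around ReadBuffer() (rel and blkno are illustrative): the returned buffer is pinned but not locked, so a content lock must be taken before the page is inspected.

Buffer buf = ReadBuffer(rel, blkno);

LockBuffer(buf, BUFFER_LOCK_SHARE);
/* ... read BufferGetPage(buf) while holding the content lock ... */
UnlockReleaseBuffer(buf);   /* unlock and unpin in one call */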

◆ ReadBuffer_common()

static pg_attribute_always_inline Buffer ReadBuffer_common ( Relation  rel,
SMgrRelation  smgr,
char  smgr_persistence,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
static

Definition at line 1268 of file bufmgr.c.

1272{
1273 ReadBuffersOperation operation;
1274 Buffer buffer;
1275 int flags;
1276 char persistence;
1277
1278 /*
1279 * Backward compatibility path, most code should use ExtendBufferedRel()
1280 * instead, as acquiring the extension lock inside ExtendBufferedRel()
1281 * scales a lot better.
1282 */
1283 if (unlikely(blockNum == P_NEW))
1284 {
1285 uint32 flags = EB_SKIP_EXTENSION_LOCK;
1286
1287 /*
1288 * Since no-one else can be looking at the page contents yet, there is
1289 * no difference between an exclusive lock and a cleanup-strength
1290 * lock.
1291 */
1292 if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
1293 flags |= EB_LOCK_FIRST;
1294
1295 return ExtendBufferedRel(BMR_REL(rel), forkNum, strategy, flags);
1296 }
1297
1298 if (rel)
1299 persistence = rel->rd_rel->relpersistence;
1300 else
1301 persistence = smgr_persistence;
1302
1303 if (mode == RBM_ZERO_AND_CLEANUP_LOCK ||
1304 mode == RBM_ZERO_AND_LOCK)
1305 {
1306 bool found;
1307
1308 buffer = PinBufferForBlock(rel, smgr, persistence,
1309 forkNum, blockNum, strategy, &found);
1310 ZeroAndLockBuffer(buffer, mode, found);
1311 return buffer;
1312 }
1313
1314 /*
1315 * Signal that we are going to immediately wait. If we're immediately
1316 * waiting, there is no benefit in actually executing the IO
1317 * asynchronously, it would just add dispatch overhead.
1318 */
1319 flags = READ_BUFFERS_SYNCHRONOUSLY;
1320 if (mode == RBM_ZERO_ON_ERROR)
1321 flags |= READ_BUFFERS_ZERO_ON_ERROR;
1322 operation.smgr = smgr;
1323 operation.rel = rel;
1324 operation.persistence = persistence;
1325 operation.forknum = forkNum;
1326 operation.strategy = strategy;
1327 if (StartReadBuffer(&operation,
1328 &buffer,
1329 blockNum,
1330 flags))
1331 WaitReadBuffers(&operation);
1332
1333 return buffer;
1334}
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:939
static void ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
Definition: bufmgr.c:1106
static pg_attribute_always_inline Buffer PinBufferForBlock(Relation rel, SMgrRelation smgr, char persistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr)
Definition: bufmgr.c:1185
void WaitReadBuffers(ReadBuffersOperation *operation)
Definition: bufmgr.c:1707
bool StartReadBuffer(ReadBuffersOperation *operation, Buffer *buffer, BlockNumber blocknum, int flags)
Definition: bufmgr.c:1583
@ RBM_ZERO_ON_ERROR
Definition: bufmgr.h:51
#define BMR_REL(p_rel)
Definition: bufmgr.h:114

References BMR_REL, PrivateRefCountEntry::buffer, EB_LOCK_FIRST, EB_SKIP_EXTENSION_LOCK, ExtendBufferedRel(), ReadBuffersOperation::forknum, mode, P_NEW, ReadBuffersOperation::persistence, PinBufferForBlock(), RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, RBM_ZERO_ON_ERROR, RelationData::rd_rel, READ_BUFFERS_SYNCHRONOUSLY, READ_BUFFERS_ZERO_ON_ERROR, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, StartReadBuffer(), ReadBuffersOperation::strategy, unlikely, WaitReadBuffers(), and ZeroAndLockBuffer().

Referenced by ExtendBufferedRelTo(), ReadBufferExtended(), and ReadBufferWithoutRelcache().

◆ ReadBufferExtended()

Buffer ReadBufferExtended ( Relation  reln,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy 
)
inline

Definition at line 886 of file bufmgr.c.

888{
889 Buffer buf;
890
891 /*
892 * Reject attempts to read non-local temporary relations; we would be
893 * likely to get wrong data since we have no visibility into the owning
894 * session's local buffers.
895 */
896 if (RELATION_IS_OTHER_TEMP(reln))
897 ereport(ERROR,
898 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
899 errmsg("cannot access temporary tables of other sessions")));
900
901 /*
902 * Read the buffer, and update pgstat counters to reflect a cache hit or
903 * miss.
904 */
905 buf = ReadBuffer_common(reln, RelationGetSmgr(reln), 0,
906 forkNum, blockNum, mode, strategy);
907
908 return buf;
909}

References buf, ereport, errcode(), errmsg(), ERROR, mode, ReadBuffer_common(), RELATION_IS_OTHER_TEMP, and RelationGetSmgr().

Referenced by _hash_getbuf_with_strategy(), _hash_getinitbuf(), _hash_getnewbuf(), blbulkdelete(), blgetbitmap(), BloomInitMetapage(), blvacuumcleanup(), bt_recheck_sibling_links(), btvacuumpage(), count_nondeletable_pages(), create_toy_buffer(), fsm_readbuf(), get_raw_page_internal(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), gin_refind_parent(), ginbulkdelete(), ginDeletePage(), ginScanToDelete(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hashbulkdelete(), heapam_scan_sample_next_block(), log_newpage_range(), modify_rel_block(), palloc_btree_page(), pgstat_btree_page(), pgstat_gist_page(), pgstat_hash_page(), pgstat_heap(), pgstathashindex(), pgstatindex_impl(), ReadBuffer(), ReadBufferBI(), spgprocesspending(), statapprox_heap(), and vm_readbuf().
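
A hedged sketch of a bulk scan using a buffer access strategy (rel and nblocks are assumed): a BAS_BULKREAD ring keeps the scan from evicting the whole buffer pool.

BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);

for (BlockNumber blkno = 0; blkno < nblocks; blkno++)
{
    Buffer buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, strategy);

    /* ... lock, examine, unlock ... */
    ReleaseBuffer(buf);
}
FreeAccessStrategy(strategy);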

◆ ReadBuffersCanStartIO()

static bool ReadBuffersCanStartIO ( Buffer  buffer,
bool  nowait 
)
inlinestatic

Definition at line 1639 of file bufmgr.c.

1640{
1641 /*
1642 * If this backend currently has staged IO, we need to submit the pending
1643 * IO before waiting for the right to issue IO, to avoid the potential for
1644 * deadlocks (and, more commonly, unnecessary delays for other backends).
1645 */
1646 if (!nowait && pgaio_have_staged())
1647 {
1648 if (ReadBuffersCanStartIOOnce(buffer, true))
1649 return true;
1650
1651 /*
1652 * Unfortunately StartBufferIO() returning false doesn't allow to
1653 * distinguish between the buffer already being valid and IO already
1654 * being in progress. Since IO already being in progress is quite
1655 * rare, this approach seems fine.
1656 */
1657 pgaio_submit_staged();
1658 }
1659
1660 return ReadBuffersCanStartIOOnce(buffer, nowait);
1661}
bool pgaio_have_staged(void)
Definition: aio.c:1107
static bool ReadBuffersCanStartIOOnce(Buffer buffer, bool nowait)
Definition: bufmgr.c:1626

References PrivateRefCountEntry::buffer, pgaio_have_staged(), pgaio_submit_staged(), and ReadBuffersCanStartIOOnce().

Referenced by AsyncReadBuffers().

◆ ReadBuffersCanStartIOOnce()

static bool ReadBuffersCanStartIOOnce ( Buffer  buffer,
bool  nowait 
)
inlinestatic

Definition at line 1626 of file bufmgr.c.

1627{
1628 if (BufferIsLocal(buffer))
1629 return StartLocalBufferIO(GetLocalBufferDescriptor(-buffer - 1),
1630 true, nowait);
1631 else
1632 return StartBufferIO(GetBufferDescriptor(buffer - 1), true, nowait);
1633}
bool StartLocalBufferIO(BufferDesc *bufHdr, bool forInput, bool nowait)
Definition: localbuf.c:523

References PrivateRefCountEntry::buffer, BufferIsLocal, GetBufferDescriptor(), GetLocalBufferDescriptor(), StartBufferIO(), and StartLocalBufferIO().

Referenced by ReadBuffersCanStartIO().

◆ ReadBufferWithoutRelcache()

Buffer ReadBufferWithoutRelcache ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
ReadBufferMode  mode,
BufferAccessStrategy  strategy,
bool  permanent 
)

Definition at line 923 of file bufmgr.c.

926{
927 SMgrRelation smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
928
929 return ReadBuffer_common(NULL, smgr,
930 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
931 forkNum, blockNum,
932 mode, strategy);
933}

References INVALID_PROC_NUMBER, mode, ReadBuffer_common(), and smgropen().

Referenced by RelationCopyStorageUsingBuffer(), ScanSourceDatabasePgClass(), and XLogReadBufferExtended().
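
A sketch of the recovery-side pattern (cf. XLogReadBufferExtended; rlocator and blkno are assumed inputs): with no Relation available, storage is addressed by RelFileLocator directly.

Buffer buf = ReadBufferWithoutRelcache(rlocator, MAIN_FORKNUM, blkno,
                                       RBM_ZERO_AND_LOCK, NULL,
                                       true);    /* permanent relation */

/* buffer is pinned and exclusively locked; apply changes, then: */
UnlockReleaseBuffer(buf);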

◆ ReadRecentBuffer()

bool ReadRecentBuffer ( RelFileLocator  rlocator,
ForkNumber  forkNum,
BlockNumber  blockNum,
Buffer  recent_buffer 
)

Definition at line 778 of file bufmgr.c.

780{
781 BufferDesc *bufHdr;
782 BufferTag tag;
783 uint32 buf_state;
784
785 Assert(BufferIsValid(recent_buffer));
786
787 ResourceOwnerEnlarge(CurrentResourceOwner);
788 ReservePrivateRefCountEntry();
789 InitBufferTag(&tag, &rlocator, forkNum, blockNum);
790
791 if (BufferIsLocal(recent_buffer))
792 {
793 int b = -recent_buffer - 1;
794
795 bufHdr = GetLocalBufferDescriptor(b);
796 buf_state = pg_atomic_read_u32(&bufHdr->state);
797
798 /* Is it still valid and holding the right tag? */
799 if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
800 {
801 PinLocalBuffer(bufHdr, true);
802
803 pgBufferUsage.local_blks_hit++;
804
805 return true;
806 }
807 }
808 else
809 {
810 bufHdr = GetBufferDescriptor(recent_buffer - 1);
811
812 /*
813 * Is it still valid and holding the right tag? We do an unlocked tag
814 * comparison first, to make it unlikely that we'll increment the
815 * usage counter of the wrong buffer, if someone calls us with a very
816 * out of date recent_buffer. Then we'll check it again if we get the
817 * pin.
818 */
819 if (BufferTagsEqual(&tag, &bufHdr->tag) &&
820 PinBuffer(bufHdr, NULL, true))
821 {
822 if (BufferTagsEqual(&tag, &bufHdr->tag))
823 {
824 pgBufferUsage.shared_blks_hit++;
825 return true;
826 }
827 UnpinBuffer(bufHdr);
828 }
829 }
830
831 return false;
832}

References Assert(), b, BM_VALID, BufferIsLocal, BufferIsValid(), BufferTagsEqual(), CurrentResourceOwner, GetBufferDescriptor(), GetLocalBufferDescriptor(), InitBufferTag(), BufferUsage::local_blks_hit, pg_atomic_read_u32(), pgBufferUsage, PinBuffer(), PinLocalBuffer(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), BufferUsage::shared_blks_hit, BufferDesc::state, BufferDesc::tag, and UnpinBuffer().

Referenced by invalidate_rel_block(), and XLogReadBufferExtended().
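
A sketch of the try-the-hint-first pattern (rlocator, blkno, and recent are assumed to come from an earlier lookup, e.g. a PrefetchBufferResult):

Buffer buf;

if (ReadRecentBuffer(rlocator, MAIN_FORKNUM, blkno, recent))
    buf = recent;       /* hint still valid; buffer is now pinned */
else
    buf = ReadBufferWithoutRelcache(rlocator, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, NULL, true);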

◆ RelationCopyStorageUsingBuffer()

static void RelationCopyStorageUsingBuffer ( RelFileLocator  srclocator,
RelFileLocator  dstlocator,
ForkNumber  forkNum,
bool  permanent 
)
static

Definition at line 5223 of file bufmgr.c.

5226{
5227 Buffer srcBuf;
5228 Buffer dstBuf;
5229 Page srcPage;
5230 Page dstPage;
5231 bool use_wal;
5232 BlockNumber nblocks;
5233 BlockNumber blkno;
5234 PGIOAlignedBlock buf;
5235 BufferAccessStrategy bstrategy_src;
5236 BufferAccessStrategy bstrategy_dst;
5237 BlockRangeReadStreamPrivate p;
5238 ReadStream *src_stream;
5239 SMgrRelation src_smgr;
5240
5241 /*
5242 * In general, we want to write WAL whenever wal_level > 'minimal', but we
5243 * can skip it when copying any fork of an unlogged relation other than
5244 * the init fork.
5245 */
5246 use_wal = XLogIsNeeded() && (permanent || forkNum == INIT_FORKNUM);
5247
5248 /* Get number of blocks in the source relation. */
5249 nblocks = smgrnblocks(smgropen(srclocator, INVALID_PROC_NUMBER),
5250 forkNum);
5251
5252 /* Nothing to copy; just return. */
5253 if (nblocks == 0)
5254 return;
5255
5256 /*
5257 * Bulk extend the destination relation of the same size as the source
5258 * relation before starting to copy block by block.
5259 */
5260 memset(buf.data, 0, BLCKSZ);
5261 smgrextend(smgropen(dstlocator, INVALID_PROC_NUMBER), forkNum, nblocks - 1,
5262 buf.data, true);
5263
5264 /* This is a bulk operation, so use buffer access strategies. */
5265 bstrategy_src = GetAccessStrategy(BAS_BULKREAD);
5266 bstrategy_dst = GetAccessStrategy(BAS_BULKWRITE);
5267
5268 /* Initialize streaming read */
5269 p.current_blocknum = 0;
5270 p.last_exclusive = nblocks;
5271 src_smgr = smgropen(srclocator, INVALID_PROC_NUMBER);
5272
5273 /*
5274 * It is safe to use batchmode as block_range_read_stream_cb takes no
5275 * locks.
5276 */
5277 src_stream = read_stream_begin_smgr_relation(READ_STREAM_FULL |
5278 READ_STREAM_USE_BATCHING,
5279 bstrategy_src,
5280 src_smgr,
5281 permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
5282 forkNum,
5283 block_range_read_stream_cb,
5284 &p,
5285 0);
5286
5287 /* Iterate over each block of the source relation file. */
5288 for (blkno = 0; blkno < nblocks; blkno++)
5289 {
5290 CHECK_FOR_INTERRUPTS();
5291
5292 /* Read block from source relation. */
5293 srcBuf = read_stream_next_buffer(src_stream, NULL);
5294 LockBuffer(srcBuf, BUFFER_LOCK_SHARE);
5295 srcPage = BufferGetPage(srcBuf);
5296
5297 dstBuf = ReadBufferWithoutRelcache(dstlocator, forkNum,
5298 BufferGetBlockNumber(srcBuf),
5299 RBM_ZERO_AND_LOCK, bstrategy_dst,
5300 permanent);
5301 dstPage = BufferGetPage(dstBuf);
5302
5303 START_CRIT_SECTION();
5304
5305 /* Copy page data from the source to the destination. */
5306 memcpy(dstPage, srcPage, BLCKSZ);
5307 MarkBufferDirty(dstBuf);
5308
5309 /* WAL-log the copied page. */
5310 if (use_wal)
5311 log_newpage_buffer(dstBuf, true);
5312
5313 END_CRIT_SECTION();
5314
5315 UnlockReleaseBuffer(dstBuf);
5316 UnlockReleaseBuffer(srcBuf);
5317 }
5318 Assert(read_stream_next_buffer(src_stream, NULL) == InvalidBuffer);
5319 read_stream_end(src_stream);
5320
5321 FreeAccessStrategy(bstrategy_src);
5322 FreeAccessStrategy(bstrategy_dst);
5323}
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5478
Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent)
Definition: bufmgr.c:923
@ BAS_BULKREAD
Definition: bufmgr.h:37
@ BAS_BULKWRITE
Definition: bufmgr.h:39
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:461
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:643
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
ReadStream * read_stream_begin_smgr_relation(int flags, BufferAccessStrategy strategy, SMgrRelation smgr, char smgr_persistence, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Definition: read_stream.c:761
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
Definition: read_stream.c:791
void read_stream_end(ReadStream *stream)
Definition: read_stream.c:1089
BlockNumber block_range_read_stream_cb(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: read_stream.c:162
#define READ_STREAM_USE_BATCHING
Definition: read_stream.h:64
#define READ_STREAM_FULL
Definition: read_stream.h:43
void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
Definition: smgr.c:620
#define XLogIsNeeded()
Definition: xlog.h:109
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1259

References Assert(), BAS_BULKREAD, BAS_BULKWRITE, block_range_read_stream_cb(), buf, BUFFER_LOCK_SHARE, BufferGetBlockNumber(), BufferGetPage(), CHECK_FOR_INTERRUPTS, BlockRangeReadStreamPrivate::current_blocknum, END_CRIT_SECTION, FreeAccessStrategy(), GetAccessStrategy(), INIT_FORKNUM, INVALID_PROC_NUMBER, InvalidBuffer, BlockRangeReadStreamPrivate::last_exclusive, LockBuffer(), log_newpage_buffer(), MarkBufferDirty(), RBM_ZERO_AND_LOCK, read_stream_begin_smgr_relation(), read_stream_end(), READ_STREAM_FULL, read_stream_next_buffer(), READ_STREAM_USE_BATCHING, ReadBufferWithoutRelcache(), smgrextend(), smgrnblocks(), smgropen(), START_CRIT_SECTION, UnlockReleaseBuffer(), and XLogIsNeeded.

Referenced by CreateAndCopyRelationData().
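
The read-stream pattern above also works without smgr-level access. A hedged sketch for scanning blocks [0, nblocks) of an open relation rel (identifiers illustrative):

BlockRangeReadStreamPrivate p = {.current_blocknum = 0,
                                 .last_exclusive = nblocks};
ReadStream *stream = read_stream_begin_relation(READ_STREAM_FULL,
                                                NULL, rel, MAIN_FORKNUM,
                                                block_range_read_stream_cb,
                                                &p, 0);
Buffer buf;

while ((buf = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
{
    /* ... use the pinned buffer ... */
    ReleaseBuffer(buf);
}
read_stream_end(stream);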

◆ RelationGetNumberOfBlocksInFork()

BlockNumber RelationGetNumberOfBlocksInFork ( Relation  relation,
ForkNumber  forkNum 
)

Definition at line 4532 of file bufmgr.c.

4533{
4534 if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
4535 {
4536 /*
4537 * Not every table AM uses BLCKSZ wide fixed size blocks. Therefore
4538 * tableam returns the size in bytes - but for the purpose of this
4539 * routine, we want the number of blocks. Therefore divide, rounding
4540 * up.
4541 */
4542 uint64 szbytes;
4543
4544 szbytes = table_relation_size(relation, forkNum);
4545
4546 return (szbytes + (BLCKSZ - 1)) / BLCKSZ;
4547 }
4548 else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
4549 {
4550 return smgrnblocks(RelationGetSmgr(relation), forkNum);
4551 }
4552 else
4553 Assert(false);
4554
4555 return 0; /* keep compiler quiet */
4556}
static uint64 table_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.h:1847

References Assert(), RelationData::rd_rel, RelationGetSmgr(), smgrnblocks(), and table_relation_size().

Referenced by _hash_getnewbuf(), _hash_init(), autoprewarm_database_main(), get_raw_page_internal(), and pg_prewarm().
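
A small usage sketch (rel assumed open and locked): because the table-AM path reports bytes, the result is rounded up to whole BLCKSZ pages, e.g. 8193 bytes count as 2 blocks when BLCKSZ is 8192.

BlockNumber main_blocks = RelationGetNumberOfBlocksInFork(rel, MAIN_FORKNUM);
BlockNumber fsm_blocks = RelationGetNumberOfBlocksInFork(rel, FSM_FORKNUM);

elog(DEBUG1, "main fork: %u blocks, fsm fork: %u blocks",
     main_blocks, fsm_blocks);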

◆ ReleaseAndReadBuffer()

Buffer ReleaseAndReadBuffer ( Buffer  buffer,
Relation  relation,
BlockNumber  blockNum 
)

Definition at line 3102 of file bufmgr.c.

3105{
3106 ForkNumber forkNum = MAIN_FORKNUM;
3107 BufferDesc *bufHdr;
3108
3109 if (BufferIsValid(buffer))
3110 {
3111 Assert(BufferIsPinned(buffer));
3112 if (BufferIsLocal(buffer))
3113 {
3114 bufHdr = GetLocalBufferDescriptor(-buffer - 1);
3115 if (bufHdr->tag.blockNum == blockNum &&
3116 BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
3117 BufTagGetForkNum(&bufHdr->tag) == forkNum)
3118 return buffer;
3119 UnpinLocalBuffer(buffer);
3120 }
3121 else
3122 {
3123 bufHdr = GetBufferDescriptor(buffer - 1);
3124 /* we have pin, so it's ok to examine tag without spinlock */
3125 if (bufHdr->tag.blockNum == blockNum &&
3126 BufTagMatchesRelFileLocator(&bufHdr->tag, &relation->rd_locator) &&
3127 BufTagGetForkNum(&bufHdr->tag) == forkNum)
3128 return buffer;
3129 UnpinBuffer(bufHdr);
3130 }
3131 }
3132
3133 return ReadBuffer(relation, blockNum);
3134}
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:839

References Assert(), buftag::blockNum, PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsPinned, BufferIsValid(), BufTagGetForkNum(), BufTagMatchesRelFileLocator(), GetBufferDescriptor(), GetLocalBufferDescriptor(), MAIN_FORKNUM, RelationData::rd_locator, ReadBuffer(), BufferDesc::tag, UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_relandgetbuf(), ginFindLeafPage(), and heapam_index_fetch_tuple().
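
A sketch of the scan-style loop this function is built for (rel, start, and end are assumed): the previous pin is dropped and a new one taken in a single call, and no work at all is done when the same block is requested twice in a row.

Buffer buf = InvalidBuffer;

for (BlockNumber blkno = start; blkno < end; blkno++)
{
    buf = ReleaseAndReadBuffer(buf, rel, blkno);
    /* ... lock, inspect, unlock ... */
}
if (BufferIsValid(buf))
    ReleaseBuffer(buf);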

◆ ReleaseBuffer()

void ReleaseBuffer ( Buffer  buffer)

Definition at line 5461 of file bufmgr.c.

5462{
5463 if (!BufferIsValid(buffer))
5464 elog(ERROR, "bad buffer ID: %d", buffer);
5465
5466 if (BufferIsLocal(buffer))
5467 UnpinLocalBuffer(buffer);
5468 else
5469 UnpinBuffer(GetBufferDescriptor(buffer - 1));
5470}

References PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), elog, ERROR, GetBufferDescriptor(), UnpinBuffer(), and UnpinLocalBuffer().

Referenced by _bt_allocbuf(), _bt_pagedel(), _bt_relbuf(), _bt_search_insert(), _bt_unlink_halfdead_page(), _hash_dropbuf(), _hash_getbuf_with_condlock_cleanup(), autoprewarm_database_main(), BitmapHeapScanNextBlock(), blinsert(), BloomNewBuffer(), brin_getinsertbuffer(), brin_vacuum_scan(), bringetbitmap(), brinGetTupleForHeapBlock(), brininsert(), brinRevmapTerminate(), brinsummarize(), buffer_create_toy(), collect_corrupt_items(), collect_visibility_data(), entryLoadMoreItems(), ExecEndIndexOnlyScan(), ExtendBufferedRelTo(), FreeBulkInsertState(), freeGinBtreeStack(), fsm_search(), fsm_vacuum_page(), get_actual_variable_endpoint(), get_raw_page_internal(), GetRecordedFreeSpace(), gin_check_parent_keys_consistency(), gin_check_posting_tree_parent_keys_consistency(), ginDeletePage(), ginFindParents(), ginFinishSplit(), ginFreeScanKeys(), ginInsertCleanup(), GinNewBuffer(), ginScanToDelete(), gistdoinsert(), gistFindCorrectParent(), gistNewBuffer(), gistvacuum_delete_empty_pages(), grow_rel(), heap_abort_speculative(), heap_delete(), heap_endscan(), heap_fetch(), heap_fetch_next_buffer(), heap_force_common(), heap_insert(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_rescan(), heap_update(), heap_vac_scan_next_block(), heap_xlog_delete(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_update(), heap_xlog_visible(), heapam_index_fetch_reset(), heapam_scan_sample_next_block(), heapam_tuple_lock(), heapgettup(), heapgettup_pagemode(), invalidate_rel_block(), lazy_scan_heap(), lazy_vacuum_heap_rel(), modify_rel_block(), pg_prewarm(), pg_visibility(), pg_visibility_map(), pg_visibility_map_summary(), pgstatindex_impl(), read_rel_block_ll(), read_stream_reset(), ReadBufferBI(), RelationAddBlocks(), RelationGetBufferForTuple(), ReleaseBulkInsertStatePin(), revmap_get_buffer(), spgdoinsert(), SpGistGetBuffer(), SpGistNewBuffer(), SpGistUpdateMetaPage(), statapprox_heap(), summarize_range(), terminate_brin_buildstate(), tts_buffer_heap_clear(), tts_buffer_heap_materialize(), tts_buffer_heap_store_tuple(), UnlockReleaseBuffer(), verify_heapam(), visibilitymap_count(), visibilitymap_get_status(), visibilitymap_pin(), and XLogReadBufferExtended().

◆ ReservePrivateRefCountEntry()

static void ReservePrivateRefCountEntry ( void  )
static

Definition at line 284 of file bufmgr.c.

285{
286 /* Already reserved (or freed), nothing to do */
287 if (ReservedRefCountSlot != -1)
288 return;
289
290 /*
291 * First search for a free entry the array, that'll be sufficient in the
292 * majority of cases.
293 */
294 {
295 int i;
296
297 for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
298 {
299 if (PrivateRefCountArrayKeys[i] == InvalidBuffer)
300 {
301 ReservedRefCountSlot = i;
302
303 /*
304 * We could return immediately, but iterating till the end of
305 * the array allows compiler-autovectorization.
306 */
307 }
308 }
309
310 if (ReservedRefCountSlot != -1)
311 return;
312 }
313
314 /*
315 * No luck. All array entries are full. Move one array entry into the hash
316 * table.
317 */
318 {
319 /*
320 * Move entry from the current clock position in the array into the
321 * hashtable. Use that slot.
322 */
323 int victim_slot;
324 PrivateRefCountEntry *victim_entry;
325 PrivateRefCountEntry *hashent;
326 bool found;
327
328 /* select victim slot */
329 victim_slot = PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES;
330 victim_entry = &PrivateRefCountArray[victim_slot];
331 ReservedRefCountSlot = victim_slot;
332
333 /* Better be used, otherwise we shouldn't get here. */
334 Assert(PrivateRefCountArrayKeys[victim_slot] != InvalidBuffer);
335 Assert(PrivateRefCountArray[victim_slot].buffer != InvalidBuffer);
336 Assert(PrivateRefCountArrayKeys[victim_slot] == PrivateRefCountArray[victim_slot].buffer);
337
338 /* enter victim array entry into hashtable */
339 hashent = hash_search(PrivateRefCountHash,
340 &PrivateRefCountArrayKeys[victim_slot],
341 HASH_ENTER,
342 &found);
343 Assert(!found);
344 /* move data from the entry in the array to the hash entry */
345 hashent->data = victim_entry->data;
346
347 /* clear the now free array slot */
348 PrivateRefCountArrayKeys[victim_slot] = InvalidBuffer;
349 victim_entry->buffer = InvalidBuffer;
350
351 /* clear the whole data member, just for future proofing */
352 memset(&victim_entry->data, 0, sizeof(victim_entry->data));
353 victim_entry->data.refcount = 0;
354
355 PrivateRefCountOverflowed++;
356 }
357}
static uint32 PrivateRefCountClock
Definition: bufmgr.c:242
@ HASH_ENTER
Definition: hsearch.h:114

References Assert(), PrivateRefCountEntry::buffer, PrivateRefCountEntry::data, HASH_ENTER, hash_search(), i, InvalidBuffer, PrivateRefCountArray, PrivateRefCountArrayKeys, PrivateRefCountClock, PrivateRefCountHash, PrivateRefCountOverflowed, PrivateRefCountData::refcount, REFCOUNT_ARRAY_ENTRIES, and ReservedRefCountSlot.

Referenced by BufferAlloc(), EvictAllUnpinnedBuffers(), EvictRelUnpinnedBuffers(), EvictUnpinnedBuffer(), ExtendBufferedRelShared(), FlushDatabaseBuffers(), FlushRelationBuffers(), FlushRelationsAllBuffers(), GetPrivateRefCountEntrySlow(), GetVictimBuffer(), MarkDirtyAllUnpinnedBuffers(), MarkDirtyRelUnpinnedBuffers(), MarkDirtyUnpinnedBuffer(), ReadRecentBuffer(), and SyncOneBuffer().
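
The callers above share one protocol: reserve everything a pin could need while failure is still harmless, then pin. A schematic sketch (bufHdr assumed; error paths omitted):

/* may allocate memory or grow a hash table -- do it before locking */
ReservePrivateRefCountEntry();
ResourceOwnerEnlarge(CurrentResourceOwner);

/* from here on, taking the pin cannot fail partway */
(void) LockBufHdr(bufHdr);
PinBuffer_Locked(bufHdr);   /* takes over and releases the header lock */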

◆ ResOwnerPrintBufferIO()

static char * ResOwnerPrintBufferIO ( Datum  res)
static

Definition at line 6654 of file bufmgr.c.

6655{
6656 Buffer buffer = DatumGetInt32(res);
6657
6658 return psprintf("lost track of buffer IO on buffer %d", buffer);
6659}
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:212

References PrivateRefCountEntry::buffer, DatumGetInt32(), and psprintf().

◆ ResOwnerPrintBufferPin()

static char * ResOwnerPrintBufferPin ( Datum  res)
static

Definition at line 6677 of file bufmgr.c.

6678{
6679 return DebugPrintBufferRefcount(DatumGetInt32(res));
6680}

References DatumGetInt32(), and DebugPrintBufferRefcount().

◆ ResOwnerReleaseBufferIO()

static void ResOwnerReleaseBufferIO ( Datum  res)
static

Definition at line 6646 of file bufmgr.c.

6647{
6648 Buffer buffer = DatumGetInt32(res);
6649
6650 AbortBufferIO(buffer);
6651}
static void AbortBufferIO(Buffer buffer)
Definition: bufmgr.c:6261

References AbortBufferIO(), PrivateRefCountEntry::buffer, and DatumGetInt32().

◆ ResOwnerReleaseBufferPin()

static void ResOwnerReleaseBufferPin ( Datum  res)
static

Definition at line 6662 of file bufmgr.c.

6663{
6664 Buffer buffer = DatumGetInt32(res);
6665
6666 /* Like ReleaseBuffer, but don't call ResourceOwnerForgetBuffer */
6667 if (!BufferIsValid(buffer))
6668 elog(ERROR, "bad buffer ID: %d", buffer);
6669
6670 if (BufferIsLocal(buffer))
6671 UnpinLocalBufferNoOwner(buffer);
6672 else
6673 UnpinBufferNoOwner(GetBufferDescriptor(buffer - 1));
6674}
static void UnpinBufferNoOwner(BufferDesc *buf)
Definition: bufmgr.c:3350
void UnpinLocalBufferNoOwner(Buffer buffer)
Definition: localbuf.c:848

References PrivateRefCountEntry::buffer, BufferIsLocal, BufferIsValid(), DatumGetInt32(), elog, ERROR, GetBufferDescriptor(), UnpinBufferNoOwner(), and UnpinLocalBufferNoOwner().

◆ rlocator_comparator()

static int rlocator_comparator ( const void *  p1,
const void *  p2 
)
static

Definition at line 6332 of file bufmgr.c.

6333{
6334 RelFileLocator n1 = *(const RelFileLocator *) p1;
6335 RelFileLocator n2 = *(const RelFileLocator *) p2;
6336
6337 if (n1.relNumber < n2.relNumber)
6338 return -1;
6339 else if (n1.relNumber > n2.relNumber)
6340 return 1;
6341
6342 if (n1.dbOid < n2.dbOid)
6343 return -1;
6344 else if (n1.dbOid > n2.dbOid)
6345 return 1;
6346
6347 if (n1.spcOid < n2.spcOid)
6348 return -1;
6349 else if (n1.spcOid > n2.spcOid)
6350 return 1;
6351 else
6352 return 0;
6353}

References RelFileLocator::dbOid, RelFileLocator::relNumber, and RelFileLocator::spcOid.

Referenced by buffertag_comparator(), DropRelationsAllBuffers(), and FlushRelationsAllBuffers().

◆ ScheduleBufferTagForWriteback()

void ScheduleBufferTagForWriteback ( WritebackContext wb_context,
IOContext  io_context,
BufferTag tag 
)

Definition at line 6513 of file bufmgr.c.

6515{
6516 PendingWriteback *pending;
6517
6518 /*
6519 * As pg_flush_data() doesn't do anything with fsync disabled, there's no
6520 * point in tracking in that case.
6521 */
6522 if (io_direct_flags & IO_DIRECT_DATA ||
6523 !enableFsync)
6524 return;
6525
6526 /*
6527 * Add buffer to the pending writeback array, unless writeback control is
6528 * disabled.
6529 */
6530 if (*wb_context->max_pending > 0)
6531 {
6532 Assert(*wb_context->max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);
6533
6534 pending = &wb_context->pending_writebacks[wb_context->nr_pending++];
6535
6536 pending->tag = *tag;
6537 }
6538
6539 /*
6540 * Perform pending flushes if the writeback limit is exceeded. This
6541 * includes the case where previously an item has been added, but control
6542 * is now disabled.
6543 */
6544 if (wb_context->nr_pending >= *wb_context->max_pending)
6545 IssuePendingWritebacks(wb_context, io_context);
6546}
bool enableFsync
Definition: globals.c:129
#define WRITEBACK_MAX_PENDING_FLUSHES

References Assert(), enableFsync, IO_DIRECT_DATA, io_direct_flags, IssuePendingWritebacks(), WritebackContext::max_pending, WritebackContext::nr_pending, WritebackContext::pending_writebacks, PendingWriteback::tag, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by GetVictimBuffer(), and SyncOneBuffer().
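
A hedged sketch of the writer-side flow (cf. SyncOneBuffer; the tag variable is assumed to hold the just-written buffer's tag):

WritebackContext wb_context;

WritebackContextInit(&wb_context, &checkpoint_flush_after);

/* ... after each FlushBuffer()-style write of a buffer with tag "tag": */
ScheduleBufferTagForWriteback(&wb_context, IOCONTEXT_NORMAL, &tag);

/* once *max_pending entries accumulate, IssuePendingWritebacks() runs
 * automatically, sorting tags so neighboring blocks coalesce */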

◆ shared_buffer_readv_complete()

static PgAioResult shared_buffer_readv_complete ( PgAioHandle ioh,
PgAioResult  prior_result,
uint8  cb_data 
)
static

Definition at line 7673 of file bufmgr.c.

7675{
7676 return buffer_readv_complete(ioh, prior_result, cb_data, false);
7677}

References buffer_readv_complete().

◆ shared_buffer_readv_complete_local()

static PgAioResult shared_buffer_readv_complete_local ( PgAioHandle ioh,
PgAioResult  prior_result,
uint8  cb_data 
)
static

Definition at line 7687 of file bufmgr.c.

7689{
7690 bool zeroed_any,
7691 ignored_any;
7692 uint8 zeroed_or_error_count,
7693 checkfail_count,
7694 first_off;
7695
7696 if (prior_result.status == PGAIO_RS_OK)
7697 return prior_result;
7698
7699 buffer_readv_decode_error(prior_result,
7700 &zeroed_any,
7701 &ignored_any,
7702 &zeroed_or_error_count,
7703 &checkfail_count,
7704 &first_off);
7705
7706 if (checkfail_count)
7707 {
7708 PgAioTargetData *td = pgaio_io_get_target_data(ioh);
7709
7710 pgstat_report_checksum_failures_in_db(td->smgr.rlocator.dbOid,
7711 checkfail_count);
7712 }
7713
7714 return prior_result;
7715}
@ PGAIO_RS_OK
Definition: aio_types.h:81

References buffer_readv_decode_error(), RelFileLocator::dbOid, pgaio_io_get_target_data(), PGAIO_RS_OK, pgstat_report_checksum_failures_in_db(), PgAioTargetData::rlocator, PgAioTargetData::smgr, and PgAioResult::status.

◆ shared_buffer_readv_stage()

static void shared_buffer_readv_stage ( PgAioHandle ioh,
uint8  cb_data 
)
static

Definition at line 7667 of file bufmgr.c.

7668{
7669 buffer_stage_common(ioh, false, false);
7670}

References buffer_stage_common().

◆ shared_buffer_write_error_callback()

static void shared_buffer_write_error_callback ( void *  arg)
static

Definition at line 6300 of file bufmgr.c.

6301{
6302 BufferDesc *bufHdr = (BufferDesc *) arg;
6303
6304 /* Buffer is pinned, so we can read the tag without locking the spinlock */
6305 if (bufHdr != NULL)
6306 errcontext("writing block %u of relation \"%s\"",
6307 bufHdr->tag.blockNum,
6308 relpathperm(BufTagGetRelFileLocator(&bufHdr->tag),
6309 BufTagGetForkNum(&bufHdr->tag)).str);
6310}

References arg, buftag::blockNum, BufTagGetForkNum(), BufTagGetRelFileLocator(), errcontext, relpathperm, and BufferDesc::tag.

Referenced by FlushBuffer().

◆ StartBufferIO()

bool StartBufferIO ( BufferDesc buf,
bool  forInput,
bool  nowait 
)

Definition at line 6141 of file bufmgr.c.

6142{
6143 uint32 buf_state;
6144
6144
6145 ResourceOwnerEnlarge(CurrentResourceOwner);
6146
6147 for (;;)
6148 {
6149 buf_state = LockBufHdr(buf);
6150
6151 if (!(buf_state & BM_IO_IN_PROGRESS))
6152 break;
6153 UnlockBufHdr(buf);
6154 if (nowait)
6155 return false;
6156 WaitIO(buf);
6157 }
6158
6159 /* Once we get here, there is definitely no I/O active on this buffer */
6160
6161 /* Check if someone else already did the I/O */
6162 if (forInput ? (buf_state & BM_VALID) : !(buf_state & BM_DIRTY))
6163 {
6164 UnlockBufHdr(buf);
6165 return false;
6166 }
6167
6168 UnlockBufHdrExt(buf, buf_state,
6169 BM_IO_IN_PROGRESS, 0,
6170 0);
6171
6172 ResourceOwnerRememberBufferIO(CurrentResourceOwner,
6173 BufferDescriptorGetBuffer(buf));
6174
6175 return true;
6176}
static void ResourceOwnerRememberBufferIO(ResourceOwner owner, Buffer buffer)

References BM_DIRTY, BM_IO_IN_PROGRESS, BM_VALID, buf, BufferDescriptorGetBuffer(), CurrentResourceOwner, LockBufHdr(), ResourceOwnerEnlarge(), ResourceOwnerRememberBufferIO(), UnlockBufHdr(), UnlockBufHdrExt(), and WaitIO().

Referenced by buffer_call_start_io(), ExtendBufferedRelShared(), FlushBuffer(), read_rel_block_ll(), ReadBuffersCanStartIOOnce(), and ZeroAndLockBuffer().
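
A schematic of the I/O protocol that StartBufferIO()/TerminateBufferIO() bracket (cf. FlushBuffer; flag values are illustrative):

if (StartBufferIO(buf, false /* forInput: this is a write */, false))
{
    /* we won the right to do the I/O: write the page out ... */

    TerminateBufferIO(buf,
                      true,     /* clear_dirty */
                      0,        /* set_flag_bits */
                      true,     /* forget_owner */
                      false);   /* release_aio */
}
else
{
    /* another backend did the I/O, or the buffer wasn't dirty */
}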

◆ StartReadBuffer()

bool StartReadBuffer ( ReadBuffersOperation operation,
Buffer buffer,
BlockNumber  blocknum,
int  flags 
)

Definition at line 1583 of file bufmgr.c.

1587{
1588 int nblocks = 1;
1589 bool result;
1590
1591 result = StartReadBuffersImpl(operation, buffer, blocknum, &nblocks, flags,
1592 false /* single block, no forwarding */ );
1593 Assert(nblocks == 1); /* single block can't be short */
1594
1595 return result;
1596}
static pg_attribute_always_inline bool StartReadBuffersImpl(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags, bool allow_forwarding)
Definition: bufmgr.c:1337

References Assert(), PrivateRefCountEntry::buffer, and StartReadBuffersImpl().

Referenced by read_stream_next_buffer(), and ReadBuffer_common().

◆ StartReadBuffers()

bool StartReadBuffers ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int *  nblocks,
int  flags 
)

Definition at line 1564 of file bufmgr.c.

1569{
1570 return StartReadBuffersImpl(operation, buffers, blockNum, nblocks, flags,
1571 true /* expect forwarded buffers */ );
1572}

References StartReadBuffersImpl().

Referenced by read_stream_start_pending_read().
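
A sketch of a vectored read mirroring the setup in ReadBuffer_common() (rel and blkno are assumed; read_stream.c is the usual consumer): request up to four contiguous blocks, accept that the operation may be shortened, and wait for completion.

ReadBuffersOperation op;
Buffer buffers[4] = {InvalidBuffer, InvalidBuffer,
                     InvalidBuffer, InvalidBuffer};
int nblocks = 4;

op.rel = rel;
op.smgr = RelationGetSmgr(rel);
op.persistence = rel->rd_rel->relpersistence;
op.forknum = MAIN_FORKNUM;
op.strategy = NULL;

if (StartReadBuffers(&op, buffers, blkno, &nblocks, 0))
    WaitReadBuffers(&op);

/* buffers[0 .. nblocks-1] are now valid and pinned; if nblocks shrank,
 * loop again for the remainder (the array may carry forwarded pins) */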

◆ StartReadBuffersImpl()

static pg_attribute_always_inline bool StartReadBuffersImpl ( ReadBuffersOperation operation,
Buffer buffers,
BlockNumber  blockNum,
int *  nblocks,
int  flags,
bool  allow_forwarding 
)
static

Definition at line 1337 of file bufmgr.c.

1343{
1344 int actual_nblocks = *nblocks;
1345 int maxcombine = 0;
1346 bool did_start_io;
1347
1348 Assert(*nblocks == 1 || allow_forwarding);
1349 Assert(*nblocks > 0);
1350 Assert(*nblocks <= MAX_IO_COMBINE_LIMIT);
1351
1352 for (int i = 0; i < actual_nblocks; ++i)
1353 {
1354 bool found;
1355
1356 if (allow_forwarding && buffers[i] != InvalidBuffer)
1357 {
1358 BufferDesc *bufHdr;
1359
1360 /*
1361 * This is a buffer that was pinned by an earlier call to
1362 * StartReadBuffers(), but couldn't be handled in one operation at
1363 * that time. The operation was split, and the caller has passed
1364 * an already pinned buffer back to us to handle the rest of the
1365 * operation. It must continue at the expected block number.
1366 */
1367 Assert(BufferGetBlockNumber(buffers[i]) == blockNum + i);
1368
1369 /*
1370 * It might be an already valid buffer (a hit) that followed the
1371 * final contiguous block of an earlier I/O (a miss) marking the
1372 * end of it, or a buffer that some other backend has since made
1373 * valid by performing the I/O for us, in which case we can handle
1374 * it as a hit now. It is safe to check for a BM_VALID flag with
1375 * a relaxed load, because we got a fresh view of it while pinning
1376 * it in the previous call.
1377 *
1378 * On the other hand if we don't see BM_VALID yet, it must be an
1379 * I/O that was split by the previous call and we need to try to
1380 * start a new I/O from this block. We're also racing against any
1381 * other backend that might start the I/O or even manage to mark
1382 * it BM_VALID after this check, but StartBufferIO() will handle
1383 * those cases.
1384 */
1385 if (BufferIsLocal(buffers[i]))
1386 bufHdr = GetLocalBufferDescriptor(-buffers[i] - 1);
1387 else
1388 bufHdr = GetBufferDescriptor(buffers[i] - 1);
1389 Assert(pg_atomic_read_u32(&bufHdr->state) & BM_TAG_VALID);
1390 found = pg_atomic_read_u32(&bufHdr->state) & BM_VALID;
1391 }
1392 else
1393 {
1394 buffers[i] = PinBufferForBlock(operation->rel,
1395 operation->smgr,
1396 operation->persistence,
1397 operation->forknum,
1398 blockNum + i,
1399 operation->strategy,
1400 &found);
1401 }
1402
1403 if (found)
1404 {
1405 /*
1406 * We have a hit. If it's the first block in the requested range,
1407 * we can return it immediately and report that WaitReadBuffers()
1408 * does not need to be called. If the initial value of *nblocks
1409 * was larger, the caller will have to call again for the rest.
1410 */
1411 if (i == 0)
1412 {
1413 *nblocks = 1;
1414
1415#ifdef USE_ASSERT_CHECKING
1416
1417 /*
1418 * Initialize enough of ReadBuffersOperation to make
1419 * CheckReadBuffersOperation() work. Outside of assertions
1420 * that's not necessary when no IO is issued.
1421 */
1422 operation->buffers = buffers;
1423 operation->blocknum = blockNum;
1424 operation->nblocks = 1;
1425 operation->nblocks_done = 1;
1426 CheckReadBuffersOperation(operation, true);
1427#endif
1428 return false;
1429 }
1430
1431 /*
1432 * Otherwise we already have an I/O to perform, but this block
1433 * can't be included as it is already valid. Split the I/O here.
1434 * There may or may not be more blocks requiring I/O after this
1435 * one, we haven't checked, but they can't be contiguous with this
1436 * one in the way. We'll leave this buffer pinned, forwarding it
1437 * to the next call, avoiding the need to unpin it here and re-pin
1438 * it in the next call.
1439 */
1440 actual_nblocks = i;
1441 break;
1442 }
1443 else
1444 {
1445 /*
1446 * Check how many blocks we can cover with the same IO. The smgr
1447 * implementation might e.g. be limited due to a segment boundary.
1448 */
1449 if (i == 0 && actual_nblocks > 1)
1450 {
1451 maxcombine = smgrmaxcombine(operation->smgr,
1452 operation->forknum,
1453 blockNum);
1454 if (unlikely(maxcombine < actual_nblocks))
1455 {
1456 elog(DEBUG2, "limiting nblocks at %u from %u to %u",
1457 blockNum, actual_nblocks, maxcombine);
1458 actual_nblocks = maxcombine;
1459 }
1460 }
1461 }
1462 }
1463 *nblocks = actual_nblocks;
1464
1465 /* Populate information needed for I/O. */
1466 operation->buffers = buffers;
1467 operation->blocknum = blockNum;
1468 operation->flags = flags;
1469 operation->nblocks = actual_nblocks;
1470 operation->nblocks_done = 0;
1471 pgaio_wref_clear(&operation->io_wref);
1472
1473 /*
1474 * When using AIO, start the IO in the background. If not, issue prefetch
1475 * requests if desired by the caller.
1476 *
1477 * The reason we have a dedicated path for IOMETHOD_SYNC here is to
1478 * de-risk the introduction of AIO somewhat. It's a large architectural
1479 * change, with lots of chances for unanticipated performance effects.
1480 *
1481 * Use of IOMETHOD_SYNC already leads to not actually performing IO
1482 * asynchronously, but without the check here we'd execute IO earlier than
1483 * we used to. Eventually this IOMETHOD_SYNC specific path should go away.
1484 */
1485 if (io_method != IOMETHOD_SYNC)
1486 {
1487 /*
1488 * Try to start IO asynchronously. It's possible that no IO needs to
1489 * be started, if another backend already performed the IO.
1490 *
1491 * Note that if an IO is started, it might not cover the entire
1492 * requested range, e.g. because an intermediary block has been read
1493 * in by another backend. In that case any "trailing" buffers we
1494 * already pinned above will be "forwarded" by read_stream.c to the
1495 * next call to StartReadBuffers().
1496 *
1497 * This is signalled to the caller by decrementing *nblocks *and*
1498 * reducing operation->nblocks. The latter is done here, but not below
1499 * WaitReadBuffers(), as in WaitReadBuffers() we can't "shorten" the
1500 * overall read size anymore, we need to retry until done in its
1501 * entirety or until failed.
1502 */
1503 did_start_io = AsyncReadBuffers(operation, nblocks);
1504
1505 operation->nblocks = *nblocks;
1506 }
1507 else
1508 {
1509 operation->flags |= READ_BUFFERS_SYNCHRONOUSLY;
1510
1511 if (flags & READ_BUFFERS_ISSUE_ADVICE)
1512 {
1513 /*
1514 * In theory we should only do this if PinBufferForBlock() had to
1515 * allocate new buffers above. That way, if two calls to
1516 * StartReadBuffers() were made for the same blocks before
1517 * WaitReadBuffers(), only the first would issue the advice.
1518 * That'd be a better simulation of true asynchronous I/O, which
1519 * would only start the I/O once, but isn't done here for
1520 * simplicity.
1521 */
1522 smgrprefetch(operation->smgr,
1523 operation->forknum,
1524 blockNum,
1525 actual_nblocks);
1526 }
1527
1528 /*
1529 * Indicate that WaitReadBuffers() should be called. WaitReadBuffers()
1530 * will initiate the necessary IO.
1531 */
1532 did_start_io = true;
1533 }
1534
1535 CheckReadBuffersOperation(operation, !did_start_io);
1536
1537 return did_start_io;
1538}
int io_method
Definition: aio.c:74
@ IOMETHOD_SYNC
Definition: aio.h:34
static void CheckReadBuffersOperation(ReadBuffersOperation *operation, bool is_complete)
Definition: bufmgr.c:1602
static bool AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
Definition: bufmgr.c:1839
#define READ_BUFFERS_ISSUE_ADVICE
Definition: bufmgr.h:124
uint32 smgrmaxcombine(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
Definition: smgr.c:697

References Assert(), AsyncReadBuffers(), ReadBuffersOperation::blocknum, BM_TAG_VALID, BM_VALID, BufferGetBlockNumber(), BufferIsLocal, ReadBuffersOperation::buffers, CheckReadBuffersOperation(), DEBUG2, elog, ReadBuffersOperation::flags, ReadBuffersOperation::forknum, GetBufferDescriptor(), GetLocalBufferDescriptor(), i, InvalidBuffer, io_method, ReadBuffersOperation::io_wref, IOMETHOD_SYNC, MAX_IO_COMBINE_LIMIT, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, pg_atomic_read_u32(), pgaio_wref_clear(), PinBufferForBlock(), READ_BUFFERS_ISSUE_ADVICE, READ_BUFFERS_SYNCHRONOUSLY, ReadBuffersOperation::rel, ReadBuffersOperation::smgr, smgrmaxcombine(), smgrprefetch(), BufferDesc::state, ReadBuffersOperation::strategy, and unlikely.

Referenced by StartReadBuffer(), and StartReadBuffers().

◆ SyncOneBuffer()

static int SyncOneBuffer ( int  buf_id,
bool  skip_recently_used,
WritebackContext wb_context 
)
static

Definition at line 4014 of file bufmgr.c.

4015{
4016 BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
4017 int result = 0;
4018 uint32 buf_state;
4019 BufferTag tag;
4020
4021 /* Make sure we can handle the pin */
4022 ReservePrivateRefCountEntry();
4023 ResourceOwnerEnlarge(CurrentResourceOwner);
4024
4025 /*
4026 * Check whether buffer needs writing.
4027 *
4028 * We can make this check without taking the buffer content lock so long
4029 * as we mark pages dirty in access methods *before* logging changes with
4030 * XLogInsert(): if someone marks the buffer dirty just after our check we
4031 * don't worry because our checkpoint.redo points before log record for
4032 * upcoming changes and so we are not required to write such dirty buffer.
4033 */
4034 buf_state = LockBufHdr(bufHdr);
4035
4036 if (BUF_STATE_GET_REFCOUNT(buf_state) == 0 &&
4037 BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
4038 {
4039 result |= BUF_REUSABLE;
4040 }
4041 else if (skip_recently_used)
4042 {
4043 /* Caller told us not to write recently-used buffers */
4044 UnlockBufHdr(bufHdr);
4045 return result;
4046 }
4047
4048 if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
4049 {
4050 /* It's clean, so nothing to do */
4051 UnlockBufHdr(bufHdr);
4052 return result;
4053 }
4054
4055 /*
4056 * Pin it, share-lock it, write it. (FlushBuffer will do nothing if the
4057 * buffer is clean by the time we've locked it.)
4058 */
4059 PinBuffer_Locked(bufHdr);
4060
4061 FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
4062
4063 tag = bufHdr->tag;
4064
4065 UnpinBuffer(bufHdr);
4066
4067 /*
4068 * SyncOneBuffer() is only called by checkpointer and bgwriter, so
4069 * IOContext will always be IOCONTEXT_NORMAL.
4070 */
4071 ScheduleBufferTagForWriteback(wb_context, IOCONTEXT_NORMAL, &tag);
4072
4073 return result | BUF_WRITTEN;
4074}

References BM_DIRTY, BM_VALID, BUF_REUSABLE, BUF_STATE_GET_REFCOUNT, BUF_STATE_GET_USAGECOUNT, BUF_WRITTEN, CurrentResourceOwner, FlushUnlockedBuffer(), GetBufferDescriptor(), IOCONTEXT_NORMAL, IOOBJECT_RELATION, LockBufHdr(), PinBuffer_Locked(), ReservePrivateRefCountEntry(), ResourceOwnerEnlarge(), ScheduleBufferTagForWriteback(), BufferDesc::tag, UnlockBufHdr(), and UnpinBuffer().

Referenced by BgBufferSync(), and BufferSync().

◆ TerminateBufferIO()

void TerminateBufferIO ( BufferDesc buf,
bool  clear_dirty,
uint32  set_flag_bits,
bool  forget_owner,
bool  release_aio 
)

Definition at line 6199 of file bufmgr.c.

6201{
6202 uint32 buf_state;
6203 uint32 unset_flag_bits = 0;
6204 int refcount_change = 0;
6205
6206 buf_state = LockBufHdr(buf);
6207
6208 Assert(buf_state & BM_IO_IN_PROGRESS);
6209 unset_flag_bits |= BM_IO_IN_PROGRESS;
6210
6211 /* Clear earlier errors, if this IO failed, it'll be marked again */
6212 unset_flag_bits |= BM_IO_ERROR;
6213
6214 if (clear_dirty && !(buf_state & BM_JUST_DIRTIED))
6215 unset_flag_bits |= BM_DIRTY | BM_CHECKPOINT_NEEDED;
6216
6217 if (release_aio)
6218 {
6219 /* release ownership by the AIO subsystem */
6220 Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
6221 refcount_change = -1;
6222 pgaio_wref_clear(&buf->io_wref);
6223 }
6224
6225 buf_state = UnlockBufHdrExt(buf, buf_state,
6226 set_flag_bits, unset_flag_bits,
6227 refcount_change);
6228
6229 if (forget_owner)
6230 ResourceOwnerForgetBufferIO(CurrentResourceOwner,
6231 BufferDescriptorGetBuffer(buf));
6232
6233 ConditionVariableBroadcast(BufferDescriptorGetIOCV(buf));
6234
6235 /*
6236 * Support LockBufferForCleanup()
6237 *
6238 * We may have just released the last pin other than the waiter's. In most
6239 * cases, this backend holds another pin on the buffer. But, if, for
6240 * example, this backend is completing an IO issued by another backend, it
6241 * may be time to wake the waiter.
6242 */
6243 if (release_aio && (buf_state & BM_PIN_COUNT_WAITER))
6244 WakePinCountWaiter(buf);
6245}
static ConditionVariable * BufferDescriptorGetIOCV(const BufferDesc *bdesc)
static void WakePinCountWaiter(BufferDesc *buf)
Definition: bufmgr.c:3305
void ConditionVariableBroadcast(ConditionVariable *cv)

References Assert(), BM_CHECKPOINT_NEEDED, BM_DIRTY, BM_IO_ERROR, BM_IO_IN_PROGRESS, BM_JUST_DIRTIED, BM_PIN_COUNT_WAITER, buf, BUF_STATE_GET_REFCOUNT, BufferDescriptorGetBuffer(), BufferDescriptorGetIOCV(), ConditionVariableBroadcast(), CurrentResourceOwner, LockBufHdr(), pgaio_wref_clear(), ResourceOwnerForgetBufferIO(), UnlockBufHdrExt(), and WakePinCountWaiter().

Referenced by AbortBufferIO(), buffer_call_terminate_io(), buffer_readv_complete_one(), ExtendBufferedRelShared(), FlushBuffer(), and ZeroAndLockBuffer().

◆ TrackNewBufferPin()

void TrackNewBufferPin ( Buffer  buf)
inline

Definition at line 3397 of file bufmgr.c.

3398{
3399 PrivateRefCountEntry *ref;
3400
3401 ref = NewPrivateRefCountEntry(buf);
3402 ref->data.refcount++;
3403
3404 ResourceOwnerRememberBuffer(CurrentResourceOwner, buf);
3405
3406 /*
3407 * This is the first pin for this page by this backend, mark its page as
3408 * defined to valgrind. While the page contents might not actually be
3409 * valid yet, we don't currently guarantee that such pages are marked
3410 * undefined or non-accessible.
3411 *
3412 * It's not necessarily the prettiest to do this here, but otherwise we'd
3413 * need this block of code in multiple places.
3414 */
3415 VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(GetBufferDescriptor(buf - 1)),
3416 BLCKSZ);
3417}
static PrivateRefCountEntry * NewPrivateRefCountEntry(Buffer buffer)
Definition: bufmgr.c:363

References buf, BufHdrGetBlock, CurrentResourceOwner, PrivateRefCountEntry::data, GetBufferDescriptor(), NewPrivateRefCountEntry(), PrivateRefCountData::refcount, ResourceOwnerRememberBuffer(), and VALGRIND_MAKE_MEM_DEFINED.

Referenced by GetBufferFromRing(), PinBuffer(), PinBuffer_Locked(), and StrategyGetBuffer().

◆ ts_ckpt_progress_comparator()

static int ts_ckpt_progress_comparator ( Datum  a,
Datum  b,
void *  arg 
)
static

Definition at line 6478 of file bufmgr.c.

6479{
6480 CkptTsStatus *sa = (CkptTsStatus *) DatumGetPointer(a);
6481 CkptTsStatus *sb = (CkptTsStatus *) DatumGetPointer(b);
6482
6483 /* we want a min-heap, so return 1 for the a < b */
6484 if (sa->progress < sb->progress)
6485 return 1;
6486 else if (sa->progress == sb->progress)
6487 return 0;
6488 else
6489 return -1;
6490}

References a, b, DatumGetPointer(), and CkptTsStatus::progress.

Referenced by BufferSync().

◆ UnlockBuffers()

void UnlockBuffers ( void  )

Definition at line 5668 of file bufmgr.c.

5669{
5670 BufferDesc *buf = PinCountWaitBuf;
5671
5672 if (buf)
5673 {
5674 uint32 buf_state;
5675 uint32 unset_bits = 0;
5676
5677 buf_state = LockBufHdr(buf);
5678
5679 /*
5680 * Don't complain if flag bit not set; it could have been reset but we
5681 * got a cancel/die interrupt before getting the signal.
5682 */
5683 if ((buf_state & BM_PIN_COUNT_WAITER) != 0 &&
5684 buf->wait_backend_pgprocno == MyProcNumber)
5685 unset_bits = BM_PIN_COUNT_WAITER;
5686
5687 UnlockBufHdrExt(buf, buf_state,
5688 0, unset_bits,
5689 0);
5690
5691 PinCountWaitBuf = NULL;
5692 }
5693}

References BM_PIN_COUNT_WAITER, buf, LockBufHdr(), MyProcNumber, PinCountWaitBuf, and UnlockBufHdrExt().

Referenced by AbortSubTransaction(), AbortTransaction(), AtProcExit_Buffers(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), and WalWriterMain().

◆ UnlockReleaseBuffer()

void UnlockReleaseBuffer ( Buffer  buffer)

Definition at line 5478 of file bufmgr.c.

5479{
5480 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5481 ReleaseBuffer(buffer);
5482}

References PrivateRefCountEntry::buffer, BUFFER_LOCK_UNLOCK, LockBuffer(), and ReleaseBuffer().

Referenced by _bt_clear_incomplete_split(), _bt_restore_meta(), _hash_relbuf(), allocNewBuffer(), AlterSequence(), blbulkdelete(), blgetbitmap(), blinsert(), BloomInitMetapage(), blvacuumcleanup(), brin_doinsert(), brin_doupdate(), brin_evacuate_page(), brin_getinsertbuffer(), brin_xlog_createidx(), brin_xlog_desummarize_page(), brin_xlog_insert_update(), brin_xlog_revmap_extend(), brin_xlog_samepage_update(), brin_xlog_update(), brinbuild(), brinbuildempty(), brinGetStats(), brinRevmapDesummarizeRange(), bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), bt_recheck_sibling_links(), btree_xlog_dedup(), btree_xlog_delete(), btree_xlog_insert(), btree_xlog_mark_page_halfdead(), btree_xlog_newroot(), btree_xlog_split(), btree_xlog_unlink_page(), btree_xlog_vacuum(), collect_corrupt_items(), collect_visibility_data(), count_nondeletable_pages(), createPostingTree(), doPickSplit(), entryLoadMoreItems(), fill_seq_fork_with_data(), flushCachedPage(), FreeSpaceMapPrepareTruncateRel(), fsm_search(), fsm_set_and_search(), generic_redo(), gin_refind_parent(), ginbuild(), ginbuildempty(), ginbulkdelete(), ginGetStats(), ginHeapTupleFastInsert(), ginInsertCleanup(), ginPlaceToPage(), ginRedoClearIncompleteSplit(), ginRedoCreatePTree(), ginRedoDeleteListPages(), ginRedoDeletePage(), ginRedoInsert(), ginRedoInsertListPage(), ginRedoSplit(), ginRedoUpdateMetapage(), ginRedoVacuumDataLeafPage(), ginRedoVacuumPage(), ginScanToDelete(), ginStepRight(), ginUpdateStats(), ginvacuumcleanup(), ginVacuumPostingTree(), ginVacuumPostingTreeLeaves(), gistbufferinginserttuples(), gistbuild(), gistbuildempty(), gistdoinsert(), gistFindCorrectParent(), gistFindPath(), gistGetMaxLevel(), gistinserttuples(), gistkillitems(), gistplacetopage(), gistProcessItup(), gistRedoClearFollowRight(), gistRedoDeleteRecord(), gistRedoPageDelete(), gistRedoPageSplitRecord(), gistRedoPageUpdateRecord(), gistScanPage(), gistvacuum_delete_empty_pages(), gistvacuumpage(), hash_xlog_add_ovfl_page(), hash_xlog_delete(), hash_xlog_init_bitmap_page(), hash_xlog_init_meta_page(), hash_xlog_insert(), hash_xlog_move_page_contents(), hash_xlog_split_allocate_page(), hash_xlog_split_cleanup(), hash_xlog_split_complete(), hash_xlog_split_page(), hash_xlog_squeeze_page(), hash_xlog_update_meta_page(), hash_xlog_vacuum_one_page(), heap_delete(), heap_finish_speculative(), heap_force_common(), heap_get_latest_tid(), heap_index_delete_tuples(), heap_insert(), heap_lock_updated_tuple_rec(), heap_multi_insert(), heap_update(), heap_xlog_confirm(), heap_xlog_delete(), heap_xlog_inplace(), heap_xlog_insert(), heap_xlog_lock(), heap_xlog_lock_updated(), heap_xlog_multi_insert(), heap_xlog_prune_freeze(), heap_xlog_update(), heap_xlog_visible(), heapam_scan_analyze_next_tuple(), initBloomState(), lazy_scan_heap(), lazy_scan_new_or_empty(), lazy_vacuum_heap_rel(), log_newpage_range(), moveLeafs(), nextval_internal(), palloc_btree_page(), pg_get_sequence_data(), pg_sequence_last_value(), pg_visibility(), pgstat_gist_page(), pgstat_heap(), pgstatginindex_internal(), pgstathashindex(), RelationCopyStorageUsingBuffer(), RelationGetBufferForTuple(), ResetSequence(), revmap_physical_extend(), scanGetCandidate(), scanPendingInsert(), scanPostingTree(), ScanSourceDatabasePgClass(), seq_redo(), SequenceChangePersistence(), SetSequence(), shiftList(), spgAddNodeAction(), spgbuild(), spgdoinsert(), spgGetCache(), SpGistGetBuffer(), SpGistUpdateMetaPage(), spgMatchNodeAction(), spgprocesspending(), spgRedoAddLeaf(), spgRedoAddNode(), 
spgRedoMoveLeafs(), spgRedoPickSplit(), spgRedoSplitTuple(), spgRedoVacuumLeaf(), spgRedoVacuumRedirect(), spgRedoVacuumRoot(), spgSplitNodeAction(), spgvacuumpage(), spgWalk(), statapprox_heap(), verify_heapam(), verifyBackupPageConsistency(), visibilitymap_prepare_truncate(), writeListPage(), xlog_redo(), and XLogRecordPageWithFreeSpace().

◆ UnpinBuffer()

◆ UnpinBufferNoOwner()

static void UnpinBufferNoOwner ( BufferDesc *  buf)
static

Definition at line 3350 of file bufmgr.c.

{
    PrivateRefCountEntry *ref;
    Buffer      b = BufferDescriptorGetBuffer(buf);

    Assert(!BufferIsLocal(b));

    /* not moving as we're likely deleting it soon anyway */
    ref = GetPrivateRefCountEntry(b, false);
    Assert(ref != NULL);
    Assert(ref->data.refcount > 0);
    ref->data.refcount--;
    if (ref->data.refcount == 0)
    {
        uint32      old_buf_state;

        /*
         * Mark buffer non-accessible to Valgrind.
         *
         * Note that the buffer may have already been marked non-accessible
         * within access method code that enforces that buffers are only
         * accessed while a buffer lock is held.
         */
        VALGRIND_MAKE_MEM_NOACCESS(BufHdrGetBlock(buf), BLCKSZ);

        /*
         * I'd better not still hold the buffer content lock. Can't use
         * BufferIsLockedByMe(), as that asserts the buffer is pinned.
         */
        Assert(!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)));

        /* decrement the shared reference count */
        old_buf_state = pg_atomic_fetch_sub_u32(&buf->state, BUF_REFCOUNT_ONE);

        /* Support LockBufferForCleanup() */
        if (old_buf_state & BM_PIN_COUNT_WAITER)
            WakePinCountWaiter(buf);

        ForgetPrivateRefCountEntry(ref);
    }
}

References Assert(), b, BM_PIN_COUNT_WAITER, buf, BUF_REFCOUNT_ONE, BufferDescriptorGetBuffer(), BufferDescriptorGetContentLock(), BufferIsLocal, BufHdrGetBlock, PrivateRefCountEntry::data, ForgetPrivateRefCountEntry(), GetPrivateRefCountEntry(), LWLockHeldByMe(), pg_atomic_fetch_sub_u32(), PrivateRefCountData::refcount, VALGRIND_MAKE_MEM_NOACCESS, and WakePinCountWaiter().

Referenced by ResOwnerReleaseBufferPin(), and UnpinBuffer().
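
For orientation, here is a minimal sketch, assuming a hypothetical caller with an open relation rel and a valid block number blkno, of how a pin taken through the public buffer API ends up here: ReleaseBuffer() routes the final private reference through UnpinBuffer() and UnpinBufferNoOwner().

#include "postgres.h"

#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Hypothetical illustration, not part of bufmgr.c. */
static void
read_and_release_example(Relation rel, BlockNumber blkno)
{
    /*
     * Pins the buffer: bumps the shared refcount and this backend's private
     * refcount entry (see GetPrivateRefCountEntry()).
     */
    Buffer      buf = ReadBuffer(rel, blkno);

    /* ... examine the page under a content lock ... */

    /*
     * Drops the pin; when the private refcount reaches zero, UnpinBuffer()
     * -> UnpinBufferNoOwner() decrements the shared refcount and wakes any
     * LockBufferForCleanup() waiter.
     */
    ReleaseBuffer(buf);
}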

◆ WaitBufHdrUnlocked()

pg_noinline uint32 WaitBufHdrUnlocked ( BufferDesc *  buf)

Definition at line 6389 of file bufmgr.c.

{
    SpinDelayStatus delayStatus;
    uint32      buf_state;

    init_local_spin_delay(&delayStatus);

    buf_state = pg_atomic_read_u32(&buf->state);

    while (buf_state & BM_LOCKED)
    {
        perform_spin_delay(&delayStatus);
        buf_state = pg_atomic_read_u32(&buf->state);
    }

    finish_spin_delay(&delayStatus);

    return buf_state;
}

References BM_LOCKED, buf, finish_spin_delay(), init_local_spin_delay, perform_spin_delay(), and pg_atomic_read_u32().

Referenced by GetBufferFromRing(), MarkBufferDirty(), PinBuffer(), and StrategyGetBuffer().
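
A condensed sketch of the caller pattern, modeled on PinBuffer()'s compare-and-exchange loop; the bare refcount increment shown here is illustrative and omits the rest of PinBuffer()'s state handling.

#include "postgres.h"

#include "storage/buf_internals.h"

/* Illustrative only: spin out the header lock, then CAS the state word. */
static void
cas_retry_sketch(BufferDesc *buf)
{
    uint32      old_buf_state = pg_atomic_read_u32(&buf->state);

    for (;;)
    {
        uint32      buf_state;

        /* spin (with backoff) only while the header spinlock is held */
        if (old_buf_state & BM_LOCKED)
            old_buf_state = WaitBufHdrUnlocked(buf);

        buf_state = old_buf_state + BUF_REFCOUNT_ONE;

        /* on failure, old_buf_state is refreshed and we retry */
        if (pg_atomic_compare_exchange_u32(&buf->state,
                                           &old_buf_state, buf_state))
            break;
    }
}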

◆ WaitIO()

static void WaitIO ( BufferDesc *  buf)
static

Definition at line 6062 of file bufmgr.c.

{
    ConditionVariable *cv = BufferDescriptorGetIOCV(buf->buf_id);

    ConditionVariablePrepareToSleep(cv);
    for (;;)
    {
        uint32      buf_state;
        PgAioWaitRef iow;

        /*
         * It may not be necessary to acquire the spinlock to check the flag
         * here, but since this test is essential for correctness, we'd better
         * play it safe.
         */
        buf_state = LockBufHdr(buf);

        /*
         * Copy the wait reference while holding the spinlock. This protects
         * against a concurrent TerminateBufferIO() in another backend from
         * clearing the wref while it's being read.
         */
        iow = buf->io_wref;
        UnlockBufHdr(buf);

        /* no IO in progress, we don't need to wait */
        if (!(buf_state & BM_IO_IN_PROGRESS))
            break;

        /*
         * The buffer has asynchronous IO in progress, wait for it to
         * complete.
         */
        if (pgaio_wref_valid(&iow))
        {
            pgaio_wref_wait(&iow);

            /*
             * The AIO subsystem internally uses condition variables and thus
             * might remove this backend from the BufferDesc's CV. While that
             * wouldn't cause a correctness issue (the first CV sleep just
             * immediately returns if not already registered), it seems worth
             * avoiding unnecessary loop iterations, given that we take care
             * to do so at the start of the function.
             */
            ConditionVariablePrepareToSleep(cv);
            continue;
        }

        /* wait on BufferDesc->cv, e.g. for concurrent synchronous IO */
        ConditionVariableSleep(cv, WAIT_EVENT_BUFFER_IO);
    }
    ConditionVariableCancelSleep();
}

References BM_IO_IN_PROGRESS, buf, BufferDescriptorGetIOCV(), ConditionVariableCancelSleep(), ConditionVariablePrepareToSleep(), ConditionVariableSleep(), LockBufHdr(), pgaio_wref_valid(), pgaio_wref_wait(), and UnlockBufHdr().

Referenced by InvalidateBuffer(), and StartBufferIO().
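
As a rough sketch of the call pattern: WaitIO() is static to bufmgr.c, so this only illustrates code inside that file, and the flag-claiming bookkeeping of the real StartBufferIO() is omitted.

/* Condensed, assumption-laden sketch of a StartBufferIO()-style wait loop. */
static bool
start_io_sketch(BufferDesc *buf, bool nowait)
{
    for (;;)
    {
        uint32      buf_state = LockBufHdr(buf);

        if (!(buf_state & BM_IO_IN_PROGRESS))
        {
            /* no concurrent IO; real code claims BM_IO_IN_PROGRESS here */
            UnlockBufHdr(buf);
            return true;
        }

        /* someone else's IO is in flight: release the header lock and wait */
        UnlockBufHdr(buf);
        if (nowait)
            return false;
        WaitIO(buf);            /* sleep until the concurrent IO terminates */
    }
}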

◆ WaitReadBuffers()

void WaitReadBuffers ( ReadBuffersOperation *  operation)

Definition at line 1707 of file bufmgr.c.

{
    PgAioReturn *aio_ret = &operation->io_return;
    IOContext   io_context;
    IOObject    io_object;

    if (operation->persistence == RELPERSISTENCE_TEMP)
    {
        io_context = IOCONTEXT_NORMAL;
        io_object = IOOBJECT_TEMP_RELATION;
    }
    else
    {
        io_context = IOContextForStrategy(operation->strategy);
        io_object = IOOBJECT_RELATION;
    }

    /*
     * If we get here without an IO operation having been issued, the
     * io_method == IOMETHOD_SYNC path must have been used. Otherwise the
     * caller should not have called WaitReadBuffers().
     *
     * In the case of IOMETHOD_SYNC, we start the IO in WaitReadBuffers()
     * itself - as we used to before the introduction of AIO. This is done
     * as part of the retry logic below; no extra code is required.
     *
     * This path is expected to eventually go away.
     */
    if (!pgaio_wref_valid(&operation->io_wref) && io_method != IOMETHOD_SYNC)
        elog(ERROR, "waiting for read operation that didn't read");

    /*
     * To handle partial reads, and IOMETHOD_SYNC, we re-issue IO until we're
     * done. We may need multiple retries, not just because we could get
     * multiple partial reads, but also because some of the remaining
     * to-be-read buffers may have been read in by other backends, limiting
     * the IO size.
     */
    while (true)
    {
        int         ignored_nblocks_progress;

        CheckReadBuffersOperation(operation, false);

        /*
         * If there is an IO associated with the operation, we may need to
         * wait for it.
         */
        if (pgaio_wref_valid(&operation->io_wref))
        {
            /*
             * Track the time spent waiting for the IO to complete. As
             * tracking a wait even if we don't actually need to wait
             *
             * a) is not cheap, due to the timestamping overhead
             *
             * b) reports some time as waiting, even if we never waited
             *
             * we first check if we already know the IO is complete.
             */
            if (aio_ret->result.status == PGAIO_RS_UNKNOWN &&
                !pgaio_wref_check_done(&operation->io_wref))
            {
                instr_time  io_start = pgstat_prepare_io_time(track_io_timing);

                pgaio_wref_wait(&operation->io_wref);

                /*
                 * The IO operation itself was already counted earlier, in
                 * AsyncReadBuffers(), this just accounts for the wait time.
                 */
                pgstat_count_io_op_time(io_object, io_context, IOOP_READ,
                                        io_start, 0, 0);
            }
            else
            {
                Assert(pgaio_wref_check_done(&operation->io_wref));
            }

            /*
             * We now are sure the IO completed. Check the results. This
             * includes reporting on errors if there were any.
             */
            ProcessReadBuffersResult(operation);
        }

        /*
         * Most of the time, the one IO we already started will read in
         * everything. But we need to deal with partial reads and buffers not
         * needing IO anymore.
         */
        if (operation->nblocks_done == operation->nblocks)
            break;

        CHECK_FOR_INTERRUPTS();

        /*
         * This may only complete the IO partially, either because some
         * buffers were already valid, or because of a partial read.
         *
         * NB: In contrast to after the AsyncReadBuffers() call in
         * StartReadBuffers(), we do *not* reduce
         * ReadBuffersOperation->nblocks here, callers expect the full
         * operation to be completed at this point (as more operations may
         * have been queued).
         */
        AsyncReadBuffers(operation, &ignored_nblocks_progress);
    }

    CheckReadBuffersOperation(operation, true);

    /* NB: READ_DONE tracepoint was already executed in completion callback */
}

References Assert(), AsyncReadBuffers(), CHECK_FOR_INTERRUPTS, CheckReadBuffersOperation(), elog, ERROR, io_method, ReadBuffersOperation::io_return, ReadBuffersOperation::io_wref, IOCONTEXT_NORMAL, IOContextForStrategy(), IOMETHOD_SYNC, IOOBJECT_RELATION, IOOBJECT_TEMP_RELATION, IOOP_READ, ReadBuffersOperation::nblocks, ReadBuffersOperation::nblocks_done, ReadBuffersOperation::persistence, PGAIO_RS_UNKNOWN, pgaio_wref_check_done(), pgaio_wref_valid(), pgaio_wref_wait(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), ProcessReadBuffersResult(), PgAioReturn::result, PgAioResult::status, ReadBuffersOperation::strategy, and track_io_timing.

Referenced by read_stream_next_buffer(), and ReadBuffer_common().
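
A minimal usage sketch, assuming a hypothetical helper that reads a single block through the split API; the caller-filled fields follow struct ReadBuffersOperation's documented contract, and error paths are omitted.

#include "postgres.h"

#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Hypothetical helper: start a one-block read, wait only if IO was needed. */
static Buffer
read_one_block_sketch(Relation rel, BlockNumber blocknum)
{
    ReadBuffersOperation op = {0};
    Buffer      buf;

    /* members the caller is expected to set before starting the read */
    op.rel = rel;
    op.smgr = RelationGetSmgr(rel);
    op.persistence = rel->rd_rel->relpersistence;
    op.forknum = MAIN_FORKNUM;
    op.strategy = NULL;

    /* returns true iff IO had to be started and must be waited for */
    if (StartReadBuffer(&op, &buf, blocknum, 0))
        WaitReadBuffers(&op);

    return buf;
}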

◆ WakePinCountWaiter()

static void WakePinCountWaiter ( BufferDesc *  buf)
static

Definition at line 3305 of file bufmgr.c.

{
    /*
     * Acquire the buffer header lock, re-check that there's a waiter. Another
     * backend could have unpinned this buffer, and already woken up the
     * waiter.
     *
     * There's no danger of the buffer being replaced after we unpinned it
     * above, as it's pinned by the waiter. The waiter removes
     * BM_PIN_COUNT_WAITER if it stops waiting for a reason other than this
     * backend waking it up.
     */
    uint32      buf_state = LockBufHdr(buf);

    if ((buf_state & BM_PIN_COUNT_WAITER) &&
        BUF_STATE_GET_REFCOUNT(buf_state) == 1)
    {
        /* we just released the last pin other than the waiter's */
        int         wait_backend_pgprocno = buf->wait_backend_pgprocno;

        UnlockBufHdrExt(buf, buf_state,
                        0, BM_PIN_COUNT_WAITER,
                        0);
        ProcSendSignal(wait_backend_pgprocno);
    }
    else
        UnlockBufHdr(buf);
}

References BM_PIN_COUNT_WAITER, buf, BUF_STATE_GET_REFCOUNT, LockBufHdr(), ProcSendSignal(), UnlockBufHdr(), and UnlockBufHdrExt().

Referenced by TerminateBufferIO(), and UnpinBufferNoOwner().
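
The waiter side, seen through the public API, is a useful contrast; here is a short hedged sketch of VACUUM-style code that sleeps until WakePinCountWaiter() signals it.

#include "postgres.h"

#include "storage/bufmgr.h"

/* Illustrative only: buf is assumed already pinned by this backend. */
static void
cleanup_lock_sketch(Buffer buf)
{
    /*
     * Blocks until ours is the only remaining pin; the wakeup comes from
     * WakePinCountWaiter(), called from UnpinBufferNoOwner().
     */
    LockBufferForCleanup(buf);

    /* ... prune or defragment the page ... */

    UnlockReleaseBuffer(buf);
}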

◆ WritebackContextInit()

void WritebackContextInit ( WritebackContext *  context,
int *  max_pending 
)

Definition at line 6501 of file bufmgr.c.

{
    Assert(*max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);

    context->max_pending = max_pending;
    context->nr_pending = 0;
}

References Assert(), WritebackContext::max_pending, WritebackContext::nr_pending, and WRITEBACK_MAX_PENDING_FLUSHES.

Referenced by BackgroundWriterMain(), BufferManagerShmemInit(), and BufferSync().
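
A short usage sketch, loosely modeled on the checkpointer; the helper and the single tag are hypothetical, but the three calls are the buf_internals.h API.

#include "postgres.h"

#include "pgstat.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"

/* Hypothetical flow: init once, schedule after each write, flush at the end. */
static void
writeback_usage_sketch(BufferTag *just_written_tag)
{
    WritebackContext wb_context;

    /* max_pending is kept as a pointer so the GUC can change at runtime */
    WritebackContextInit(&wb_context, &checkpoint_flush_after);

    /* queue the tag; a batched writeback is issued once *max_pending accrue */
    ScheduleBufferTagForWriteback(&wb_context, IOCONTEXT_NORMAL,
                                  just_written_tag);

    /* flush whatever is still pending */
    IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);
}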

◆ ZeroAndLockBuffer()

static void ZeroAndLockBuffer ( Buffer  buffer,
ReadBufferMode  mode,
bool  already_valid 
)
static

Definition at line 1106 of file bufmgr.c.

{
    BufferDesc *bufHdr;
    bool        need_to_zero;
    bool        isLocalBuf = BufferIsLocal(buffer);

    Assert(mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK);

    if (already_valid)
    {
        /*
         * If the caller already knew the buffer was valid, we can skip some
         * header interaction. The caller just wants to lock the buffer.
         */
        need_to_zero = false;
    }
    else if (isLocalBuf)
    {
        /* Simple case for non-shared buffers. */
        bufHdr = GetLocalBufferDescriptor(-buffer - 1);
        need_to_zero = StartLocalBufferIO(bufHdr, true, false);
    }
    else
    {
        /*
         * Take BM_IO_IN_PROGRESS, or discover that BM_VALID has been set
         * concurrently. Even though we aren't doing I/O, that ensures that
         * we don't zero a page that someone else has pinned. An exclusive
         * content lock wouldn't be enough, because readers are allowed to
         * drop the content lock after determining that a tuple is visible
         * (see buffer access rules in README).
         */
        bufHdr = GetBufferDescriptor(buffer - 1);
        need_to_zero = StartBufferIO(bufHdr, true, false);
    }

    if (need_to_zero)
    {
        memset(BufferGetPage(buffer), 0, BLCKSZ);

        /*
         * Grab the buffer content lock before marking the page as valid, to
         * make sure that no other backend sees the zeroed page before the
         * caller has had a chance to initialize it.
         *
         * Since no-one else can be looking at the page contents yet, there is
         * no difference between an exclusive lock and a cleanup-strength
         * lock. (Note that we cannot use LockBuffer() or
         * LockBufferForCleanup() here, because they assert that the buffer is
         * already valid.)
         */
        if (!isLocalBuf)
            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

        /* Set BM_VALID, terminate IO, and wake up any waiters */
        if (isLocalBuf)
            TerminateLocalBufferIO(bufHdr, false, BM_VALID, false);
        else
            TerminateBufferIO(bufHdr, false, BM_VALID, true, false);
    }
    else if (!isLocalBuf)
    {
        /*
         * The buffer is valid, so we can't zero it. The caller still expects
         * the page to be locked on return.
         */
        if (mode == RBM_ZERO_AND_LOCK)
            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        else
            LockBufferForCleanup(buffer);
    }
}

References Assert(), BM_VALID, PrivateRefCountEntry::buffer, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), BufferIsLocal, GetBufferDescriptor(), GetLocalBufferDescriptor(), LockBuffer(), LockBufferForCleanup(), mode, RBM_ZERO_AND_CLEANUP_LOCK, RBM_ZERO_AND_LOCK, StartBufferIO(), StartLocalBufferIO(), TerminateBufferIO(), and TerminateLocalBufferIO().

Referenced by ReadBuffer_common().
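
Callers never invoke ZeroAndLockBuffer() directly; they reach it via ReadBuffer_common() with RBM_ZERO_AND_LOCK. A minimal sketch, assuming a hypothetical page-initialization helper:

#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"

/* Hypothetical helper: get blkno zeroed and exclusively locked, then init. */
static void
init_page_sketch(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
                                         RBM_ZERO_AND_LOCK, NULL);
    Page        page = BufferGetPage(buf);

    /* no other backend can see the zeroed page until we unlock */
    PageInit(page, BufferGetPageSize(buf), 0);

    MarkBufferDirty(buf);
    UnlockReleaseBuffer(buf);
}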

Variable Documentation

◆ aio_local_buffer_readv_cb

const PgAioHandleCallbacks aio_local_buffer_readv_cb
Initial value:
= {
    .stage = local_buffer_readv_stage,
    .complete_local = local_buffer_readv_complete,
    .report = buffer_readv_report,
}

Definition at line 7740 of file bufmgr.c.

◆ aio_shared_buffer_readv_cb

const PgAioHandleCallbacks aio_shared_buffer_readv_cb
Initial value:
= {
    .stage = shared_buffer_readv_stage,
    .complete_shared = shared_buffer_readv_complete,
    .complete_local = shared_buffer_readv_complete_local,
    .report = buffer_readv_report,
}

Definition at line 7731 of file bufmgr.c.

◆ backend_flush_after

int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER

Definition at line 202 of file bufmgr.c.

Referenced by BufferManagerShmemInit().

◆ bgwriter_flush_after

int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER

Definition at line 201 of file bufmgr.c.

Referenced by BackgroundWriterMain().

◆ bgwriter_lru_maxpages

int bgwriter_lru_maxpages = 100

Definition at line 167 of file bufmgr.c.

Referenced by BgBufferSync().

◆ bgwriter_lru_multiplier

double bgwriter_lru_multiplier = 2.0

Definition at line 168 of file bufmgr.c.

Referenced by BgBufferSync().

◆ buffer_io_resowner_desc

const ResourceOwnerDesc buffer_io_resowner_desc
Initial value:
=
{
    .name = "buffer io",
    .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
    .release_priority = RELEASE_PRIO_BUFFER_IOS,
    .ReleaseResource = ResOwnerReleaseBufferIO,
    .DebugPrint = ResOwnerPrintBufferIO
}

Definition at line 260 of file bufmgr.c.

Referenced by ResourceOwnerForgetBufferIO(), and ResourceOwnerRememberBufferIO().

◆ buffer_pin_resowner_desc

const ResourceOwnerDesc buffer_pin_resowner_desc
Initial value:
=
{
    .name = "buffer pin",
    .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
    .release_priority = RELEASE_PRIO_BUFFER_PINS,
    .ReleaseResource = ResOwnerReleaseBufferPin,
    .DebugPrint = ResOwnerPrintBufferPin
}

Definition at line 269 of file bufmgr.c.

Referenced by ResourceOwnerForgetBuffer(), and ResourceOwnerRememberBuffer().

◆ checkpoint_flush_after

int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER

Definition at line 200 of file bufmgr.c.

Referenced by BufferSync().

◆ effective_io_concurrency

◆ io_combine_limit

◆ io_combine_limit_guc

int io_combine_limit_guc = DEFAULT_IO_COMBINE_LIMIT

Definition at line 193 of file bufmgr.c.

Referenced by assign_io_max_combine_limit().

◆ io_max_combine_limit

◆ maintenance_io_concurrency

◆ MaxProportionalPins

uint32 MaxProportionalPins
static

Definition at line 246 of file bufmgr.c.

Referenced by GetAdditionalPinLimit(), GetPinLimit(), and InitBufferManagerAccess().

◆ PinCountWaitBuf

BufferDesc* PinCountWaitBuf = NULL
static

Definition at line 205 of file bufmgr.c.

Referenced by LockBufferForCleanup(), and UnlockBuffers().

◆ PrivateRefCountArray

◆ PrivateRefCountArrayKeys

◆ PrivateRefCountClock

uint32 PrivateRefCountClock = 0
static

Definition at line 242 of file bufmgr.c.

Referenced by ReservePrivateRefCountEntry().

◆ PrivateRefCountEntryLast

int PrivateRefCountEntryLast = -1
static

◆ PrivateRefCountHash

◆ PrivateRefCountOverflowed

◆ ReservedRefCountSlot

int ReservedRefCountSlot = -1
static

◆ track_io_timing

◆ zero_damaged_pages

bool zero_damaged_pages = false

Definition at line 166 of file bufmgr.c.

Referenced by AsyncReadBuffers(), mdreadv(), and read_rel_block_ll().