Skip to content

Commit 152b4c2

Browse files
melanieplagemanCommitfest Bot
authored andcommitted
Refactor SyncOneBuffer for bgwriter use only
Since xxx, only bgwriter uses SyncOneBuffer, so we can remove the skip_recently_used parameter and make that behavior the default. While we are at it, 5e89985 introduced the pattern of using a CAS loop instead of locking the buffer header and then calling PinBuffer_Locked(). Do that in SyncOneBuffer() so we can avoid taking the buffer header spinlock in the common case that the buffer is recently used. Author: Melanie Plageman <melanieplageman@gmail.com> Reviewed-by: Chao Li <li.evan.chao@gmail.com> Discussion: https://postgr.es/m/2FA0BAC7-5413-4ABD-94CA-4398FE77750D%40gmail.com
1 parent 52e3bdf commit 152b4c2

File tree

1 file changed

+56
-40
lines changed

1 file changed

+56
-40
lines changed

src/backend/storage/buffer/bufmgr.c

Lines changed: 56 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -515,8 +515,7 @@ static void UnpinBuffer(BufferDesc *buf);
515515
static void UnpinBufferNoOwner(BufferDesc *buf);
516516
static uint32 CheckpointerMaxBatchSize(void);
517517
static void BufferSync(int flags);
518-
static int SyncOneBuffer(int buf_id, bool skip_recently_used,
519-
WritebackContext *wb_context);
518+
static int SyncOneBuffer(int buf_id, WritebackContext *wb_context);
520519
static void WaitIO(BufferDesc *buf);
521520
static void AbortBufferIO(Buffer buffer);
522521
static void shared_buffer_write_error_callback(void *arg);
@@ -4000,8 +3999,7 @@ BgBufferSync(WritebackContext *wb_context)
40003999
/* Execute the LRU scan */
40014000
while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
40024001
{
4003-
int sync_state = SyncOneBuffer(next_to_clean, true,
4004-
wb_context);
4002+
int sync_state = SyncOneBuffer(next_to_clean, wb_context);
40054003

40064004
if (++next_to_clean >= NBuffers)
40074005
{
@@ -4064,8 +4062,8 @@ BgBufferSync(WritebackContext *wb_context)
40644062
/*
40654063
* SyncOneBuffer -- process a single buffer during syncing.
40664064
*
4067-
* If skip_recently_used is true, we don't write currently-pinned buffers, nor
4068-
* buffers marked recently used, as these are not replacement candidates.
4065+
* We don't write currently-pinned buffers, nor buffers marked recently used,
4066+
* as these are not replacement candidates.
40694067
*
40704068
* Returns a bitmask containing the following flag bits:
40714069
* BUF_WRITTEN: we wrote the buffer.
@@ -4076,62 +4074,80 @@ BgBufferSync(WritebackContext *wb_context)
40764074
* after locking it, but we don't care all that much.)
40774075
*/
40784076
static int
4079-
SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
4077+
SyncOneBuffer(int buf_id, WritebackContext *wb_context)
40804078
{
40814079
BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
40824080
int result = 0;
4081+
uint32 old_buf_state;
40834082
uint32 buf_state;
40844083
BufferTag tag;
40854084

4086-
/* Make sure we can handle the pin */
4087-
ReservePrivateRefCountEntry();
4088-
ResourceOwnerEnlarge(CurrentResourceOwner);
4089-
40904085
/*
4091-
* Check whether buffer needs writing.
4092-
*
4093-
* We can make this check without taking the buffer content lock so long
4094-
* as we mark pages dirty in access methods *before* logging changes with
4095-
* XLogInsert(): if someone marks the buffer dirty just after our check we
4096-
* don't worry because our checkpoint.redo points before log record for
4097-
* upcoming changes and so we are not required to write such dirty buffer.
4086+
* Check whether the buffer can be used and pin it if so. Do this using a
4087+
* CAS loop, to avoid having to lock the buffer header.
40984088
*/
4099-
buf_state = LockBufHdr(bufHdr);
4100-
4101-
if (BUF_STATE_GET_REFCOUNT(buf_state) == 0 &&
4102-
BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
4089+
old_buf_state = pg_atomic_read_u32(&bufHdr->state);
4090+
for (;;)
41034091
{
4092+
buf_state = old_buf_state;
4093+
4094+
/*
4095+
* We can make these checks without taking the buffer content lock so
4096+
* long as we mark pages dirty in access methods *before* logging
4097+
* changes with XLogInsert(): if someone marks the buffer dirty just
4098+
* after our check we don't worry because our checkpoint.redo points
4099+
* before log record for upcoming changes and so we are not required
4100+
* to write such dirty buffer.
4101+
*/
4102+
if (BUF_STATE_GET_REFCOUNT(buf_state) != 0 ||
4103+
BUF_STATE_GET_USAGECOUNT(buf_state) != 0)
4104+
{
4105+
/* Don't write recently-used buffers */
4106+
return result;
4107+
}
4108+
41044109
result |= BUF_REUSABLE;
4105-
}
4106-
else if (skip_recently_used)
4107-
{
4108-
/* Caller told us not to write recently-used buffers */
4109-
UnlockBufHdr(bufHdr);
4110-
return result;
4111-
}
41124110

4113-
if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
4114-
{
4115-
/* It's clean, so nothing to do */
4116-
UnlockBufHdr(bufHdr);
4117-
return result;
4111+
if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
4112+
{
4113+
/* It's clean, so nothing to do */
4114+
return result;
4115+
}
4116+
4117+
if (unlikely(buf_state & BM_LOCKED))
4118+
{
4119+
old_buf_state = WaitBufHdrUnlocked(bufHdr);
4120+
continue;
4121+
}
4122+
4123+
/* Make sure we can handle the pin */
4124+
ReservePrivateRefCountEntry();
4125+
ResourceOwnerEnlarge(CurrentResourceOwner);
4126+
4127+
/* pin the buffer if the CAS succeeds */
4128+
buf_state += BUF_REFCOUNT_ONE;
4129+
4130+
if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
4131+
buf_state))
4132+
{
4133+
TrackNewBufferPin(BufferDescriptorGetBuffer(bufHdr));
4134+
break;
4135+
}
41184136
}
41194137

41204138
/*
4121-
* Pin it, share-lock it, write it. (FlushBuffer will do nothing if the
4122-
* buffer is clean by the time we've locked it.)
4139+
* Share lock and write it out (FlushBuffer will do nothing if the buffer
4140+
* is clean by the time we've locked it.)
41234141
*/
4124-
PinBuffer_Locked(bufHdr);
4125-
41264142
FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
41274143

41284144
tag = bufHdr->tag;
41294145

41304146
UnpinBuffer(bufHdr);
41314147

41324148
/*
4133-
* SyncOneBuffer() is only called by checkpointer and bgwriter, so
4134-
* IOContext will always be IOCONTEXT_NORMAL.
4149+
* SyncOneBuffer() is only called by bgwriter, so IOContext will always be
4150+
* IOCONTEXT_NORMAL.
41354151
*/
41364152
ScheduleBufferTagForWriteback(wb_context, IOCONTEXT_NORMAL, &tag);
41374153

0 commit comments

Comments
 (0)