@@ -533,6 +533,12 @@ static void FlushUnlockedBuffer(BufferDesc *buf, SMgrRelation reln,
533533 IOObject io_object , IOContext io_context );
534534static void FlushBuffer (BufferDesc * buf , SMgrRelation reln ,
535535 IOObject io_object , IOContext io_context );
536+ static bool PrepareFlushBuffer (BufferDesc * bufdesc , XLogRecPtr * lsn );
537+ static void DoFlushBuffer (BufferDesc * buf , SMgrRelation reln ,
538+ IOObject io_object , IOContext io_context ,
539+ XLogRecPtr buffer_lsn );
540+ static void CleanVictimBuffer (BufferDesc * bufdesc , bool from_ring ,
541+ IOContext io_context );
536542static void FindAndDropRelationBuffers (RelFileLocator rlocator ,
537543 ForkNumber forkNum ,
538544 BlockNumber nForkBlock ,
@@ -2394,12 +2400,8 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
23942400 continue ;
23952401 }
23962402
2397- /* OK, do the I/O */
2398- FlushBuffer (buf_hdr , NULL , IOOBJECT_RELATION , io_context );
2399- LWLockRelease (content_lock );
2400-
2401- ScheduleBufferTagForWriteback (& BackendWritebackContext , io_context ,
2402- & buf_hdr -> tag );
2403+ /* Content lock is released inside CleanVictimBuffer */
2404+ CleanVictimBuffer (buf_hdr , from_ring , io_context );
24032405 }
24042406
24052407
@@ -4270,54 +4272,65 @@ static void
42704272FlushBuffer (BufferDesc * buf , SMgrRelation reln , IOObject io_object ,
42714273 IOContext io_context )
42724274{
4273- XLogRecPtr recptr ;
4274- ErrorContextCallback errcallback ;
4275- instr_time io_start ;
4276- Block bufBlock ;
4277- char * bufToWrite ;
4278- uint32 buf_state ;
4275+ XLogRecPtr lsn ;
42794276
4280- /*
4281- * Try to start an I/O operation. If StartBufferIO returns false, then
4282- * someone else flushed the buffer before we could, so we need not do
4283- * anything.
4284- */
4285- if (!StartBufferIO (buf , false, false))
4286- return ;
4277+ if (PrepareFlushBuffer (buf , & lsn ))
4278+ DoFlushBuffer (buf , reln , io_object , io_context , lsn );
4279+ }
42874280
4288- /* Setup error traceback support for ereport() */
4289- errcallback .callback = shared_buffer_write_error_callback ;
4290- errcallback .arg = buf ;
4291- errcallback .previous = error_context_stack ;
4292- error_context_stack = & errcallback ;
4281+ /*
4282+ * Prepare and write out a dirty victim buffer.
4283+ *
4284+ * Buffer must be pinned, the content lock must be held, and the buffer header
4285+ * spinlock must not be held. The content lock is released and the buffer is
4286+ * returned pinned but not locked.
4287+ *
4288+ * bufdesc may be modified.
4289+ */
4290+ static void
4291+ CleanVictimBuffer (BufferDesc * bufdesc ,
4292+ bool from_ring , IOContext io_context )
4293+ {
4294+ XLogRecPtr max_lsn = InvalidXLogRecPtr ;
42934295
4294- /* Find smgr relation for buffer */
4295- if (reln == NULL )
4296- reln = smgropen (BufTagGetRelFileLocator (& buf -> tag ), INVALID_PROC_NUMBER );
4296+ /* Set up this victim buffer to be flushed */
4297+ if (!PrepareFlushBuffer (bufdesc , & max_lsn ))
4298+ {
4299+ LWLockRelease (BufferDescriptorGetContentLock (bufdesc ));
4300+ return ;
4301+ }
42974302
4298- TRACE_POSTGRESQL_BUFFER_FLUSH_START ( BufTagGetForkNum ( & buf -> tag ),
4299- buf -> tag . blockNum ,
4300- reln -> smgr_rlocator . locator . spcOid ,
4301- reln -> smgr_rlocator . locator . dbOid ,
4302- reln -> smgr_rlocator . locator . relNumber );
4303+ DoFlushBuffer ( bufdesc , NULL , IOOBJECT_RELATION , io_context , max_lsn );
4304+ LWLockRelease ( BufferDescriptorGetContentLock ( bufdesc ));
4305+ ScheduleBufferTagForWriteback ( & BackendWritebackContext , io_context ,
4306+ & bufdesc -> tag );
4307+ }
43034308
4304- buf_state = LockBufHdr (buf );
4309+ /*
4310+ * Prepare the buffer with bufdesc for writing. Returns true if the buffer
4311+ * acutally needs writing and false otherwise. lsn returns the buffer's LSN if
4312+ * the table is logged.
4313+ */
4314+ static bool
4315+ PrepareFlushBuffer (BufferDesc * bufdesc , XLogRecPtr * lsn )
4316+ {
4317+ uint32 buf_state ;
43054318
43064319 /*
4307- * Run PageGetLSN while holding header lock, since we don't have the
4308- * buffer locked exclusively in all cases.
4320+ * Try to start an I/O operation. If StartBufferIO returns false, then
4321+ * someone else flushed the buffer before we could, so we need not do
4322+ * anything.
43094323 */
4310- recptr = BufferGetLSN (buf );
4324+ if (!StartBufferIO (bufdesc , false, false))
4325+ return false;
43114326
4312- /* To check if block content changes while flushing. - vadim 01/17/97 */
4313- UnlockBufHdrExt (buf , buf_state ,
4314- 0 , BM_JUST_DIRTIED ,
4315- 0 );
4327+ * lsn = InvalidXLogRecPtr ;
4328+ buf_state = LockBufHdr (bufdesc );
43164329
43174330 /*
4318- * Force XLOG flush up to buffer's LSN. This implements the basic WAL
4319- * rule that log updates must hit disk before any of the data-file changes
4320- * they describe do.
4331+ * Record the buffer's LSN. We will force XLOG flush up to buffer's LSN.
4332+ * This implements the basic WAL rule that log updates must hit disk
4333+ * before any of the data-file changes they describe do.
43214334 *
43224335 * However, this rule does not apply to unlogged relations, which will be
43234336 * lost after a crash anyway. Most unlogged relation pages do not bear
@@ -4330,9 +4343,51 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
43304343 * happen, attempting to flush WAL through that location would fail, with
43314344 * disastrous system-wide consequences. To make sure that can't happen,
43324345 * skip the flush if the buffer isn't permanent.
4346+ *
4347+ * We must hold the buffer header lock when examining the page LSN since
4348+ * don't have buffer exclusively locked in all cases.
43334349 */
43344350 if (buf_state & BM_PERMANENT )
4335- XLogFlush (recptr );
4351+ * lsn = BufferGetLSN (bufdesc );
4352+
4353+ /* To check if block content changes while flushing. - vadim 01/17/97 */
4354+ UnlockBufHdrExt (bufdesc , buf_state ,
4355+ 0 , BM_JUST_DIRTIED ,
4356+ 0 );
4357+ return true;
4358+ }
4359+
4360+ /*
4361+ * Actually do the write I/O to clean a buffer. buf and reln may be modified.
4362+ */
4363+ static void
4364+ DoFlushBuffer (BufferDesc * buf , SMgrRelation reln , IOObject io_object ,
4365+ IOContext io_context , XLogRecPtr buffer_lsn )
4366+ {
4367+ ErrorContextCallback errcallback ;
4368+ instr_time io_start ;
4369+ Block bufBlock ;
4370+ char * bufToWrite ;
4371+
4372+ /* Setup error traceback support for ereport() */
4373+ errcallback .callback = shared_buffer_write_error_callback ;
4374+ errcallback .arg = buf ;
4375+ errcallback .previous = error_context_stack ;
4376+ error_context_stack = & errcallback ;
4377+
4378+ /* Find smgr relation for buffer */
4379+ if (reln == NULL )
4380+ reln = smgropen (BufTagGetRelFileLocator (& buf -> tag ), INVALID_PROC_NUMBER );
4381+
4382+ TRACE_POSTGRESQL_BUFFER_FLUSH_START (BufTagGetForkNum (& buf -> tag ),
4383+ buf -> tag .blockNum ,
4384+ reln -> smgr_rlocator .locator .spcOid ,
4385+ reln -> smgr_rlocator .locator .dbOid ,
4386+ reln -> smgr_rlocator .locator .relNumber );
4387+
4388+ /* Force XLOG flush up to buffer's LSN */
4389+ if (XLogRecPtrIsValid (buffer_lsn ))
4390+ XLogFlush (buffer_lsn );
43364391
43374392 /*
43384393 * Now it's safe to write the buffer to disk. Note that no one else should
0 commit comments