From c8098aa411ee72b36879acba95819100b263f726 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 17 Dec 2025 16:23:13 +0200 Subject: Make postmaster 003_start_stop.pl test less flaky The test is very sensitive to how backends start and exit, because it tests dead-end backends which occur when all the connection slots are in use. The test failed occasionally in the CI, when the backend that was launched for the raw_connect_works() check lingered for a while, and exited only later during the test. When it exited, it released a connection slot, when the test expected all the slots to be in use at that time. The 002_connection_limits.pl test had a similar issue: if the backend launched for safe_psql() in the test initialization lingers around, it uses up a connection slot during the test, messing up the test's connection counting. I haven't seen that in the CI, but when I added a "sleep(1);" to proc_exit(), the test failed. To make the tests more robust, restart the server to ensure that the lingering backends don't interfere with the later test steps. In passing, fix a bogus test name. Report and analysis by Jelte Fennema-Nio, Andres Freund, Thomas Munro. Discussion: https://www.postgresql.org/message-id/CAGECzQSU2iGuocuP+fmu89hmBmR3tb-TNyYKjCcL2M_zTCkAFw@mail.gmail.com Backpatch-through: 18 --- src/test/postmaster/t/002_connection_limits.pl | 5 +++++ src/test/postmaster/t/003_start_stop.pl | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/test/postmaster/t/002_connection_limits.pl b/src/test/postmaster/t/002_connection_limits.pl index 4a7fb16261f..2fc821ad0b4 100644 --- a/src/test/postmaster/t/002_connection_limits.pl +++ b/src/test/postmaster/t/002_connection_limits.pl @@ -74,6 +74,11 @@ sub connect_fails_wait ok(1, "$test_name: client backend process exited"); } +# Restart the server to ensure that any backends launched for the +# initialization steps are gone. 
Otherwise they could still be using +# up connection slots and mess with our expectations. +$node->restart; + my @sessions = (); my @raw_connections = (); diff --git a/src/test/postmaster/t/003_start_stop.pl b/src/test/postmaster/t/003_start_stop.pl index 58e7ba6cc42..25d6f667217 100644 --- a/src/test/postmaster/t/003_start_stop.pl +++ b/src/test/postmaster/t/003_start_stop.pl @@ -46,6 +46,11 @@ if (!$node->raw_connect_works()) plan skip_all => "this test requires working raw_connect()"; } +# Restart the server to ensure that the backend launched for +# raw_connect_works() is gone. Otherwise, it might free up the +# connection slot later, when we expect all the slots to be in use. +$node->restart; + my @raw_connections = (); # Open a lot of TCP (or Unix domain socket) connections to use up all @@ -81,7 +86,7 @@ for (my $i = 0; $i <= 20; $i++) # clients already" instead of "role does not exist" error. Test that # to ensure that we have used up all the slots. $node->connect_fails("dbname=postgres user=invalid_user", - "connect ", + "connection is rejected when all slots are in use", expected_stderr => qr/FATAL: sorry, too many clients already/); # Open one more connection, to really ensure that we have at least one -- cgit v1.2.3