From: Muhammad Usama Date: Tue, 7 Apr 2015 12:48:44 +0000 (+0500) Subject: Fixing a problem with pcp_detach_node, When graceful node detach is requested X-Git-Tag: V3_2_11~1 X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=719bafc574b7844151a9f80f55c1aebead20d71d;p=pgpool2.git Fixing a problem with pcp_detach_node, When graceful node detach is requested gracefully detaching a node by pcp_detach_node should check if it is allowed to process detach_node command on the particular node before blocking the incoming connections and closing the existing connections. --- diff --git a/main.c b/main.c index e58e95246..24fe29fce 100644 --- a/main.c +++ b/main.c @@ -1462,11 +1462,25 @@ void notice_backend_error(int node_id) } } -/* notice backend connection error using SIGUSR1 */ -void degenerate_backend_set(int *node_id_set, int count) +/* + * degenerate_backend_set_ex: + * + * The function signals/verifies the node down request. + * The request is then processed by failover function. + * + * node_id_set: array of node ids to be registered for NODE DOWN operation + * count: number of elements in node_id_set array + * test_only: When set, function only checks if NODE DOWN operation can be + * executed on provided node ids and never registers the operation + * request. 
+ * For test_only case function returns false as + * soon as first non compliant node in node_id_set is found + */ +bool degenerate_backend_set_ex(int *node_id_set, int count, bool test_only) { pid_t parent = getppid(); int i; + bool ret = false; bool need_signal = false; #ifdef HAVE_SIGPROCMASK sigset_t oldmask; @@ -1476,42 +1490,68 @@ void degenerate_backend_set(int *node_id_set, int count) if (pool_config->parallel_mode) { - return; + return false; + } + if (test_only == false) + { + POOL_SETMASK2(&BlockSig, &oldmask); + pool_semaphore_lock(REQUEST_INFO_SEM); + Req_info->kind = NODE_DOWN_REQUEST; } - POOL_SETMASK2(&BlockSig, &oldmask); - pool_semaphore_lock(REQUEST_INFO_SEM); - Req_info->kind = NODE_DOWN_REQUEST; for (i = 0; i < count; i++) { if (node_id_set[i] < 0 || node_id_set[i] >= MAX_NUM_BACKENDS || !VALID_BACKEND(node_id_set[i])) { pool_log("degenerate_backend_set: node %d is not valid backend.", i); + if (test_only) + return false; continue; } if (POOL_DISALLOW_TO_FAILOVER(BACKEND_INFO(node_id_set[i]).flag)) { pool_log("degenerate_backend_set: %d failover request from pid %d is canceld because failover is disallowed", node_id_set[i], getpid()); + if (test_only) + return false; continue; } - pool_log("degenerate_backend_set: %d fail over request from pid %d", node_id_set[i], getpid()); - Req_info->node_id[i] = node_id_set[i]; + if (test_only == false) + { + pool_log("degenerate_backend_set: %d fail over request from pid %d", node_id_set[i], getpid()); + Req_info->node_id[i] = node_id_set[i]; + } need_signal = true; } if (need_signal) { + if (test_only) + return true; if (!pool_config->use_watchdog || WD_OK == wd_degenerate_backend_set(node_id_set, count)) { kill(parent, SIGUSR1); + ret = true; } } + else if (test_only) + return false; pool_semaphore_unlock(REQUEST_INFO_SEM); POOL_SETMASK(&oldmask); + + return ret; +} + +/* + * wrapper over degenerate_backend_set_ex function to signal + * NODE down operation request + */ +void degenerate_backend_set(int 
*node_id_set, int count) +{ + degenerate_backend_set_ex(node_id_set, count, false); } /* send promote node request using SIGUSR1 */ diff --git a/pcp_child.c b/pcp_child.c index a39f280e9..7f444dbdc 100644 --- a/pcp_child.c +++ b/pcp_child.c @@ -752,7 +752,7 @@ pcp_do_child(int unix_fd, int inet_fd, char *pcp_conf_file) { int node_id; int wsize; - char code[] = "CommandComplete"; + char *code; bool gracefully; if (tos == 'D') @@ -762,12 +762,15 @@ pcp_do_child(int unix_fd, int inet_fd, char *pcp_conf_file) node_id = atoi(buf); pool_debug("pcp_child: detaching Node ID %d", node_id); - pool_detach_node(node_id, gracefully); + if (pool_detach_node(node_id, gracefully) == 0) + code = "CommandComplete"; + else + code = "CommandFailed"; pcp_write(frontend, "d", 1); - wsize = htonl(sizeof(code) + sizeof(int)); + wsize = htonl(strlen(code) + 1 + sizeof(int)); pcp_write(frontend, &wsize, sizeof(int)); - pcp_write(frontend, code, sizeof(code)); + pcp_write(frontend, code, strlen(code) + 1); if (pcp_flush(frontend) < 0) { pool_error("pcp_child: pcp_flush() failed. reason: %s", strerror(errno)); @@ -1319,12 +1322,26 @@ static RETSIGTYPE reload_config_handler(int sig) /* Dedatch a node */ static int pool_detach_node(int node_id, bool gracefully) { + int nRet = 0; if (!gracefully) { - notice_backend_error(node_id); /* send failover request */ + if (degenerate_backend_set_ex(&node_id, 1, false) == false) + { + pool_error("pcp_child: processing detach node failed"); + return -1; + } return 0; } - + + /* Check if the NODE DOWN can be executed on + * the given node id. + */ + if (degenerate_backend_set_ex(&node_id, 1, true) == false) + { + pool_error("pcp_child: processing graceful detach node failed"); + return -1; + } + /* * Wait until all frontends exit */ @@ -1341,12 +1358,19 @@ static int pool_detach_node(int node_id, bool gracefully) /* * Now all frontends have gone. Let's do failover. 
*/ - notice_backend_error(node_id); /* send failover request */ - - /* - * Wait for failover completed. - */ - pcp_wakeup_request = 0; + if (degenerate_backend_set_ex(&node_id, 1, false) == false) + { + nRet = -1; + pcp_wakeup_request = 1; + pool_error("pcp_child: processing graceful detach node failed"); + } + else + { + /* + * Wait for failover completed. + */ + pcp_wakeup_request = 0; + } while (!pcp_wakeup_request) { @@ -1361,7 +1385,7 @@ static int pool_detach_node(int node_id, bool gracefully) */ finish_recovery(); - return 0; + return nRet; } /* Promote a node */ diff --git a/pool.h b/pool.h index 66b687d69..384e71d0e 100644 --- a/pool.h +++ b/pool.h @@ -482,6 +482,7 @@ extern POOL_STATUS NoticeResponse(POOL_CONNECTION *frontend, extern void notice_backend_error(int node_id); extern void degenerate_backend_set(int *node_id_set, int count); +extern bool degenerate_backend_set_ex(int *node_id_set, int count, bool test_only); extern void promote_backend(int node_id); extern void send_failback_request(int node_id);