Fixing a problem with pcp_detach_node when a graceful node detach is requested
author Muhammad Usama <m.usama@gmail.com>
Tue, 7 Apr 2015 12:48:44 +0000 (17:48 +0500)
committer Muhammad Usama <m.usama@gmail.com>
Tue, 7 Apr 2015 12:48:44 +0000 (17:48 +0500)
Gracefully detaching a node with pcp_detach_node should first check whether the
detach_node command is allowed on that particular node, before blocking incoming
connections and closing existing connections.

main.c
pcp_child.c
pool.h

diff --git a/main.c b/main.c
index e58e95246a34cfe0e9e146375690fdabbcbf9b94..24fe29fce015769e44e655a65d51a8832d114cfb 100644 (file)
--- a/main.c
+++ b/main.c
@@ -1462,11 +1462,25 @@ void notice_backend_error(int node_id)
        }
 }
 
-/* notice backend connection error using SIGUSR1 */
-void degenerate_backend_set(int *node_id_set, int count)
+/*
+ * degenerate_backend_set_ex:
+ *
+ * The function signals/verifies the node down request.
+ * The request is then processed by failover function.
+ *
+ * node_id_set: array of node ids to be registered for NODE DOWN operation
+ * count:       number of elements in node_id_set array
+ * test_only:   When set, function only checks if NODE DOWN operation can be
+ *              executed on provided node ids and never registers the operation
+ *              request.
+ *              For test_only case function returns false as
+ *              soon as first non compliant node in node_id_set is found
+ */
+bool degenerate_backend_set_ex(int *node_id_set, int count, bool test_only)
 {
        pid_t parent = getppid();
        int i;
+       bool ret = false;
        bool need_signal = false;
 #ifdef HAVE_SIGPROCMASK
        sigset_t oldmask;
@@ -1476,42 +1490,68 @@ void degenerate_backend_set(int *node_id_set, int count)
 
        if (pool_config->parallel_mode)
        {
-               return;
+               return false;
+       }
+       if (test_only == false)
+       {
+               POOL_SETMASK2(&BlockSig, &oldmask);
+               pool_semaphore_lock(REQUEST_INFO_SEM);
+               Req_info->kind = NODE_DOWN_REQUEST;
        }
 
-       POOL_SETMASK2(&BlockSig, &oldmask);
-       pool_semaphore_lock(REQUEST_INFO_SEM);
-       Req_info->kind = NODE_DOWN_REQUEST;
        for (i = 0; i < count; i++)
        {
                if (node_id_set[i] < 0 || node_id_set[i] >= MAX_NUM_BACKENDS ||
                        !VALID_BACKEND(node_id_set[i]))
                {
                        pool_log("degenerate_backend_set: node %d is not valid backend.", i);
+                       if (test_only)
+                               return false;
                        continue;
                }
 
                if (POOL_DISALLOW_TO_FAILOVER(BACKEND_INFO(node_id_set[i]).flag))
                {
                        pool_log("degenerate_backend_set: %d failover request from pid %d is canceld because failover is disallowed", node_id_set[i], getpid());
+                       if (test_only)
+                               return false;
                        continue;
                }
 
-               pool_log("degenerate_backend_set: %d fail over request from pid %d", node_id_set[i], getpid());
-               Req_info->node_id[i] = node_id_set[i];
+               if (test_only == false)
+               {
+                       pool_log("degenerate_backend_set: %d fail over request from pid %d", node_id_set[i], getpid());
+                       Req_info->node_id[i] = node_id_set[i];
+               }
                need_signal = true;
        }
 
        if (need_signal)
        {
+               if (test_only)
+                       return true;
                if (!pool_config->use_watchdog || WD_OK == wd_degenerate_backend_set(node_id_set, count))
                {
                        kill(parent, SIGUSR1);
+                       ret = true;
                }
        }
+       else if (test_only)
+               return false;
 
        pool_semaphore_unlock(REQUEST_INFO_SEM);
        POOL_SETMASK(&oldmask);
+
+       return ret;
+}
+
+/*
+ * wrapper over degenerate_backend_set_ex function to signal
+ * NODE down operation request
+ */
+void degenerate_backend_set(int *node_id_set, int count)
+{
+       degenerate_backend_set_ex(node_id_set, count, false);
 }
 
 /* send promote node request using SIGUSR1 */
index a39f280e9eae7ff5c505ae5b3b05d1713bbc380d..7f444dbdc0cf4c784380f80640a1e7ad5da7e24f 100644 (file)
@@ -752,7 +752,7 @@ pcp_do_child(int unix_fd, int inet_fd, char *pcp_conf_file)
                        {
                                int node_id;
                                int wsize;
-                               char code[] = "CommandComplete";
+                               char *code;
                                bool gracefully;
 
                                if (tos == 'D')
@@ -762,12 +762,15 @@ pcp_do_child(int unix_fd, int inet_fd, char *pcp_conf_file)
 
                                node_id = atoi(buf);
                                pool_debug("pcp_child: detaching Node ID %d", node_id);
-                               pool_detach_node(node_id, gracefully);
+                               if (pool_detach_node(node_id, gracefully) == 0)
+                                       code = "CommandComplete";
+                               else
+                                       code = "CommandFailed";
 
                                pcp_write(frontend, "d", 1);
-                               wsize = htonl(sizeof(code) + sizeof(int));
+                               wsize = htonl(strlen(code) + 1 + sizeof(int));
                                pcp_write(frontend, &wsize, sizeof(int));
-                               pcp_write(frontend, code, sizeof(code));
+                               pcp_write(frontend, code, strlen(code) + 1);
                                if (pcp_flush(frontend) < 0)
                                {
                                        pool_error("pcp_child: pcp_flush() failed. reason: %s", strerror(errno));
@@ -1319,12 +1322,26 @@ static RETSIGTYPE reload_config_handler(int sig)
 /* Dedatch a node */
 static int pool_detach_node(int node_id, bool gracefully)
 {
+       int nRet = 0;
        if (!gracefully)
        {
-               notice_backend_error(node_id);  /* send failover request */
+               if (degenerate_backend_set_ex(&node_id, 1, false) == false)
+               {
+                       pool_error("pcp_child: processing detach node failed");
+                       return -1;
+               }
                return 0;
        }
-               
+
+       /* Check if the NODE DOWN can be executed on
+        * the given node id.
+        */
+       if (degenerate_backend_set_ex(&node_id, 1, true) == false)
+       {
+               pool_error("pcp_child: processing graceful detach node failed");
+               return -1;
+       }
+
        /*
         * Wait until all frontends exit
         */
@@ -1341,12 +1358,19 @@ static int pool_detach_node(int node_id, bool gracefully)
        /*
         * Now all frontends have gone. Let's do failover.
         */
-       notice_backend_error(node_id);          /* send failover request */
-
-       /*
-        * Wait for failover completed.
-        */
-       pcp_wakeup_request = 0;
+       if (degenerate_backend_set_ex(&node_id, 1, false) == false)
+       {
+               nRet = -1;
+               pcp_wakeup_request = 1;
+               pool_error("pcp_child: processing graceful detach node failed");
+       }
+       else
+       {
+               /*
+                * Wait for failover completed.
+                */
+               pcp_wakeup_request = 0;
+       }
 
        while (!pcp_wakeup_request)
        {
@@ -1361,7 +1385,7 @@ static int pool_detach_node(int node_id, bool gracefully)
         */
        finish_recovery();
 
-       return 0;
+       return nRet;
 }
 
 /* Promote a node */
diff --git a/pool.h b/pool.h
index 66b687d6979b5c17a0b3cf99bde2c16a5630eaa6..384e71d0eec577d2cf744288140427b8647f23a2 100644 (file)
--- a/pool.h
+++ b/pool.h
@@ -482,6 +482,7 @@ extern POOL_STATUS NoticeResponse(POOL_CONNECTION *frontend,
 
 extern void notice_backend_error(int node_id);
 extern void degenerate_backend_set(int *node_id_set, int count);
+extern bool degenerate_backend_set_ex(int *node_id_set, int count, bool test_only);
 extern void promote_backend(int node_id);
 extern void send_failback_request(int node_id);