From b2e24693c44b63305fe8740506ad4fc1d5be5e5f Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 5 Mar 2010 13:41:28 +0000 Subject: [PATCH] Check the return status when we forward stdin and remove the recipient when they are no longer alive This commit was SVN r22786. --- orte/mca/iof/hnp/iof_hnp.h | 8 ++++---- orte/mca/iof/hnp/iof_hnp_read.c | 12 ++++++++++-- orte/mca/iof/hnp/iof_hnp_send.c | 25 +++++++++++++++---------- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/orte/mca/iof/hnp/iof_hnp.h b/orte/mca/iof/hnp/iof_hnp.h index f68d9a9afb..3618b8bbeb 100644 --- a/orte/mca/iof/hnp/iof_hnp.h +++ b/orte/mca/iof/hnp/iof_hnp.h @@ -79,10 +79,10 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata); void orte_iof_hnp_stdin_cb(int fd, short event, void *cbdata); bool orte_iof_hnp_stdin_check(int fd); -void orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, - orte_process_name_t *target, - orte_iof_tag_t tag, - unsigned char *data, int numbytes); +int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, + orte_process_name_t *target, + orte_iof_tag_t tag, + unsigned char *data, int numbytes); END_C_DECLS diff --git a/orte/mca/iof/hnp/iof_hnp_read.c b/orte/mca/iof/hnp/iof_hnp_read.c index f430025820..f31e89bda2 100644 --- a/orte/mca/iof/hnp/iof_hnp_read.c +++ b/orte/mca/iof/hnp/iof_hnp_read.c @@ -85,7 +85,7 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata; unsigned char data[ORTE_IOF_BASE_MSG_MAX]; int32_t numbytes; - opal_list_item_t *item; + opal_list_item_t *item, *prev_item; orte_iof_proc_t *proct; int rc; @@ -182,7 +182,15 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) * sent - this will tell the daemon to close * the fd for stdin to that proc */ - orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &sink->name, ORTE_IOF_STDIN, data, numbytes); + if( ORTE_SUCCESS != (rc = orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &sink->name, ORTE_IOF_STDIN, data, numbytes))) { + /* if the addressee is unknown, remove the sink from the list */ + if( ORTE_ERR_ADDRESSEE_UNKNOWN == rc ) { + prev_item = opal_list_get_prev(item); + opal_list_remove_item(&mca_iof_hnp_component.sinks, item); + OBJ_RELEASE(item); + item = prev_item; + } + } } } /* if num_bytes was zero, then we need to terminate the event */ diff --git a/orte/mca/iof/hnp/iof_hnp_send.c b/orte/mca/iof/hnp/iof_hnp_send.c index 915ad0cc40..03d77b2f3b 100644 --- a/orte/mca/iof/hnp/iof_hnp_send.c +++ b/orte/mca/iof/hnp/iof_hnp_send.c @@ -51,10 +51,10 @@ static void send_cb(int status, orte_process_name_t *peer, OBJ_RELEASE(buf); } -void orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, - orte_process_name_t *target, - orte_iof_tag_t tag, - unsigned char *data, int numbytes) +int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, + orte_process_name_t *target, + orte_iof_tag_t tag, + unsigned char *data, int numbytes) { opal_buffer_t *buf; int rc; @@ -67,7 +67,7 @@ void orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buf); - return; + return rc; } /* pack the name of the target - this is either the intended * recipient (if the tag is stdin and we are sending to a daemon), @@ -76,7 +76,7 @@ void orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, target, 1, ORTE_NAME))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buf); - return; + return rc; } /* if data is NULL, then we are done */ @@ -85,7 +85,7 @@ void orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, data, numbytes, OPAL_BYTE))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buf); - return; + return rc; } } @@ -95,12 +95,17 @@ void orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, /* xcast this to everyone - the local daemons will know how to handle it */ orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid, buf, ORTE_RML_TAG_IOF_PROXY); OBJ_RELEASE(buf); - return; + return ORTE_SUCCESS; } /* send the buffer to the host - this is either a daemon or * a tool that requested IOF */ - orte_rml.send_buffer_nb(host, buf, ORTE_RML_TAG_IOF_PROXY, - 0, send_cb, NULL); + if( ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(host, buf, ORTE_RML_TAG_IOF_PROXY, + 0, send_cb, NULL))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + return ORTE_SUCCESS; }