diff --git a/orte/mca/rml/base/rml_base_contact.c b/orte/mca/rml/base/rml_base_contact.c index 73ed100473..fa87289882 100644 --- a/orte/mca/rml/base/rml_base_contact.c +++ b/orte/mca/rml/base/rml_base_contact.c @@ -68,6 +68,7 @@ int orte_rml_base_update_contact_info(opal_buffer_t* data) char *rml_uri; orte_process_name_t name; int rc; + orte_jobid_t jobid; /* unpack the data for each entry */ num_procs = 0; @@ -96,6 +97,12 @@ int orte_rml_base_update_contact_info(opal_buffer_t* data) * since we were given the contact info */ orte_routed.update_route(&name, &name); + /* we only get an update from a single jobid - the command + * that creates these doesn't cross jobid boundaries - so + * record it here + */ + jobid = name.jobid; + /* track how many procs were in the message */ ++num_procs; } if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { @@ -103,12 +110,13 @@ int orte_rml_base_update_contact_info(opal_buffer_t* data) return rc; } - /* if we are a daemon, this update would include updated contact info + /* if we are a daemon and this was info about our jobid, this update would + * include updated contact info * for all daemons in the system - indicating that the number of daemons * changed since we were initially launched. Thus, update the num_procs * in our process_info struct so we can correctly route any messages */ - if (orte_process_info.daemon) { + if (ORTE_PROC_MY_NAME->jobid == jobid && orte_process_info.daemon) { orte_process_info.num_procs = num_procs; } diff --git a/orte/mca/routed/unity/routed_unity.c b/orte/mca/routed/unity/routed_unity.c index cf224cd4d1..91ff6b32c0 100644 --- a/orte/mca/routed/unity/routed_unity.c +++ b/orte/mca/routed/unity/routed_unity.c @@ -205,7 +205,6 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer) orte_std_cntr_t cnt; char *rml_uri; int rc; - orte_rml_cmd_flag_t command=ORTE_RML_UPDATE_CMD; /* lookup the job object */ if (NULL == (jdata = orte_get_job_data_object(job))) { @@ -266,30 +265,6 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer) jdata->state = ORTE_JOB_STATE_RUNNING; } - /* first update the daemons so they will know how to talk to the - * procs - this is required for support of modex and barrier - */ - OBJ_CONSTRUCT(&buf, opal_buffer_t); - /* pack an update command */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, ORTE_RML_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return rc; - } - /* pack the RML contact info for each proc */ - if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(jdata->jobid, &buf))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return rc; - } - /* send it to the daemons via xcast */ - if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid, &buf, ORTE_RML_TAG_RML_INFO_UPDATE))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return rc; - } - OBJ_DESTRUCT(&buf); - /* now send to the procs so they release from their barrier */ OBJ_CONSTRUCT(&buf, opal_buffer_t); /* pack the RML contact info for each proc */