Fix a potential ordering issue with the names and RML exchange during
sm coll setup. This commit was SVN r21981.
This commit is contained in:
parent
0ccf2d87b6
commit
f9dfa03fde
@ -192,19 +192,23 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(ompi_proc_t **procs,
|
|||||||
char filename_to_send[OPAL_PATH_MAX];
|
char filename_to_send[OPAL_PATH_MAX];
|
||||||
opal_list_item_t *item;
|
opal_list_item_t *item;
|
||||||
pending_rml_msg_t *rml_msg;
|
pending_rml_msg_t *rml_msg;
|
||||||
|
ompi_proc_t *temp_proc;
|
||||||
|
|
||||||
/* Reorder all procs array to have all the local procs at the
|
/* Reorder all procs array to have all the local procs at the
|
||||||
beginning. Simultaneously look for the local proc with the
|
beginning. Simultaneously look for the local proc with the
|
||||||
lowest name. */
|
lowest name. Ensure that procs[0] is the lowest named
|
||||||
|
process. */
|
||||||
for (p = 0; p < num_procs; p++) {
|
for (p = 0; p < num_procs; p++) {
|
||||||
if (OPAL_PROC_ON_LOCAL_NODE(procs[p]->proc_flags)) {
|
if (OPAL_PROC_ON_LOCAL_NODE(procs[p]->proc_flags)) {
|
||||||
procs[num_local_procs] = procs[p];
|
procs[num_local_procs] = procs[p];
|
||||||
if (NULL == lowest_name) {
|
if (NULL == lowest_name) {
|
||||||
lowest_name = &(procs[0]->proc_name);
|
procs[num_local_procs] = procs[p];
|
||||||
} else if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
} else if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||||
&(procs[p]->proc_name),
|
&(procs[p]->proc_name),
|
||||||
lowest_name) < 0) {
|
lowest_name) < 0) {
|
||||||
lowest_name = &(procs[p]->proc_name);
|
temp_proc = procs[0];
|
||||||
|
procs[0] = procs[p];
|
||||||
|
procs[num_local_procs] = temp_proc;
|
||||||
}
|
}
|
||||||
++num_local_procs;
|
++num_local_procs;
|
||||||
}
|
}
|
||||||
@ -213,6 +217,7 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(ompi_proc_t **procs,
|
|||||||
if (0 == num_local_procs) {
|
if (0 == num_local_procs) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
lowest_name = &(procs[0]->proc_name);
|
||||||
num_procs = num_local_procs;
|
num_procs = num_local_procs;
|
||||||
|
|
||||||
iov[0].iov_base = &sm_file_created;
|
iov[0].iov_base = &sm_file_created;
|
||||||
@ -268,10 +273,12 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(ompi_proc_t **procs,
|
|||||||
for (p = 1; p < num_procs; p++) {
|
for (p = 1; p < num_procs; p++) {
|
||||||
rc = orte_rml.send(&(procs[p]->proc_name), iov, 3,
|
rc = orte_rml.send(&(procs[p]->proc_name), iov, 3,
|
||||||
OMPI_RML_TAG_SM_BACK_FILE_CREATED, 0);
|
OMPI_RML_TAG_SM_BACK_FILE_CREATED, 0);
|
||||||
if (rc < 0) {
|
if (rc < (ssize_t) (iov[0].iov_len + iov[1].iov_len + iov[2].iov_len)) {
|
||||||
opal_output(0, "mca_common_sm_mmap_init: "
|
opal_output(0, "mca_common_sm_mmap_init: "
|
||||||
"orte_rml.send failed to %lu with errno=%d\n",
|
"orte_rml.send failed to %lu with errno=%d, ret=%d, iov_len sum=%d\n",
|
||||||
(unsigned long)p, errno);
|
(unsigned long)p, errno,
|
||||||
|
rc,
|
||||||
|
(int) (iov[0].iov_len + iov[1].iov_len + iov[2].iov_len));
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in new issue
Block a user