Merge pull request #5301 from rhc54/topic/nodis
Fix the no-disconnect test
Этот коммит содержится в:
Коммит
30ffdc9efc
@@ -65,3 +65,4 @@ orte_abort_timeout = 10
|
||||
hwloc_base_mem_bind_failure_action = silent
|
||||
btl_tcp_if_include=10.10.10.0/24
|
||||
oob=^ud
|
||||
btl=self,vader,tcp
|
||||
|
@@ -390,12 +390,16 @@ static int bind_in_place(orte_job_t *jdata,
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
data = (opal_hwloc_obj_data_t*)locale->userdata;
|
||||
/* get the number of cpus under this location */
|
||||
if (0 == (ncpus = opal_hwloc_base_get_npus(node->topology->topo, locale))) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
data = (opal_hwloc_obj_data_t*)locale->userdata;
|
||||
if (NULL == data) {
|
||||
data = OBJ_NEW(opal_hwloc_obj_data_t);
|
||||
locale->userdata = data;
|
||||
}
|
||||
/* if we don't have enough cpus to support this additional proc, try
|
||||
* shifting the location to a cousin that can support it - the important
|
||||
* thing is that we maintain the same level in the topology */
|
||||
@@ -406,8 +410,12 @@ static int bind_in_place(orte_job_t *jdata,
|
||||
sib = locale;
|
||||
found = false;
|
||||
while (NULL != (sib = sib->next_cousin)) {
|
||||
data = (opal_hwloc_obj_data_t*)sib->userdata;
|
||||
ncpus = opal_hwloc_base_get_npus(node->topology->topo, sib);
|
||||
data = (opal_hwloc_obj_data_t*)sib->userdata;
|
||||
if (NULL == data) {
|
||||
data = OBJ_NEW(opal_hwloc_obj_data_t);
|
||||
sib->userdata = data;
|
||||
}
|
||||
if (data->num_bound < ncpus) {
|
||||
found = true;
|
||||
locale = sib;
|
||||
@@ -421,8 +429,12 @@ static int bind_in_place(orte_job_t *jdata,
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
sib = locale;
|
||||
while (NULL != (sib = sib->prev_cousin)) {
|
||||
data = (opal_hwloc_obj_data_t*)sib->userdata;
|
||||
ncpus = opal_hwloc_base_get_npus(node->topology->topo, sib);
|
||||
data = (opal_hwloc_obj_data_t*)sib->userdata;
|
||||
if (NULL == data) {
|
||||
data = OBJ_NEW(opal_hwloc_obj_data_t);
|
||||
sib->userdata = data;
|
||||
}
|
||||
if (data->num_bound < ncpus) {
|
||||
found = true;
|
||||
locale = sib;
|
||||
@@ -453,6 +465,10 @@ static int bind_in_place(orte_job_t *jdata,
|
||||
}
|
||||
/* track the number bound */
|
||||
data = (opal_hwloc_obj_data_t*)locale->userdata; // just in case it changed
|
||||
if (NULL == data) {
|
||||
data = OBJ_NEW(opal_hwloc_obj_data_t);
|
||||
locale->userdata = data;
|
||||
}
|
||||
data->num_bound++;
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"BINDING PROC %s TO %s NUMBER %u",
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user