Fix potential issue if opal_hwloc_topology is NULL
cmr=v1.8.2:reviewer=jsquyres
This commit was SVN r32050.
Этот коммит содержится в:
родитель
b5a2ceaa7c
Коммит
b618b36a2f
@ -74,174 +74,176 @@ int orte_ess_base_proc_binding(void)
|
||||
"%s Not bound at launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
/* we were not bound at launch */
|
||||
if (NULL != opal_hwloc_topology) {
|
||||
support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology);
|
||||
/* get our node object */
|
||||
node = hwloc_get_root_obj(opal_hwloc_topology);
|
||||
nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node);
|
||||
/* get our bindings */
|
||||
cpus = hwloc_bitmap_alloc();
|
||||
if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) {
|
||||
/* we are NOT bound if get_cpubind fails, nor can we be bound - the
|
||||
* environment does not support it
|
||||
*/
|
||||
hwloc_bitmap_free(cpus);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||
"%s Binding not supported",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
goto MOVEON;
|
||||
}
|
||||
/* we are bound if the two cpusets are not equal,
|
||||
* or if there is only ONE cpu available to us
|
||||
if (NULL == opal_hwloc_topology) {
|
||||
/* there is nothing we can do, so just return */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology);
|
||||
/* get our node object */
|
||||
node = hwloc_get_root_obj(opal_hwloc_topology);
|
||||
nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node);
|
||||
/* get our bindings */
|
||||
cpus = hwloc_bitmap_alloc();
|
||||
if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) {
|
||||
/* we are NOT bound if get_cpubind fails, nor can we be bound - the
|
||||
* environment does not support it
|
||||
*/
|
||||
if (0 != hwloc_bitmap_compare(cpus, nodeset) ||
|
||||
opal_hwloc_base_single_cpu(nodeset) ||
|
||||
opal_hwloc_base_single_cpu(cpus)) {
|
||||
/* someone external set it - indicate it is set
|
||||
* so that we know
|
||||
*/
|
||||
orte_proc_is_bound = true;
|
||||
hwloc_bitmap_free(cpus);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||
"%s Binding not supported",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
goto MOVEON;
|
||||
}
|
||||
/* we are bound if the two cpusets are not equal,
|
||||
* or if there is only ONE cpu available to us
|
||||
*/
|
||||
if (0 != hwloc_bitmap_compare(cpus, nodeset) ||
|
||||
opal_hwloc_base_single_cpu(nodeset) ||
|
||||
opal_hwloc_base_single_cpu(cpus)) {
|
||||
/* someone external set it - indicate it is set
|
||||
* so that we know
|
||||
*/
|
||||
orte_proc_is_bound = true;
|
||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||
hwloc_bitmap_free(cpus);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||
"%s Process was externally bound",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
} else if (support->cpubind->set_thisproc_cpubind &&
|
||||
OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) &&
|
||||
OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
/* the system is capable of doing processor affinity, but it
|
||||
* has not yet been set - see if a slot_list was given
|
||||
*/
|
||||
hwloc_bitmap_zero(cpus);
|
||||
if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list,
|
||||
opal_hwloc_topology, cpus))) {
|
||||
error = "Setting processor affinity failed";
|
||||
hwloc_bitmap_free(cpus);
|
||||
goto error;
|
||||
}
|
||||
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
||||
error = "Setting processor affinity failed";
|
||||
hwloc_bitmap_free(cpus);
|
||||
goto error;
|
||||
}
|
||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||
hwloc_bitmap_free(cpus);
|
||||
orte_proc_is_bound = true;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||
"%s Process was externally bound",
|
||||
"%s Process bound according to slot_list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
} else if (support->cpubind->set_thisproc_cpubind &&
|
||||
OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) &&
|
||||
OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
/* the system is capable of doing processor affinity, but it
|
||||
* has not yet been set - see if a slot_list was given
|
||||
} else {
|
||||
/* cleanup */
|
||||
hwloc_bitmap_free(cpus);
|
||||
/* get the node rank */
|
||||
if (ORTE_NODE_RANK_INVALID == orte_process_info.my_node_rank) {
|
||||
/* this is not an error - could be due to being
|
||||
* direct launched - so just ignore and leave
|
||||
* us unbound
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||
"%s Process not bound - no node rank available",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
goto MOVEON;
|
||||
}
|
||||
/* if the binding policy is hwthread, then we bind to the nrank-th
|
||||
* hwthread on this node
|
||||
*/
|
||||
hwloc_bitmap_zero(cpus);
|
||||
if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list,
|
||||
opal_hwloc_topology, cpus))) {
|
||||
error = "Setting processor affinity failed";
|
||||
hwloc_bitmap_free(cpus);
|
||||
if (OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU,
|
||||
0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
|
||||
ret = ORTE_ERR_NOT_FOUND;
|
||||
error = "Getting hwthread object";
|
||||
goto error;
|
||||
}
|
||||
cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
|
||||
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
||||
ret = ORTE_ERROR;
|
||||
error = "Setting processor affinity failed";
|
||||
hwloc_bitmap_free(cpus);
|
||||
goto error;
|
||||
}
|
||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||
hwloc_bitmap_free(cpus);
|
||||
orte_proc_is_bound = true;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||
"%s Process bound according to slot_list",
|
||||
"%s Process bound to hwthread",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
} else if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
/* if the binding policy is core, then we bind to the nrank-th
|
||||
* core on this node
|
||||
*/
|
||||
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
|
||||
0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
|
||||
ret = ORTE_ERR_NOT_FOUND;
|
||||
error = "Getting core object";
|
||||
goto error;
|
||||
}
|
||||
cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
|
||||
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
||||
error = "Setting processor affinity failed";
|
||||
ret = ORTE_ERROR;
|
||||
goto error;
|
||||
}
|
||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||
hwloc_bitmap_free(cpus);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||
"%s Process bound to core",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
} else {
|
||||
/* cleanup */
|
||||
hwloc_bitmap_free(cpus);
|
||||
/* get the node rank */
|
||||
if (ORTE_NODE_RANK_INVALID == orte_process_info.my_node_rank) {
|
||||
/* this is not an error - could be due to being
|
||||
* direct launched - so just ignore and leave
|
||||
* us unbound
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||
"%s Process not bound - no node rank available",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
goto MOVEON;
|
||||
}
|
||||
/* if the binding policy is hwthread, then we bind to the nrank-th
|
||||
* hwthread on this node
|
||||
/* for all higher binding policies, we bind to the specified
|
||||
* object that the nrank-th core belongs to
|
||||
*/
|
||||
if (OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU,
|
||||
0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
|
||||
ret = ORTE_ERR_NOT_FOUND;
|
||||
error = "Getting hwthread object";
|
||||
goto error;
|
||||
}
|
||||
cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
|
||||
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
||||
ret = ORTE_ERROR;
|
||||
error = "Setting processor affinity failed";
|
||||
goto error;
|
||||
}
|
||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||
hwloc_bitmap_free(cpus);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||
"%s Process bound to hwthread",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
} else if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
/* if the binding policy is core, then we bind to the nrank-th
|
||||
* core on this node
|
||||
*/
|
||||
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
|
||||
0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
|
||||
ret = ORTE_ERR_NOT_FOUND;
|
||||
error = "Getting core object";
|
||||
goto error;
|
||||
}
|
||||
cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
|
||||
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
||||
error = "Setting processor affinity failed";
|
||||
ret = ORTE_ERROR;
|
||||
goto error;
|
||||
}
|
||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||
hwloc_bitmap_free(cpus);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||
"%s Process bound to core",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
|
||||
0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
|
||||
ret = ORTE_ERR_NOT_FOUND;
|
||||
error = "Getting core object";
|
||||
goto error;
|
||||
}
|
||||
if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
target = HWLOC_OBJ_CACHE;
|
||||
cache_level = 1;
|
||||
} else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
target = HWLOC_OBJ_CACHE;
|
||||
cache_level = 2;
|
||||
} else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
target = HWLOC_OBJ_CACHE;
|
||||
cache_level = 3;
|
||||
} else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
target = HWLOC_OBJ_SOCKET;
|
||||
} else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
target = HWLOC_OBJ_NODE;
|
||||
} else {
|
||||
/* for all higher binding policies, we bind to the specified
|
||||
* object that the nrank-th core belongs to
|
||||
*/
|
||||
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
|
||||
0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
|
||||
ret = ORTE_ERR_NOT_FOUND;
|
||||
error = "Getting core object";
|
||||
goto error;
|
||||
}
|
||||
if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
target = HWLOC_OBJ_CACHE;
|
||||
cache_level = 1;
|
||||
} else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
target = HWLOC_OBJ_CACHE;
|
||||
cache_level = 2;
|
||||
} else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
target = HWLOC_OBJ_CACHE;
|
||||
cache_level = 3;
|
||||
} else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
target = HWLOC_OBJ_SOCKET;
|
||||
} else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||
target = HWLOC_OBJ_NODE;
|
||||
} else {
|
||||
ret = ORTE_ERR_NOT_FOUND;
|
||||
error = "Binding policy not known";
|
||||
goto error;
|
||||
}
|
||||
for (obj = obj->parent; NULL != obj; obj = obj->parent) {
|
||||
if (target == obj->type) {
|
||||
if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) {
|
||||
continue;
|
||||
}
|
||||
/* this is the place! */
|
||||
cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
|
||||
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
||||
ret = ORTE_ERROR;
|
||||
error = "Setting processor affinity failed";
|
||||
goto error;
|
||||
}
|
||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||
hwloc_bitmap_free(cpus);
|
||||
orte_proc_is_bound = true;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||
"%s Process bound to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
hwloc_obj_type_string(target)));
|
||||
break;
|
||||
ret = ORTE_ERR_NOT_FOUND;
|
||||
error = "Binding policy not known";
|
||||
goto error;
|
||||
}
|
||||
for (obj = obj->parent; NULL != obj; obj = obj->parent) {
|
||||
if (target == obj->type) {
|
||||
if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) {
|
||||
continue;
|
||||
}
|
||||
/* this is the place! */
|
||||
cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
|
||||
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
||||
ret = ORTE_ERROR;
|
||||
error = "Setting processor affinity failed";
|
||||
goto error;
|
||||
}
|
||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||
hwloc_bitmap_free(cpus);
|
||||
orte_proc_is_bound = true;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||
"%s Process bound to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
hwloc_obj_type_string(target)));
|
||||
break;
|
||||
}
|
||||
if (!orte_proc_is_bound) {
|
||||
ret = ORTE_ERROR;
|
||||
error = "Setting processor affinity failed";
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
if (!orte_proc_is_bound) {
|
||||
ret = ORTE_ERROR;
|
||||
error = "Setting processor affinity failed";
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user