Fix potential issue if opal_hwloc_topology is NULL
cmr=v1.8.2:reviewer=jsquyres
This commit was SVN r32050.
Этот коммит содержится в:
родитель
b5a2ceaa7c
Коммит
b618b36a2f
@ -74,174 +74,176 @@ int orte_ess_base_proc_binding(void)
|
|||||||
"%s Not bound at launch",
|
"%s Not bound at launch",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
/* we were not bound at launch */
|
/* we were not bound at launch */
|
||||||
if (NULL != opal_hwloc_topology) {
|
if (NULL == opal_hwloc_topology) {
|
||||||
support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology);
|
/* there is nothing we can do, so just return */
|
||||||
/* get our node object */
|
return ORTE_SUCCESS;
|
||||||
node = hwloc_get_root_obj(opal_hwloc_topology);
|
}
|
||||||
nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node);
|
support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology);
|
||||||
/* get our bindings */
|
/* get our node object */
|
||||||
cpus = hwloc_bitmap_alloc();
|
node = hwloc_get_root_obj(opal_hwloc_topology);
|
||||||
if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) {
|
nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node);
|
||||||
/* we are NOT bound if get_cpubind fails, nor can we be bound - the
|
/* get our bindings */
|
||||||
* environment does not support it
|
cpus = hwloc_bitmap_alloc();
|
||||||
*/
|
if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) {
|
||||||
hwloc_bitmap_free(cpus);
|
/* we are NOT bound if get_cpubind fails, nor can we be bound - the
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
* environment does not support it
|
||||||
"%s Binding not supported",
|
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
|
||||||
goto MOVEON;
|
|
||||||
}
|
|
||||||
/* we are bound if the two cpusets are not equal,
|
|
||||||
* or if there is only ONE cpu available to us
|
|
||||||
*/
|
*/
|
||||||
if (0 != hwloc_bitmap_compare(cpus, nodeset) ||
|
hwloc_bitmap_free(cpus);
|
||||||
opal_hwloc_base_single_cpu(nodeset) ||
|
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||||
opal_hwloc_base_single_cpu(cpus)) {
|
"%s Binding not supported",
|
||||||
/* someone external set it - indicate it is set
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
* so that we know
|
goto MOVEON;
|
||||||
*/
|
}
|
||||||
orte_proc_is_bound = true;
|
/* we are bound if the two cpusets are not equal,
|
||||||
|
* or if there is only ONE cpu available to us
|
||||||
|
*/
|
||||||
|
if (0 != hwloc_bitmap_compare(cpus, nodeset) ||
|
||||||
|
opal_hwloc_base_single_cpu(nodeset) ||
|
||||||
|
opal_hwloc_base_single_cpu(cpus)) {
|
||||||
|
/* someone external set it - indicate it is set
|
||||||
|
* so that we know
|
||||||
|
*/
|
||||||
|
orte_proc_is_bound = true;
|
||||||
|
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||||
|
hwloc_bitmap_free(cpus);
|
||||||
|
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||||
|
"%s Process was externally bound",
|
||||||
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
|
} else if (support->cpubind->set_thisproc_cpubind &&
|
||||||
|
OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) &&
|
||||||
|
OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||||
|
/* the system is capable of doing processor affinity, but it
|
||||||
|
* has not yet been set - see if a slot_list was given
|
||||||
|
*/
|
||||||
|
hwloc_bitmap_zero(cpus);
|
||||||
|
if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||||
|
if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list,
|
||||||
|
opal_hwloc_topology, cpus))) {
|
||||||
|
error = "Setting processor affinity failed";
|
||||||
|
hwloc_bitmap_free(cpus);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
||||||
|
error = "Setting processor affinity failed";
|
||||||
|
hwloc_bitmap_free(cpus);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||||
hwloc_bitmap_free(cpus);
|
hwloc_bitmap_free(cpus);
|
||||||
|
orte_proc_is_bound = true;
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||||
"%s Process was externally bound",
|
"%s Process bound according to slot_list",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
} else if (support->cpubind->set_thisproc_cpubind &&
|
} else {
|
||||||
OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) &&
|
/* cleanup */
|
||||||
OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
hwloc_bitmap_free(cpus);
|
||||||
/* the system is capable of doing processor affinity, but it
|
/* get the node rank */
|
||||||
* has not yet been set - see if a slot_list was given
|
if (ORTE_NODE_RANK_INVALID == orte_process_info.my_node_rank) {
|
||||||
|
/* this is not an error - could be due to being
|
||||||
|
* direct launched - so just ignore and leave
|
||||||
|
* us unbound
|
||||||
|
*/
|
||||||
|
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||||
|
"%s Process not bound - no node rank available",
|
||||||
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
|
goto MOVEON;
|
||||||
|
}
|
||||||
|
/* if the binding policy is hwthread, then we bind to the nrank-th
|
||||||
|
* hwthread on this node
|
||||||
*/
|
*/
|
||||||
hwloc_bitmap_zero(cpus);
|
if (OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||||
if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU,
|
||||||
if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list,
|
0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
|
||||||
opal_hwloc_topology, cpus))) {
|
ret = ORTE_ERR_NOT_FOUND;
|
||||||
error = "Setting processor affinity failed";
|
error = "Getting hwthread object";
|
||||||
hwloc_bitmap_free(cpus);
|
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
|
||||||
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
||||||
|
ret = ORTE_ERROR;
|
||||||
error = "Setting processor affinity failed";
|
error = "Setting processor affinity failed";
|
||||||
hwloc_bitmap_free(cpus);
|
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||||
hwloc_bitmap_free(cpus);
|
hwloc_bitmap_free(cpus);
|
||||||
orte_proc_is_bound = true;
|
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||||
"%s Process bound according to slot_list",
|
"%s Process bound to hwthread",
|
||||||
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
|
} else if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||||
|
/* if the binding policy is core, then we bind to the nrank-th
|
||||||
|
* core on this node
|
||||||
|
*/
|
||||||
|
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
|
||||||
|
0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
|
||||||
|
ret = ORTE_ERR_NOT_FOUND;
|
||||||
|
error = "Getting core object";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
|
||||||
|
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
||||||
|
error = "Setting processor affinity failed";
|
||||||
|
ret = ORTE_ERROR;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||||
|
hwloc_bitmap_free(cpus);
|
||||||
|
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||||
|
"%s Process bound to core",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
} else {
|
} else {
|
||||||
/* cleanup */
|
/* for all higher binding policies, we bind to the specified
|
||||||
hwloc_bitmap_free(cpus);
|
* object that the nrank-th core belongs to
|
||||||
/* get the node rank */
|
|
||||||
if (ORTE_NODE_RANK_INVALID == orte_process_info.my_node_rank) {
|
|
||||||
/* this is not an error - could be due to being
|
|
||||||
* direct launched - so just ignore and leave
|
|
||||||
* us unbound
|
|
||||||
*/
|
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
|
||||||
"%s Process not bound - no node rank available",
|
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
|
||||||
goto MOVEON;
|
|
||||||
}
|
|
||||||
/* if the binding policy is hwthread, then we bind to the nrank-th
|
|
||||||
* hwthread on this node
|
|
||||||
*/
|
*/
|
||||||
if (OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
|
||||||
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU,
|
0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
|
||||||
0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
|
ret = ORTE_ERR_NOT_FOUND;
|
||||||
ret = ORTE_ERR_NOT_FOUND;
|
error = "Getting core object";
|
||||||
error = "Getting hwthread object";
|
goto error;
|
||||||
goto error;
|
}
|
||||||
}
|
if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||||
cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
|
target = HWLOC_OBJ_CACHE;
|
||||||
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
cache_level = 1;
|
||||||
ret = ORTE_ERROR;
|
} else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||||
error = "Setting processor affinity failed";
|
target = HWLOC_OBJ_CACHE;
|
||||||
goto error;
|
cache_level = 2;
|
||||||
}
|
} else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
target = HWLOC_OBJ_CACHE;
|
||||||
hwloc_bitmap_free(cpus);
|
cache_level = 3;
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
} else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||||
"%s Process bound to hwthread",
|
target = HWLOC_OBJ_SOCKET;
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
} else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
||||||
} else if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
target = HWLOC_OBJ_NODE;
|
||||||
/* if the binding policy is core, then we bind to the nrank-th
|
|
||||||
* core on this node
|
|
||||||
*/
|
|
||||||
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
|
|
||||||
0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
|
|
||||||
ret = ORTE_ERR_NOT_FOUND;
|
|
||||||
error = "Getting core object";
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
|
|
||||||
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
|
||||||
error = "Setting processor affinity failed";
|
|
||||||
ret = ORTE_ERROR;
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
|
||||||
hwloc_bitmap_free(cpus);
|
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
|
||||||
"%s Process bound to core",
|
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
|
||||||
} else {
|
} else {
|
||||||
/* for all higher binding policies, we bind to the specified
|
ret = ORTE_ERR_NOT_FOUND;
|
||||||
* object that the nrank-th core belongs to
|
error = "Binding policy not known";
|
||||||
*/
|
goto error;
|
||||||
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
|
}
|
||||||
0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
|
for (obj = obj->parent; NULL != obj; obj = obj->parent) {
|
||||||
ret = ORTE_ERR_NOT_FOUND;
|
if (target == obj->type) {
|
||||||
error = "Getting core object";
|
if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) {
|
||||||
goto error;
|
continue;
|
||||||
}
|
|
||||||
if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
|
||||||
target = HWLOC_OBJ_CACHE;
|
|
||||||
cache_level = 1;
|
|
||||||
} else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
|
||||||
target = HWLOC_OBJ_CACHE;
|
|
||||||
cache_level = 2;
|
|
||||||
} else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
|
||||||
target = HWLOC_OBJ_CACHE;
|
|
||||||
cache_level = 3;
|
|
||||||
} else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
|
||||||
target = HWLOC_OBJ_SOCKET;
|
|
||||||
} else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
|
|
||||||
target = HWLOC_OBJ_NODE;
|
|
||||||
} else {
|
|
||||||
ret = ORTE_ERR_NOT_FOUND;
|
|
||||||
error = "Binding policy not known";
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
for (obj = obj->parent; NULL != obj; obj = obj->parent) {
|
|
||||||
if (target == obj->type) {
|
|
||||||
if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
/* this is the place! */
|
|
||||||
cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
|
|
||||||
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
|
||||||
ret = ORTE_ERROR;
|
|
||||||
error = "Setting processor affinity failed";
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
|
||||||
hwloc_bitmap_free(cpus);
|
|
||||||
orte_proc_is_bound = true;
|
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
|
||||||
"%s Process bound to %s",
|
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
|
||||||
hwloc_obj_type_string(target)));
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
/* this is the place! */
|
||||||
|
cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
|
||||||
|
if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
|
||||||
|
ret = ORTE_ERROR;
|
||||||
|
error = "Setting processor affinity failed";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
|
||||||
|
hwloc_bitmap_free(cpus);
|
||||||
|
orte_proc_is_bound = true;
|
||||||
|
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
|
||||||
|
"%s Process bound to %s",
|
||||||
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
|
hwloc_obj_type_string(target)));
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
if (!orte_proc_is_bound) {
|
}
|
||||||
ret = ORTE_ERROR;
|
if (!orte_proc_is_bound) {
|
||||||
error = "Setting processor affinity failed";
|
ret = ORTE_ERROR;
|
||||||
goto error;
|
error = "Setting processor affinity failed";
|
||||||
}
|
goto error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user