From b618b36a2f7abc4eba7aae8f58649e7b4e499d12 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 19 Jun 2014 18:52:41 +0000 Subject: [PATCH] Fix potential issue if opal_hwloc_topology is NULL cmr=v1.8.2:reviewer=jsquyres This commit was SVN r32050. --- orte/mca/ess/base/ess_base_fns.c | 296 ++++++++++++++++--------------- 1 file changed, 149 insertions(+), 147 deletions(-) diff --git a/orte/mca/ess/base/ess_base_fns.c b/orte/mca/ess/base/ess_base_fns.c index e2bab09195..0eb0f18c04 100644 --- a/orte/mca/ess/base/ess_base_fns.c +++ b/orte/mca/ess/base/ess_base_fns.c @@ -74,174 +74,176 @@ int orte_ess_base_proc_binding(void) "%s Not bound at launch", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* we were not bound at launch */ - if (NULL != opal_hwloc_topology) { - support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology); - /* get our node object */ - node = hwloc_get_root_obj(opal_hwloc_topology); - nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node); - /* get our bindings */ - cpus = hwloc_bitmap_alloc(); - if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) { - /* we are NOT bound if get_cpubind fails, nor can we be bound - the - * environment does not support it - */ - hwloc_bitmap_free(cpus); - OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, - "%s Binding not supported", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - goto MOVEON; - } - /* we are bound if the two cpusets are not equal, - * or if there is only ONE cpu available to us + if (NULL == opal_hwloc_topology) { + /* there is nothing we can do, so just return */ + return ORTE_SUCCESS; + } + support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology); + /* get our node object */ + node = hwloc_get_root_obj(opal_hwloc_topology); + nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node); + /* get our bindings */ + cpus = hwloc_bitmap_alloc(); + if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) { + /* we are NOT bound if get_cpubind fails, nor can we be bound - the + * environment does not support it */ - if (0 != hwloc_bitmap_compare(cpus, nodeset) || - opal_hwloc_base_single_cpu(nodeset) || - opal_hwloc_base_single_cpu(cpus)) { - /* someone external set it - indicate it is set - * so that we know - */ - orte_proc_is_bound = true; + hwloc_bitmap_free(cpus); + OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, + "%s Binding not supported", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + goto MOVEON; + } + /* we are bound if the two cpusets are not equal, + * or if there is only ONE cpu available to us + */ + if (0 != hwloc_bitmap_compare(cpus, nodeset) || + opal_hwloc_base_single_cpu(nodeset) || + opal_hwloc_base_single_cpu(cpus)) { + /* someone external set it - indicate it is set + * so that we know + */ + orte_proc_is_bound = true; + hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); + hwloc_bitmap_free(cpus); + OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, + "%s Process was externally bound", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + } else if (support->cpubind->set_thisproc_cpubind && + OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) && + OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { + /* the system is capable of doing processor affinity, but it + * has not yet been set - see if a slot_list was given + */ + hwloc_bitmap_zero(cpus); + if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { + if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list, + opal_hwloc_topology, cpus))) { + error = "Setting processor affinity failed"; + hwloc_bitmap_free(cpus); + goto error; + } + if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { + error = "Setting processor affinity failed"; + hwloc_bitmap_free(cpus); + goto error; + } hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); hwloc_bitmap_free(cpus); + orte_proc_is_bound = true; OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, - "%s Process was externally bound", + "%s Process bound according to slot_list", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - } else if (support->cpubind->set_thisproc_cpubind && - OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) && - OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - /* the system is capable of doing processor affinity, but it - * has not yet been set - see if a slot_list was given + } else { + /* cleanup */ + hwloc_bitmap_free(cpus); + /* get the node rank */ + if (ORTE_NODE_RANK_INVALID == orte_process_info.my_node_rank) { + /* this is not an error - could be due to being + * direct launched - so just ignore and leave + * us unbound + */ + OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, + "%s Process not bound - no node rank available", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + goto MOVEON; + } + /* if the binding policy is hwthread, then we bind to the nrank-th + * hwthread on this node */ - hwloc_bitmap_zero(cpus); - if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list, - opal_hwloc_topology, cpus))) { - error = "Setting processor affinity failed"; - hwloc_bitmap_free(cpus); + if (OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU, + 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { + ret = ORTE_ERR_NOT_FOUND; + error = "Getting hwthread object"; goto error; } + cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { + ret = ORTE_ERROR; error = "Setting processor affinity failed"; - hwloc_bitmap_free(cpus); goto error; } hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); hwloc_bitmap_free(cpus); - orte_proc_is_bound = true; OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, - "%s Process bound according to slot_list", + "%s Process bound to hwthread", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + } else if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { + /* if the binding policy is core, then we bind to the nrank-th + * core on this node + */ + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE, + 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { + ret = ORTE_ERR_NOT_FOUND; + error = "Getting core object"; + goto error; + } + cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); + if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { + error = "Setting processor affinity failed"; + ret = ORTE_ERROR; + goto error; + } + hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); + hwloc_bitmap_free(cpus); + OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, + "%s Process bound to core", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); } else { - /* cleanup */ - hwloc_bitmap_free(cpus); - /* get the node rank */ - if (ORTE_NODE_RANK_INVALID == orte_process_info.my_node_rank) { - /* this is not an error - could be due to being - * direct launched - so just ignore and leave - * us unbound - */ - OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, - "%s Process not bound - no node rank available", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - goto MOVEON; - } - /* if the binding policy is hwthread, then we bind to the nrank-th - * hwthread on this node + /* for all higher binding policies, we bind to the specified + * object that the nrank-th core belongs to */ - if (OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU, - 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { - ret = ORTE_ERR_NOT_FOUND; - error = "Getting hwthread object"; - goto error; - } - cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); - if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { - ret = ORTE_ERROR; - error = "Setting processor affinity failed"; - goto error; - } - hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); - hwloc_bitmap_free(cpus); - OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, - "%s Process bound to hwthread", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - } else if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - /* if the binding policy is core, then we bind to the nrank-th - * core on this node - */ - if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE, - 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { - ret = ORTE_ERR_NOT_FOUND; - error = "Getting core object"; - goto error; - } - cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); - if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { - error = "Setting processor affinity failed"; - ret = ORTE_ERROR; - goto error; - } - hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); - hwloc_bitmap_free(cpus); - OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, - "%s Process bound to core", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE, + 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { + ret = ORTE_ERR_NOT_FOUND; + error = "Getting core object"; + goto error; + } + if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { + target = HWLOC_OBJ_CACHE; + cache_level = 1; + } else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { + target = HWLOC_OBJ_CACHE; + cache_level = 2; + } else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { + target = HWLOC_OBJ_CACHE; + cache_level = 3; + } else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { + target = HWLOC_OBJ_SOCKET; + } else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { + target = HWLOC_OBJ_NODE; } else { - /* for all higher binding policies, we bind to the specified - * object that the nrank-th core belongs to - */ - if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE, - 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { - ret = ORTE_ERR_NOT_FOUND; - error = "Getting core object"; - goto error; - } - if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - target = HWLOC_OBJ_CACHE; - cache_level = 1; - } else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - target = HWLOC_OBJ_CACHE; - cache_level = 2; - } else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - target = HWLOC_OBJ_CACHE; - cache_level = 3; - } else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - target = HWLOC_OBJ_SOCKET; - } else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - target = HWLOC_OBJ_NODE; - } else { - ret = ORTE_ERR_NOT_FOUND; - error = "Binding policy not known"; - goto error; - } - for (obj = obj->parent; NULL != obj; obj = obj->parent) { - if (target == obj->type) { - if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) { - continue; - } - /* this is the place! */ - cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); - if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { - ret = ORTE_ERROR; - error = "Setting processor affinity failed"; - goto error; - } - hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); - hwloc_bitmap_free(cpus); - orte_proc_is_bound = true; - OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, - "%s Process bound to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - hwloc_obj_type_string(target))); - break; + ret = ORTE_ERR_NOT_FOUND; + error = "Binding policy not known"; + goto error; + } + for (obj = obj->parent; NULL != obj; obj = obj->parent) { + if (target == obj->type) { + if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) { + continue; } + /* this is the place! */ + cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); + if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { + ret = ORTE_ERROR; + error = "Setting processor affinity failed"; + goto error; + } + hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); + hwloc_bitmap_free(cpus); + orte_proc_is_bound = true; + OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, + "%s Process bound to %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + hwloc_obj_type_string(target))); + break; } - if (!orte_proc_is_bound) { - ret = ORTE_ERROR; - error = "Setting processor affinity failed"; - goto error; - } + } + if (!orte_proc_is_bound) { + ret = ORTE_ERROR; + error = "Setting processor affinity failed"; + goto error; } } }