1
1

Handle the case where a user's rankfile specifies only cpus, and not socket:cpu pairs.

This commit was SVN r25803.
Этот коммит содержится в:
Ralph Castain 2012-01-27 12:21:45 +00:00
родитель 61ac2bb11b
Коммит 3f31feee6f
5 изменённых файлов: 83 добавлений и 9 удалений

Просмотреть файл

@ -847,7 +847,7 @@ static int socket_core_to_cpu_set(char *socket_core_list,
opal_argv_free(range);
}
} else {
/* we don't support other levels yet */
/* unrecognized option */
rc = OPAL_ERR_NOT_SUPPORTED;
break;
}
@ -863,7 +863,12 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str,
{
char **item;
int rc, i;
hwloc_obj_t pu;
hwloc_cpuset_t pucpus;
char **range;
size_t range_cnt;
int core_id, lower_range, upper_range;
/* bozo checks */
if (NULL == opal_hwloc_topology) {
return OPAL_ERR_NOT_SUPPORTED;
@ -904,9 +909,49 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str,
}
}
} else {
/* we don't support other things yet */
opal_argv_free(item);
return OPAL_ERR_NOT_SUPPORTED;
/* just a core specification - see if one or a range was given */
range = opal_argv_split(item[i], '-');
range_cnt = opal_argv_count(range);
hwloc_bitmap_zero(cpumask);
/* see if a range was set or not */
switch (range_cnt) {
case 1: /* only one core specified */
core_id = atoi(range[0]);
/* find the specified logical available cpu */
if (NULL == (pu = get_pu(topo, core_id))) {
opal_argv_free(range);
opal_argv_free(item);
return OPAL_ERROR;
}
/* get the available cpus for that object */
pucpus = opal_hwloc_base_get_available_cpus(topo, pu);
/* set that in the mask */
hwloc_bitmap_copy(cpumask, pucpus);
break;
case 2: /* range of core id's was given */
lower_range = atoi(range[0]);
upper_range = atoi(range[1]);
hwloc_bitmap_zero(cpumask);
for (core_id=lower_range; core_id <= upper_range; core_id++) {
/* find the specified logical available cpu */
if (NULL == (pu = get_pu(topo, core_id))) {
opal_argv_free(range);
opal_argv_free(item);
return OPAL_ERROR;
}
/* get the available cpus for that object */
pucpus = opal_hwloc_base_get_available_cpus(topo, pu);
/* set that in the mask */
hwloc_bitmap_or(cpumask, cpumask, pucpus);
}
break;
default:
opal_argv_free(range);
opal_argv_free(item);
return OPAL_ERROR;
}
}
}
opal_argv_free(item);

Просмотреть файл

@ -79,7 +79,11 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
int rc;
int32_t ljob;
int i;
orte_node_t *node;
#if OPAL_HAVE_HWLOC
hwloc_topology_t t0;
#endif
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:setup_job for job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@ -124,6 +128,25 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
return rc;
}
#if OPAL_HAVE_HWLOC
/* if we are not going to launch, then we need to set any
* undefined topologies to match our own so the mapper
* can operate
*/
if (orte_do_not_launch) {
node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
t0 = node->topology;
for (i=1; i < orte_node_pool->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
continue;
}
if (NULL == node->topology) {
node->topology = t0;
}
}
}
#endif
/* map the job */
if (ORTE_SUCCESS != (rc = orte_rmaps.map_job(jdata))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -131,15 +131,14 @@ You can override this protection by adding the "overload-allowed"
option to your binding directive.
#
[rmaps:no-topology]
-A request was made for nperxxx that requires knowledge of
+A mapping directive was given that requires knowledge of
 a remote node's topology. However, no topology info is
 available for the following node:
 Node: %s
 The job cannot be executed under this condition. Please either
-remove the nperxxx directive and specify the number of processes
-to use, or investigate the lack of topology info.
+remove the directive or investigate the lack of topology info.
#
[rmaps:no-available-cpus]
While computing bindings, we found no available cpus on

Просмотреть файл

@ -512,6 +512,11 @@ static int bind_in_place(orte_job_t *jdata,
int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
{
if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(jdata->map->binding)) {
/* user specified binding by rankfile - nothing for us to do */
return ORTE_SUCCESS;
}
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) ||
OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(jdata->map->binding)) {
/* no binding requested */

Просмотреть файл

@ -107,6 +107,8 @@ static int orte_rmaps_rank_file_open(void)
}
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYUSER);
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
/* we are going to bind to cpuset since the user is specifying the cpus */
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CPUSET);
/* make us first */
my_priority = 10000;
}