1
1

If you don't specify a rank-file mapping for every proc, then you'll segfault - which is probably a bad idea. I can't see an easy workaround, so just error out for now and we'll see if anyone really cares.

cmr=v1.8.2:reviewer=jsquyres

This commit was SVN r32053.
Этот коммит содержится в:
Ralph Castain 2014-06-19 20:30:06 +00:00
родитель 395078da00
Коммит b43f760f93

Просмотреть файл

@ -114,6 +114,9 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
/* convenience def */ /* convenience def */
map = jdata->map; map = jdata->map;
/* setup the node list */
OBJ_CONSTRUCT(&node_list, opal_list_t);
/* pickup the first app - there must be at least one */ /* pickup the first app - there must be at least one */
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) {
rc = ORTE_ERR_SILENT; rc = ORTE_ERR_SILENT;
@ -138,7 +141,6 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
/* start at the beginning... */ /* start at the beginning... */
vpid_start = 0; vpid_start = 0;
jdata->num_procs = 0; jdata->num_procs = 0;
OBJ_CONSTRUCT(&node_list, opal_list_t);
OBJ_CONSTRUCT(&rankmap, opal_pointer_array_t); OBJ_CONSTRUCT(&rankmap, opal_pointer_array_t);
/* parse the rankfile, storing its results in the rankmap */ /* parse the rankfile, storing its results in the rankmap */
@ -183,20 +185,11 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
rank = vpid_start + k; rank = vpid_start + k;
/* get the rankfile entry for this rank */ /* get the rankfile entry for this rank */
if (NULL == (rfmap = (orte_rmaps_rank_file_map_t*)opal_pointer_array_get_item(&rankmap, rank))) { if (NULL == (rfmap = (orte_rmaps_rank_file_map_t*)opal_pointer_array_get_item(&rankmap, rank))) {
#if OPAL_HAVE_HWLOC
/* no entry for this rank - if a default slot_list was given,
* then use it instead
*/
if (NULL != opal_hwloc_base_slot_list) {
slots = opal_hwloc_base_slot_list;
} else {
#endif
/* all ranks must be specified */ /* all ranks must be specified */
orte_show_help("help-rmaps_rank_file.txt", "missing-rank", true, rank, orte_rankfile); orte_show_help("help-rmaps_rank_file.txt", "missing-rank", true, rank, orte_rankfile);
rc = ORTE_ERR_SILENT; rc = ORTE_ERR_SILENT;
goto error; goto error;
#if OPAL_HAVE_HWLOC #if OPAL_HAVE_HWLOC
}
} else { } else {
if (0 == strlen(rfmap->slot_list)) { if (0 == strlen(rfmap->slot_list)) {
/* rank was specified but no slot list given - that's an error */ /* rank was specified but no slot list given - that's an error */
@ -210,10 +203,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
/* find the node where this proc was assigned */ /* find the node where this proc was assigned */
node = NULL; node = NULL;
for (item = opal_list_get_first(&node_list); OPAL_LIST_FOREACH(nd, &node_list, orte_node_t) {
item != opal_list_get_end(&node_list);
item = opal_list_get_next(item)) {
nd = (orte_node_t*)item;
if (NULL != rfmap->node_name && if (NULL != rfmap->node_name &&
0 == strcmp(nd->name, rfmap->node_name)) { 0 == strcmp(nd->name, rfmap->node_name)) {
node = nd; node = nd;
@ -250,7 +240,11 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
++(jdata->map->num_nodes); ++(jdata->map->num_nodes);
} }
proc = orte_rmaps_base_setup_proc(jdata, node, i); if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, i))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto error;
}
if ((node->slots < (int)node->num_procs) || if ((node->slots < (int)node->num_procs) ||
(0 < node->slots_max && node->slots_max < (int)node->num_procs)) { (0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
@ -340,10 +334,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
return rc; return rc;
error: error:
while(NULL != (item = opal_list_remove_first(&node_list))) { OPAL_LIST_DESTRUCT(&node_list);
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&node_list);
return rc; return rc;
} }
@ -456,11 +447,11 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
node_name = strdup(argv[0]); node_name = strdup(argv[0]);
} else if (2 == cnt) { } else if (2 == cnt) {
node_name = strdup(argv[1]); node_name = strdup(argv[1]);
} } else {
else {
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile); orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
rc = ORTE_ERR_BAD_PARAM; rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
opal_argv_free(argv);
goto unlock; goto unlock;
} }
opal_argv_free (argv); opal_argv_free (argv);
@ -469,6 +460,8 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile); orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
rc = ORTE_ERR_BAD_PARAM; rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(node_name);
node_name = NULL;
goto unlock; goto unlock;
} }
/* check if this is the local node */ /* check if this is the local node */
@ -478,6 +471,8 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
} else { } else {
rfmap->node_name = strdup(node_name); rfmap->node_name = strdup(node_name);
} }
free(node_name);
node_name = NULL;
} }
break; break;
case ORTE_RANKFILE_SLOT: case ORTE_RANKFILE_SLOT:
@ -494,6 +489,7 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
orte_show_help("help-rmaps_rank_file.txt", "bad-assign", true, rank, orte_show_help("help-rmaps_rank_file.txt", "bad-assign", true, rank,
opal_pointer_array_get_item(assigned_ranks_array, rank), rankfile); opal_pointer_array_get_item(assigned_ranks_array, rank), rankfile);
rc = ORTE_ERR_BAD_PARAM; rc = ORTE_ERR_BAD_PARAM;
free(value);
goto unlock; goto unlock;
} else { } else {
/* prepare rank assignment string for the help message in case of a bad-assign */ /* prepare rank assignment string for the help message in case of a bad-assign */
@ -506,11 +502,13 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile); orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
rc = ORTE_ERR_BAD_PARAM; rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(value);
goto unlock; goto unlock;
} }
for (i=0; i < 64 && '\0' != value[i]; i++) { for (i=0; i < 64 && '\0' != value[i]; i++) {
rfmap->slot_list[i] = value[i]; rfmap->slot_list[i] = value[i];
} }
free(value);
break; break;
} }
} }