If you don't specify a rank-file mapping for every proc, then you'll segfault - which is probably a bad idea. I can't see an easy workaround, so just error out for now, and let's see if anyone really cares.
cmr=v1.8.2:reviewer=jsquyres This commit was SVN r32053.
Этот коммит содержится в:
родитель
395078da00
Коммит
b43f760f93
@ -114,6 +114,9 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
|||||||
/* convenience def */
|
/* convenience def */
|
||||||
map = jdata->map;
|
map = jdata->map;
|
||||||
|
|
||||||
|
/* setup the node list */
|
||||||
|
OBJ_CONSTRUCT(&node_list, opal_list_t);
|
||||||
|
|
||||||
/* pickup the first app - there must be at least one */
|
/* pickup the first app - there must be at least one */
|
||||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) {
|
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) {
|
||||||
rc = ORTE_ERR_SILENT;
|
rc = ORTE_ERR_SILENT;
|
||||||
@ -138,7 +141,6 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
|||||||
/* start at the beginning... */
|
/* start at the beginning... */
|
||||||
vpid_start = 0;
|
vpid_start = 0;
|
||||||
jdata->num_procs = 0;
|
jdata->num_procs = 0;
|
||||||
OBJ_CONSTRUCT(&node_list, opal_list_t);
|
|
||||||
OBJ_CONSTRUCT(&rankmap, opal_pointer_array_t);
|
OBJ_CONSTRUCT(&rankmap, opal_pointer_array_t);
|
||||||
|
|
||||||
/* parse the rankfile, storing its results in the rankmap */
|
/* parse the rankfile, storing its results in the rankmap */
|
||||||
@ -183,20 +185,11 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
|||||||
rank = vpid_start + k;
|
rank = vpid_start + k;
|
||||||
/* get the rankfile entry for this rank */
|
/* get the rankfile entry for this rank */
|
||||||
if (NULL == (rfmap = (orte_rmaps_rank_file_map_t*)opal_pointer_array_get_item(&rankmap, rank))) {
|
if (NULL == (rfmap = (orte_rmaps_rank_file_map_t*)opal_pointer_array_get_item(&rankmap, rank))) {
|
||||||
#if OPAL_HAVE_HWLOC
|
|
||||||
/* no entry for this rank - if a default slot_list was given,
|
|
||||||
* then use it instead
|
|
||||||
*/
|
|
||||||
if (NULL != opal_hwloc_base_slot_list) {
|
|
||||||
slots = opal_hwloc_base_slot_list;
|
|
||||||
} else {
|
|
||||||
#endif
|
|
||||||
/* all ranks must be specified */
|
/* all ranks must be specified */
|
||||||
orte_show_help("help-rmaps_rank_file.txt", "missing-rank", true, rank, orte_rankfile);
|
orte_show_help("help-rmaps_rank_file.txt", "missing-rank", true, rank, orte_rankfile);
|
||||||
rc = ORTE_ERR_SILENT;
|
rc = ORTE_ERR_SILENT;
|
||||||
goto error;
|
goto error;
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
if (0 == strlen(rfmap->slot_list)) {
|
if (0 == strlen(rfmap->slot_list)) {
|
||||||
/* rank was specified but no slot list given - that's an error */
|
/* rank was specified but no slot list given - that's an error */
|
||||||
@ -210,10 +203,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
|||||||
|
|
||||||
/* find the node where this proc was assigned */
|
/* find the node where this proc was assigned */
|
||||||
node = NULL;
|
node = NULL;
|
||||||
for (item = opal_list_get_first(&node_list);
|
OPAL_LIST_FOREACH(nd, &node_list, orte_node_t) {
|
||||||
item != opal_list_get_end(&node_list);
|
|
||||||
item = opal_list_get_next(item)) {
|
|
||||||
nd = (orte_node_t*)item;
|
|
||||||
if (NULL != rfmap->node_name &&
|
if (NULL != rfmap->node_name &&
|
||||||
0 == strcmp(nd->name, rfmap->node_name)) {
|
0 == strcmp(nd->name, rfmap->node_name)) {
|
||||||
node = nd;
|
node = nd;
|
||||||
@ -250,7 +240,11 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
|||||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
|
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
|
||||||
++(jdata->map->num_nodes);
|
++(jdata->map->num_nodes);
|
||||||
}
|
}
|
||||||
proc = orte_rmaps_base_setup_proc(jdata, node, i);
|
if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, i))) {
|
||||||
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
if ((node->slots < (int)node->num_procs) ||
|
if ((node->slots < (int)node->num_procs) ||
|
||||||
(0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
|
(0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
|
||||||
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
|
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
|
||||||
@ -340,10 +334,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
|||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
while(NULL != (item = opal_list_remove_first(&node_list))) {
|
OPAL_LIST_DESTRUCT(&node_list);
|
||||||
OBJ_RELEASE(item);
|
|
||||||
}
|
|
||||||
OBJ_DESTRUCT(&node_list);
|
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@ -456,11 +447,11 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
|
|||||||
node_name = strdup(argv[0]);
|
node_name = strdup(argv[0]);
|
||||||
} else if (2 == cnt) {
|
} else if (2 == cnt) {
|
||||||
node_name = strdup(argv[1]);
|
node_name = strdup(argv[1]);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
|
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
|
||||||
rc = ORTE_ERR_BAD_PARAM;
|
rc = ORTE_ERR_BAD_PARAM;
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
|
opal_argv_free(argv);
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
opal_argv_free (argv);
|
opal_argv_free (argv);
|
||||||
@ -469,6 +460,8 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
|
|||||||
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
|
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
|
||||||
rc = ORTE_ERR_BAD_PARAM;
|
rc = ORTE_ERR_BAD_PARAM;
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
|
free(node_name);
|
||||||
|
node_name = NULL;
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
/* check if this is the local node */
|
/* check if this is the local node */
|
||||||
@ -478,6 +471,8 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
|
|||||||
} else {
|
} else {
|
||||||
rfmap->node_name = strdup(node_name);
|
rfmap->node_name = strdup(node_name);
|
||||||
}
|
}
|
||||||
|
free(node_name);
|
||||||
|
node_name = NULL;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case ORTE_RANKFILE_SLOT:
|
case ORTE_RANKFILE_SLOT:
|
||||||
@ -494,6 +489,7 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
|
|||||||
orte_show_help("help-rmaps_rank_file.txt", "bad-assign", true, rank,
|
orte_show_help("help-rmaps_rank_file.txt", "bad-assign", true, rank,
|
||||||
opal_pointer_array_get_item(assigned_ranks_array, rank), rankfile);
|
opal_pointer_array_get_item(assigned_ranks_array, rank), rankfile);
|
||||||
rc = ORTE_ERR_BAD_PARAM;
|
rc = ORTE_ERR_BAD_PARAM;
|
||||||
|
free(value);
|
||||||
goto unlock;
|
goto unlock;
|
||||||
} else {
|
} else {
|
||||||
/* prepare rank assignment string for the help message in case of a bad-assign */
|
/* prepare rank assignment string for the help message in case of a bad-assign */
|
||||||
@ -506,11 +502,13 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
|
|||||||
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
|
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
|
||||||
rc = ORTE_ERR_BAD_PARAM;
|
rc = ORTE_ERR_BAD_PARAM;
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
|
free(value);
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
for (i=0; i < 64 && '\0' != value[i]; i++) {
|
for (i=0; i < 64 && '\0' != value[i]; i++) {
|
||||||
rfmap->slot_list[i] = value[i];
|
rfmap->slot_list[i] = value[i];
|
||||||
}
|
}
|
||||||
|
free(value);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user