1. Fixing Possible strdup of NULL
2. Fixing num_alloc when combined mapping policies ( rankfile & byslot or bynode ) This commit was SVN r18073.
Этот коммит содержится в:
родитель
f115b4aed2
Коммит
2be4e32c79
@ -31,20 +31,17 @@ example: cat hosfile
|
||||
rank 3=host3 slot=0:1,1:0-2
|
||||
|
||||
[parse_error_string]
|
||||
Open RTE detected a parse error in the rankfile:
|
||||
%s
|
||||
Open RTE detected a parse error in the rankfile (%s)
|
||||
It occured on line number %d on token %d:
|
||||
%s
|
||||
|
||||
[parse_error_int]
|
||||
Open RTE detected a parse error in the rankfile:
|
||||
%s
|
||||
Open RTE detected a parse error in the rankfile (%s)
|
||||
It occured on line number %d on token %d:
|
||||
%d
|
||||
|
||||
[parse_error]
|
||||
Open RTE detected a parse error in the rankfile:
|
||||
%s
|
||||
Open RTE detected a parse error in the rankfile (%s)
|
||||
It occured on line number %d on token %d.
|
||||
|
||||
[not-all-mapped-alloc]
|
||||
@ -66,13 +63,13 @@ Either request fewer slots for your application, or make more slots available
|
||||
for use.
|
||||
|
||||
[bad-rankfile]
|
||||
Error, Invalid rank (%d) in the rankfile (%s)
|
||||
Error, invalid rank (%d) in the rankfile (%s)
|
||||
|
||||
[bad-assign]
|
||||
Error, rank %d is already assigned to %s, check %s
|
||||
|
||||
[bad-syntax]
|
||||
Error, invalid syntax in the rankfile
|
||||
Error, invalid syntax in the rankfile (%s)
|
||||
syntax must be the fallowing
|
||||
rank i=host_i slot=string
|
||||
ex: rank 1=host1 slot=1:0,1
|
||||
|
@ -54,6 +54,7 @@ char *orte_rmaps_rank_file_path = NULL;
|
||||
static const char *orte_rmaps_rank_file_name_cur = NULL;
|
||||
static opal_mutex_t orte_rmaps_rank_file_mutex;
|
||||
char *orte_rmaps_rank_file_slot_list;
|
||||
static orte_std_cntr_t orte_rmaps_rank_file_num_alloc = 0;
|
||||
|
||||
/*
|
||||
* Local variable
|
||||
@ -72,11 +73,11 @@ static int map_app_by_user_map(
|
||||
int rc = ORTE_SUCCESS;
|
||||
opal_list_item_t *next;
|
||||
orte_node_t *node;
|
||||
orte_std_cntr_t num_alloc=0, round_cnt;
|
||||
orte_std_cntr_t round_cnt;
|
||||
|
||||
OPAL_TRACE(2);
|
||||
|
||||
while (num_alloc < app->num_procs) {
|
||||
while (orte_rmaps_rank_file_num_alloc < app->num_procs) {
|
||||
/** see if any nodes remain unused and available. We need to do this check
|
||||
* each time since we may remove nodes from the list (as they become fully
|
||||
* used) as we cycle through the loop */
|
||||
@ -91,7 +92,7 @@ static int map_app_by_user_map(
|
||||
* we may need to prune the nodes list removing overused nodes.
|
||||
* Wrap around to beginning if we are at the end of the list */
|
||||
round_cnt=0;
|
||||
if ( -1 != rankmap[vpid_start + num_alloc].rank) {
|
||||
if ( -1 != rankmap[vpid_start + orte_rmaps_rank_file_num_alloc].rank) {
|
||||
do {
|
||||
if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) {
|
||||
next = opal_list_get_first(nodes);
|
||||
@ -103,17 +104,17 @@ static int map_app_by_user_map(
|
||||
node = (orte_node_t*) cur_node_item;
|
||||
cur_node_item = next;
|
||||
if ( round_cnt == 2 ) {
|
||||
opal_show_help("help-rmaps_rank_file.txt","bad-host", true,rankmap[num_alloc+vpid_start].node_name);
|
||||
opal_show_help("help-rmaps_rank_file.txt","bad-host", true,rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].node_name);
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
} while ( strcmp(node->name, rankmap[num_alloc + vpid_start].node_name));
|
||||
node->slot_list = strdup(rankmap[num_alloc+vpid_start].slot_list);
|
||||
} while ( strcmp(node->name, rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].node_name));
|
||||
node->slot_list = strdup(rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].slot_list);
|
||||
if (mca_rmaps_rank_file_component.debug) {
|
||||
opal_output(0, "rank_file RMAPS component: [%s:%d]->slot_list=%s\n",
|
||||
rankmap[num_alloc + vpid_start].node_name,rankmap[num_alloc+vpid_start].rank, node->slot_list);
|
||||
rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].node_name,rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].rank, node->slot_list);
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, rankmap[num_alloc+vpid_start].rank, app->idx,
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].rank, app->idx,
|
||||
nodes, jdata->map->oversubscribe))) {
|
||||
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
|
||||
* really isn't an error - we just need to break from the loop
|
||||
@ -126,7 +127,7 @@ static int map_app_by_user_map(
|
||||
}
|
||||
}
|
||||
}
|
||||
++num_alloc;
|
||||
++orte_rmaps_rank_file_num_alloc;
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -146,7 +147,6 @@ static int map_app_by_node(
|
||||
int rc = ORTE_SUCCESS;
|
||||
opal_list_item_t *next;
|
||||
orte_node_t *node;
|
||||
orte_std_cntr_t num_alloc = 0;
|
||||
|
||||
OPAL_TRACE(2);
|
||||
|
||||
@ -167,9 +167,9 @@ static int map_app_by_node(
|
||||
list, oversubscription is automatically taken care of via this logic.
|
||||
*/
|
||||
|
||||
while (num_alloc < app->num_procs) {
|
||||
if ( -1 != rankmap[num_alloc + vpid_start].rank) {
|
||||
++num_alloc;
|
||||
while (orte_rmaps_rank_file_num_alloc < app->num_procs) {
|
||||
if ( -1 != rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].rank) {
|
||||
++orte_rmaps_rank_file_num_alloc;
|
||||
continue;
|
||||
}
|
||||
/** see if any nodes remain unused and available. We need to do this check
|
||||
@ -199,7 +199,7 @@ static int map_app_by_node(
|
||||
strcpy(node->slot_list, orte_mca_rmaps_rank_file_slot_list);
|
||||
}
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx,
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + orte_rmaps_rank_file_num_alloc, app->idx,
|
||||
nodes, jdata->map->oversubscribe))) {
|
||||
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
|
||||
* really isn't an error - we just need to break from the loop
|
||||
@ -211,7 +211,7 @@ static int map_app_by_node(
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
++num_alloc;
|
||||
++orte_rmaps_rank_file_num_alloc;
|
||||
cur_node_item = next;
|
||||
}
|
||||
|
||||
@ -229,7 +229,7 @@ static int map_app_by_slot(
|
||||
opal_list_t* nodes )
|
||||
{
|
||||
int rc = ORTE_SUCCESS;
|
||||
orte_std_cntr_t i, num_slots_to_take, num_alloc = 0;
|
||||
orte_std_cntr_t i, num_slots_to_take;
|
||||
orte_node_t *node;
|
||||
opal_list_item_t *next;
|
||||
|
||||
@ -241,8 +241,7 @@ static int map_app_by_slot(
|
||||
have reached their soft limit and the user directed us to "no oversubscribe".
|
||||
If we still have processes that haven't been mapped yet, then it's an
|
||||
"out of resources" error. */
|
||||
|
||||
while ( num_alloc < app->num_procs) {
|
||||
while ( orte_rmaps_rank_file_num_alloc < app->num_procs) {
|
||||
/** see if any nodes remain unused and available. We need to do this check
|
||||
* each time since we may remove nodes from the list (as they become fully
|
||||
* used) as we cycle through the loop */
|
||||
@ -300,8 +299,8 @@ static int map_app_by_slot(
|
||||
}
|
||||
|
||||
for( i = 0; i < num_slots_to_take; ++i) {
|
||||
if ( -1 != rankmap[num_alloc + vpid_start].rank) {
|
||||
++num_alloc;
|
||||
if ( -1 != rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].rank) {
|
||||
++orte_rmaps_rank_file_num_alloc;
|
||||
continue;
|
||||
}
|
||||
if ( NULL != orte_mca_rmaps_rank_file_slot_list){
|
||||
@ -310,7 +309,7 @@ static int map_app_by_slot(
|
||||
strcpy(node->slot_list, orte_mca_rmaps_rank_file_slot_list);
|
||||
}
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx,
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + orte_rmaps_rank_file_num_alloc, app->idx,
|
||||
nodes, jdata->map->oversubscribe))) {
|
||||
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
|
||||
* really isn't an error - we just need to break from the loop
|
||||
@ -323,12 +322,12 @@ static int map_app_by_slot(
|
||||
}
|
||||
}
|
||||
/* Update the number of procs allocated */
|
||||
++num_alloc;
|
||||
++orte_rmaps_rank_file_num_alloc;
|
||||
|
||||
/** if all the procs have been mapped OR we have fully used up this node, then
|
||||
* break from the loop
|
||||
*/
|
||||
if(num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
|
||||
if(orte_rmaps_rank_file_num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -626,7 +625,7 @@ static int orte_rmaps_rank_file_parse(const char *rankfile, int np)
|
||||
node_name = strdup(argv[1]);
|
||||
}
|
||||
else {
|
||||
opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true);
|
||||
opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
|
||||
rc = ORTE_ERR_BAD_PARAM;
|
||||
goto unlock;
|
||||
}
|
||||
@ -641,7 +640,12 @@ static int orte_rmaps_rank_file_parse(const char *rankfile, int np)
|
||||
}
|
||||
break;
|
||||
case ORTE_RANKFILE_SLOT:
|
||||
rankmap[ival].slot_list = strdup(orte_rmaps_rank_file_parse_string_or_int());
|
||||
if ( NULL == (value = orte_rmaps_rank_file_parse_string_or_int())) {
|
||||
opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
|
||||
rc = ORTE_ERR_BAD_PARAM;
|
||||
goto unlock;
|
||||
}
|
||||
rankmap[ival].slot_list = strdup(value);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user