
1. Fixing a possible strdup of NULL

2. Fixing num_alloc when mapping policies are combined (rankfile & byslot or bynode); a simplified sketch follows the diff below

This commit was SVN r18073.
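
The first item guards against calling strdup() on a NULL pointer: the slot-list token returned by the rankfile parser was previously duplicated without a check, and strdup(NULL) is undefined behavior. A minimal sketch of the guarded pattern as it appears in the ORTE_RANKFILE_SLOT case in the diff below (rankmap, ival, rc, rankfile and the unlock label all belong to the surrounding parser function):

    /* check the parser's return value before duplicating it */
    char *value;
    if (NULL == (value = orte_rmaps_rank_file_parse_string_or_int())) {
        /* malformed rankfile line: report it, naming the offending file */
        opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
        rc = ORTE_ERR_BAD_PARAM;
        goto unlock;
    }
    rankmap[ival].slot_list = strdup(value);   /* value is non-NULL here */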
This commit is contained in:
Lenny Verkhovsky 2008-04-02 14:12:38 +00:00
parent f115b4aed2
commit 2be4e32c79
2 changed files with 34 additions and 33 deletions

View File

@@ -31,20 +31,17 @@ example: cat hosfile
rank 3=host3 slot=0:1,1:0-2
[parse_error_string]
Open RTE detected a parse error in the rankfile:
%s
Open RTE detected a parse error in the rankfile (%s)
It occured on line number %d on token %d:
%s
[parse_error_int]
Open RTE detected a parse error in the rankfile:
%s
Open RTE detected a parse error in the rankfile (%s)
It occured on line number %d on token %d:
%d
[parse_error]
Open RTE detected a parse error in the rankfile:
%s
Open RTE detected a parse error in the rankfile (%s)
It occured on line number %d on token %d.
[not-all-mapped-alloc]
@@ -66,13 +63,13 @@ Either request fewer slots for your application, or make more slots available
for use.
[bad-rankfile]
Error, Invalid rank (%d) in the rankfile (%s)
Error, invalid rank (%d) in the rankfile (%s)
[bad-assign]
Error, rank %d is already assigned to %s, check %s
[bad-syntax]
Error, invalid syntax in the rankfile
Error, invalid syntax in the rankfile (%s)
syntax must be the fallowing
rank i=host_i slot=string
ex: rank 1=host1 slot=1:0,1
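
The message changes above fold the rankfile name into the text itself: the two-line "rankfile:" / "%s" form becomes a single "(%s)", and [bad-syntax] gains a "(%s)" as well. Each placeholder is filled from the extra arguments passed to opal_show_help() at the call site, so the C file below adds the rankfile argument to match. A small sketch of that correspondence, assuming the usual opal/util/show_help.h header:

    #include "opal/util/show_help.h"

    /* Arguments after the boolean are substituted into the %s/%d placeholders
     * of the named topic in help-rmaps_rank_file.txt, so passing the rankfile
     * path here is what fills the new "(%s)" in [bad-syntax]. */
    opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);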

View File

@@ -54,6 +54,7 @@ char *orte_rmaps_rank_file_path = NULL;
static const char *orte_rmaps_rank_file_name_cur = NULL;
static opal_mutex_t orte_rmaps_rank_file_mutex;
char *orte_rmaps_rank_file_slot_list;
static orte_std_cntr_t orte_rmaps_rank_file_num_alloc = 0;
/*
* Local variable
@@ -72,11 +73,11 @@ static int map_app_by_user_map(
int rc = ORTE_SUCCESS;
opal_list_item_t *next;
orte_node_t *node;
orte_std_cntr_t num_alloc=0, round_cnt;
orte_std_cntr_t round_cnt;
OPAL_TRACE(2);
while (num_alloc < app->num_procs) {
while (orte_rmaps_rank_file_num_alloc < app->num_procs) {
/** see if any nodes remain unused and available. We need to do this check
* each time since we may remove nodes from the list (as they become fully
* used) as we cycle through the loop */
@@ -91,7 +92,7 @@ static int map_app_by_user_map(
* we may need to prune the nodes list removing overused nodes.
* Wrap around to beginning if we are at the end of the list */
round_cnt=0;
if ( -1 != rankmap[vpid_start + num_alloc].rank) {
if ( -1 != rankmap[vpid_start + orte_rmaps_rank_file_num_alloc].rank) {
do {
if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) {
next = opal_list_get_first(nodes);
@@ -103,17 +104,17 @@ static int map_app_by_user_map(
node = (orte_node_t*) cur_node_item;
cur_node_item = next;
if ( round_cnt == 2 ) {
opal_show_help("help-rmaps_rank_file.txt","bad-host", true,rankmap[num_alloc+vpid_start].node_name);
opal_show_help("help-rmaps_rank_file.txt","bad-host", true,rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].node_name);
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
} while ( strcmp(node->name, rankmap[num_alloc + vpid_start].node_name));
node->slot_list = strdup(rankmap[num_alloc+vpid_start].slot_list);
} while ( strcmp(node->name, rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].node_name));
node->slot_list = strdup(rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].slot_list);
if (mca_rmaps_rank_file_component.debug) {
opal_output(0, "rank_file RMAPS component: [%s:%d]->slot_list=%s\n",
rankmap[num_alloc + vpid_start].node_name,rankmap[num_alloc+vpid_start].rank, node->slot_list);
rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].node_name,rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].rank, node->slot_list);
}
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, rankmap[num_alloc+vpid_start].rank, app->idx,
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].rank, app->idx,
nodes, jdata->map->oversubscribe))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop
@@ -126,7 +127,7 @@ static int map_app_by_user_map(
}
}
}
++num_alloc;
++orte_rmaps_rank_file_num_alloc;
}
return ORTE_SUCCESS;
}
@@ -146,7 +147,6 @@ static int map_app_by_node(
int rc = ORTE_SUCCESS;
opal_list_item_t *next;
orte_node_t *node;
orte_std_cntr_t num_alloc = 0;
OPAL_TRACE(2);
@@ -167,9 +167,9 @@ static int map_app_by_node(
list, oversubscription is automatically taken care of via this logic.
*/
while (num_alloc < app->num_procs) {
if ( -1 != rankmap[num_alloc + vpid_start].rank) {
++num_alloc;
while (orte_rmaps_rank_file_num_alloc < app->num_procs) {
if ( -1 != rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].rank) {
++orte_rmaps_rank_file_num_alloc;
continue;
}
/** see if any nodes remain unused and available. We need to do this check
@@ -199,7 +199,7 @@ static int map_app_by_node(
strcpy(node->slot_list, orte_mca_rmaps_rank_file_slot_list);
}
}
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx,
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + orte_rmaps_rank_file_num_alloc, app->idx,
nodes, jdata->map->oversubscribe))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop
@@ -211,7 +211,7 @@ static int map_app_by_node(
return rc;
}
}
++num_alloc;
++orte_rmaps_rank_file_num_alloc;
cur_node_item = next;
}
@@ -229,7 +229,7 @@ static int map_app_by_slot(
opal_list_t* nodes )
{
int rc = ORTE_SUCCESS;
orte_std_cntr_t i, num_slots_to_take, num_alloc = 0;
orte_std_cntr_t i, num_slots_to_take;
orte_node_t *node;
opal_list_item_t *next;
@@ -241,8 +241,7 @@ static int map_app_by_slot(
have reached their soft limit and the user directed us to "no oversubscribe".
If we still have processes that haven't been mapped yet, then it's an
"out of resources" error. */
while ( num_alloc < app->num_procs) {
while ( orte_rmaps_rank_file_num_alloc < app->num_procs) {
/** see if any nodes remain unused and available. We need to do this check
* each time since we may remove nodes from the list (as they become fully
* used) as we cycle through the loop */
@@ -300,8 +299,8 @@ static int map_app_by_slot(
}
for( i = 0; i < num_slots_to_take; ++i) {
if ( -1 != rankmap[num_alloc + vpid_start].rank) {
++num_alloc;
if ( -1 != rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].rank) {
++orte_rmaps_rank_file_num_alloc;
continue;
}
if ( NULL != orte_mca_rmaps_rank_file_slot_list){
@@ -310,7 +309,7 @@ static int map_app_by_slot(
strcpy(node->slot_list, orte_mca_rmaps_rank_file_slot_list);
}
}
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx,
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + orte_rmaps_rank_file_num_alloc, app->idx,
nodes, jdata->map->oversubscribe))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop
@@ -323,12 +322,12 @@ static int map_app_by_slot(
}
}
/* Update the number of procs allocated */
++num_alloc;
++orte_rmaps_rank_file_num_alloc;
/** if all the procs have been mapped OR we have fully used up this node, then
* break from the loop
*/
if(num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
if(orte_rmaps_rank_file_num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
break;
}
}
@@ -626,7 +625,7 @@ static int orte_rmaps_rank_file_parse(const char *rankfile, int np)
node_name = strdup(argv[1]);
}
else {
opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true);
opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
rc = ORTE_ERR_BAD_PARAM;
goto unlock;
}
@@ -641,7 +640,12 @@ static int orte_rmaps_rank_file_parse(const char *rankfile, int np)
}
break;
case ORTE_RANKFILE_SLOT:
rankmap[ival].slot_list = strdup(orte_rmaps_rank_file_parse_string_or_int());
if ( NULL == (value = orte_rmaps_rank_file_parse_string_or_int())) {
opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
rc = ORTE_ERR_BAD_PARAM;
goto unlock;
}
rankmap[ival].slot_list = strdup(value);
break;
}
}
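
The second item in the commit message is visible throughout the C diff above: the per-function num_alloc counters are removed and every mapping routine now increments and tests the single file-scope static orte_rmaps_rank_file_num_alloc, so the count of already-placed processes is shared when a rankfile mapping is combined with a byslot or bynode policy. The sketch below is a simplified, self-contained illustration of that idea only, not the mapper's real control flow; all names and the toy rankmap data are hypothetical:

    #include <stdio.h>

    #define NUM_PROCS 4

    /* one counter at file scope, shared by both passes; it stands in for
     * orte_rmaps_rank_file_num_alloc */
    static int num_alloc = 0;

    /* rank -> node index taken from a rankfile; -1 means "not listed" */
    static const int rankmap[NUM_PROCS] = { 0, -1, 1, -1 };

    static void map_from_rankfile(void)
    {
        for (int rank = 0; rank < NUM_PROCS; ++rank) {
            if (-1 != rankmap[rank]) {
                printf("rank %d -> node %d (rankfile)\n", rank, rankmap[rank]);
                ++num_alloc;              /* procs placed by the rankfile pass */
            }
        }
    }

    static void map_remaining_by_slot(void)
    {
        int node = 0;
        /* continues from the shared count instead of restarting at zero */
        for (int rank = 0; rank < NUM_PROCS && num_alloc < NUM_PROCS; ++rank) {
            if (-1 != rankmap[rank]) {
                continue;                 /* already placed by the rankfile pass */
            }
            printf("rank %d -> node %d (byslot)\n", rank, node++);
            ++num_alloc;
        }
    }

    int main(void)
    {
        map_from_rankfile();
        map_remaining_by_slot();
        printf("placed %d of %d procs\n", num_alloc, NUM_PROCS);
        return 0;
    }

With per-function counters each mapping pass kept its own count; making the counter a translation-unit static is the smallest change that lets all three map_app_by_* routines share one count.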