1
1

1. Fixing Possible strdup of NULL

2.  Fixing num_alloc when combined mapping policies ( rankfile & byslot or bynode )

This commit was SVN r18073.
Этот коммит содержится в:
Lenny Verkhovsky 2008-04-02 14:12:38 +00:00
родитель f115b4aed2
Коммит 2be4e32c79
2 изменённых файлов: 34 добавлений и 33 удалений

Просмотреть файл

@ -31,20 +31,17 @@ example: cat hosfile
rank 3=host3 slot=0:1,1:0-2 rank 3=host3 slot=0:1,1:0-2
[parse_error_string] [parse_error_string]
Open RTE detected a parse error in the rankfile: Open RTE detected a parse error in the rankfile (%s)
%s
It occured on line number %d on token %d: It occured on line number %d on token %d:
%s %s
[parse_error_int] [parse_error_int]
Open RTE detected a parse error in the rankfile: Open RTE detected a parse error in the rankfile (%s)
%s
It occured on line number %d on token %d: It occured on line number %d on token %d:
%d %d
[parse_error] [parse_error]
Open RTE detected a parse error in the rankfile: Open RTE detected a parse error in the rankfile (%s)
%s
It occured on line number %d on token %d. It occured on line number %d on token %d.
[not-all-mapped-alloc] [not-all-mapped-alloc]
@ -66,13 +63,13 @@ Either request fewer slots for your application, or make more slots available
for use. for use.
[bad-rankfile] [bad-rankfile]
Error, Invalid rank (%d) in the rankfile (%s) Error, invalid rank (%d) in the rankfile (%s)
[bad-assign] [bad-assign]
Error, rank %d is already assigned to %s, check %s Error, rank %d is already assigned to %s, check %s
[bad-syntax] [bad-syntax]
Error, invalid syntax in the rankfile Error, invalid syntax in the rankfile (%s)
syntax must be the fallowing syntax must be the fallowing
rank i=host_i slot=string rank i=host_i slot=string
ex: rank 1=host1 slot=1:0,1 ex: rank 1=host1 slot=1:0,1

Просмотреть файл

@ -54,6 +54,7 @@ char *orte_rmaps_rank_file_path = NULL;
static const char *orte_rmaps_rank_file_name_cur = NULL; static const char *orte_rmaps_rank_file_name_cur = NULL;
static opal_mutex_t orte_rmaps_rank_file_mutex; static opal_mutex_t orte_rmaps_rank_file_mutex;
char *orte_rmaps_rank_file_slot_list; char *orte_rmaps_rank_file_slot_list;
static orte_std_cntr_t orte_rmaps_rank_file_num_alloc = 0;
/* /*
* Local variable * Local variable
@ -72,11 +73,11 @@ static int map_app_by_user_map(
int rc = ORTE_SUCCESS; int rc = ORTE_SUCCESS;
opal_list_item_t *next; opal_list_item_t *next;
orte_node_t *node; orte_node_t *node;
orte_std_cntr_t num_alloc=0, round_cnt; orte_std_cntr_t round_cnt;
OPAL_TRACE(2); OPAL_TRACE(2);
while (num_alloc < app->num_procs) { while (orte_rmaps_rank_file_num_alloc < app->num_procs) {
/** see if any nodes remain unused and available. We need to do this check /** see if any nodes remain unused and available. We need to do this check
* each time since we may remove nodes from the list (as they become fully * each time since we may remove nodes from the list (as they become fully
* used) as we cycle through the loop */ * used) as we cycle through the loop */
@ -91,7 +92,7 @@ static int map_app_by_user_map(
* we may need to prune the nodes list removing overused nodes. * we may need to prune the nodes list removing overused nodes.
* Wrap around to beginning if we are at the end of the list */ * Wrap around to beginning if we are at the end of the list */
round_cnt=0; round_cnt=0;
if ( -1 != rankmap[vpid_start + num_alloc].rank) { if ( -1 != rankmap[vpid_start + orte_rmaps_rank_file_num_alloc].rank) {
do { do {
if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) { if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) {
next = opal_list_get_first(nodes); next = opal_list_get_first(nodes);
@ -103,17 +104,17 @@ static int map_app_by_user_map(
node = (orte_node_t*) cur_node_item; node = (orte_node_t*) cur_node_item;
cur_node_item = next; cur_node_item = next;
if ( round_cnt == 2 ) { if ( round_cnt == 2 ) {
opal_show_help("help-rmaps_rank_file.txt","bad-host", true,rankmap[num_alloc+vpid_start].node_name); opal_show_help("help-rmaps_rank_file.txt","bad-host", true,rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].node_name);
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM; return ORTE_ERR_BAD_PARAM;
} }
} while ( strcmp(node->name, rankmap[num_alloc + vpid_start].node_name)); } while ( strcmp(node->name, rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].node_name));
node->slot_list = strdup(rankmap[num_alloc+vpid_start].slot_list); node->slot_list = strdup(rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].slot_list);
if (mca_rmaps_rank_file_component.debug) { if (mca_rmaps_rank_file_component.debug) {
opal_output(0, "rank_file RMAPS component: [%s:%d]->slot_list=%s\n", opal_output(0, "rank_file RMAPS component: [%s:%d]->slot_list=%s\n",
rankmap[num_alloc + vpid_start].node_name,rankmap[num_alloc+vpid_start].rank, node->slot_list); rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].node_name,rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].rank, node->slot_list);
} }
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, rankmap[num_alloc+vpid_start].rank, app->idx, if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].rank, app->idx,
nodes, jdata->map->oversubscribe))) { nodes, jdata->map->oversubscribe))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop * really isn't an error - we just need to break from the loop
@ -126,7 +127,7 @@ static int map_app_by_user_map(
} }
} }
} }
++num_alloc; ++orte_rmaps_rank_file_num_alloc;
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -146,7 +147,6 @@ static int map_app_by_node(
int rc = ORTE_SUCCESS; int rc = ORTE_SUCCESS;
opal_list_item_t *next; opal_list_item_t *next;
orte_node_t *node; orte_node_t *node;
orte_std_cntr_t num_alloc = 0;
OPAL_TRACE(2); OPAL_TRACE(2);
@ -167,9 +167,9 @@ static int map_app_by_node(
list, oversubscription is automatically taken care of via this logic. list, oversubscription is automatically taken care of via this logic.
*/ */
while (num_alloc < app->num_procs) { while (orte_rmaps_rank_file_num_alloc < app->num_procs) {
if ( -1 != rankmap[num_alloc + vpid_start].rank) { if ( -1 != rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].rank) {
++num_alloc; ++orte_rmaps_rank_file_num_alloc;
continue; continue;
} }
/** see if any nodes remain unused and available. We need to do this check /** see if any nodes remain unused and available. We need to do this check
@ -199,7 +199,7 @@ static int map_app_by_node(
strcpy(node->slot_list, orte_mca_rmaps_rank_file_slot_list); strcpy(node->slot_list, orte_mca_rmaps_rank_file_slot_list);
} }
} }
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx, if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + orte_rmaps_rank_file_num_alloc, app->idx,
nodes, jdata->map->oversubscribe))) { nodes, jdata->map->oversubscribe))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop * really isn't an error - we just need to break from the loop
@ -211,7 +211,7 @@ static int map_app_by_node(
return rc; return rc;
} }
} }
++num_alloc; ++orte_rmaps_rank_file_num_alloc;
cur_node_item = next; cur_node_item = next;
} }
@ -229,7 +229,7 @@ static int map_app_by_slot(
opal_list_t* nodes ) opal_list_t* nodes )
{ {
int rc = ORTE_SUCCESS; int rc = ORTE_SUCCESS;
orte_std_cntr_t i, num_slots_to_take, num_alloc = 0; orte_std_cntr_t i, num_slots_to_take;
orte_node_t *node; orte_node_t *node;
opal_list_item_t *next; opal_list_item_t *next;
@ -241,8 +241,7 @@ static int map_app_by_slot(
have reached their soft limit and the user directed us to "no oversubscribe". have reached their soft limit and the user directed us to "no oversubscribe".
If we still have processes that haven't been mapped yet, then it's an If we still have processes that haven't been mapped yet, then it's an
"out of resources" error. */ "out of resources" error. */
while ( orte_rmaps_rank_file_num_alloc < app->num_procs) {
while ( num_alloc < app->num_procs) {
/** see if any nodes remain unused and available. We need to do this check /** see if any nodes remain unused and available. We need to do this check
* each time since we may remove nodes from the list (as they become fully * each time since we may remove nodes from the list (as they become fully
* used) as we cycle through the loop */ * used) as we cycle through the loop */
@ -300,8 +299,8 @@ static int map_app_by_slot(
} }
for( i = 0; i < num_slots_to_take; ++i) { for( i = 0; i < num_slots_to_take; ++i) {
if ( -1 != rankmap[num_alloc + vpid_start].rank) { if ( -1 != rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].rank) {
++num_alloc; ++orte_rmaps_rank_file_num_alloc;
continue; continue;
} }
if ( NULL != orte_mca_rmaps_rank_file_slot_list){ if ( NULL != orte_mca_rmaps_rank_file_slot_list){
@ -310,7 +309,7 @@ static int map_app_by_slot(
strcpy(node->slot_list, orte_mca_rmaps_rank_file_slot_list); strcpy(node->slot_list, orte_mca_rmaps_rank_file_slot_list);
} }
} }
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx, if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + orte_rmaps_rank_file_num_alloc, app->idx,
nodes, jdata->map->oversubscribe))) { nodes, jdata->map->oversubscribe))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop * really isn't an error - we just need to break from the loop
@ -323,12 +322,12 @@ static int map_app_by_slot(
} }
} }
/* Update the number of procs allocated */ /* Update the number of procs allocated */
++num_alloc; ++orte_rmaps_rank_file_num_alloc;
/** if all the procs have been mapped OR we have fully used up this node, then /** if all the procs have been mapped OR we have fully used up this node, then
* break from the loop * break from the loop
*/ */
if(num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) { if(orte_rmaps_rank_file_num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
break; break;
} }
} }
@ -626,7 +625,7 @@ static int orte_rmaps_rank_file_parse(const char *rankfile, int np)
node_name = strdup(argv[1]); node_name = strdup(argv[1]);
} }
else { else {
opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true); opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
rc = ORTE_ERR_BAD_PARAM; rc = ORTE_ERR_BAD_PARAM;
goto unlock; goto unlock;
} }
@ -641,7 +640,12 @@ static int orte_rmaps_rank_file_parse(const char *rankfile, int np)
} }
break; break;
case ORTE_RANKFILE_SLOT: case ORTE_RANKFILE_SLOT:
rankmap[ival].slot_list = strdup(orte_rmaps_rank_file_parse_string_or_int()); if ( NULL == (value = orte_rmaps_rank_file_parse_string_or_int())) {
opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
rc = ORTE_ERR_BAD_PARAM;
goto unlock;
}
rankmap[ival].slot_list = strdup(value);
break; break;
} }
} }