From 2be4e32c79e3e25e155cc5b6f246ab1ee2536dbb Mon Sep 17 00:00:00 2001 From: Lenny Verkhovsky Date: Wed, 2 Apr 2008 14:12:38 +0000 Subject: [PATCH] 1. Fixing Possible strdup of NULL 2. Fixing num_alloc when combined mapping policies ( rankfile & byslot or bynode ) This commit was SVN r18073. --- .../rmaps/rank_file/help-rmaps_rank_file.txt | 13 ++--- orte/mca/rmaps/rank_file/rmaps_rank_file.c | 54 ++++++++++--------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt b/orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt index 3b4d82268b..da9302a948 100644 --- a/orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt +++ b/orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt @@ -31,20 +31,17 @@ example: cat hosfile rank 3=host3 slot=0:1,1:0-2 [parse_error_string] -Open RTE detected a parse error in the rankfile: - %s +Open RTE detected a parse error in the rankfile (%s) It occured on line number %d on token %d: %s [parse_error_int] -Open RTE detected a parse error in the rankfile: - %s +Open RTE detected a parse error in the rankfile (%s) It occured on line number %d on token %d: %d [parse_error] -Open RTE detected a parse error in the rankfile: - %s +Open RTE detected a parse error in the rankfile (%s) It occured on line number %d on token %d. [not-all-mapped-alloc] @@ -66,13 +63,13 @@ Either request fewer slots for your application, or make more slots available for use. [bad-rankfile] -Error, Invalid rank (%d) in the rankfile (%s) +Error, invalid rank (%d) in the rankfile (%s) [bad-assign] Error, rank %d is already assigned to %s, check %s [bad-syntax] -Error, invalid syntax in the rankfile +Error, invalid syntax in the rankfile (%s) syntax must be the fallowing rank i=host_i slot=string ex: rank 1=host1 slot=1:0,1 diff --git a/orte/mca/rmaps/rank_file/rmaps_rank_file.c b/orte/mca/rmaps/rank_file/rmaps_rank_file.c index a47b03cd67..da58df2b74 100644 --- a/orte/mca/rmaps/rank_file/rmaps_rank_file.c +++ b/orte/mca/rmaps/rank_file/rmaps_rank_file.c @@ -54,6 +54,7 @@ char *orte_rmaps_rank_file_path = NULL; static const char *orte_rmaps_rank_file_name_cur = NULL; static opal_mutex_t orte_rmaps_rank_file_mutex; char *orte_rmaps_rank_file_slot_list; +static orte_std_cntr_t orte_rmaps_rank_file_num_alloc = 0; /* * Local variable @@ -72,11 +73,11 @@ static int map_app_by_user_map( int rc = ORTE_SUCCESS; opal_list_item_t *next; orte_node_t *node; - orte_std_cntr_t num_alloc=0, round_cnt; + orte_std_cntr_t round_cnt; OPAL_TRACE(2); - while (num_alloc < app->num_procs) { + while (orte_rmaps_rank_file_num_alloc < app->num_procs) { /** see if any nodes remain unused and available. We need to do this check * each time since we may remove nodes from the list (as they become fully * used) as we cycle through the loop */ @@ -91,7 +92,7 @@ static int map_app_by_user_map( * we may need to prune the nodes list removing overused nodes. * Wrap around to beginning if we are at the end of the list */ round_cnt=0; - if ( -1 != rankmap[vpid_start + num_alloc].rank) { + if ( -1 != rankmap[vpid_start + orte_rmaps_rank_file_num_alloc].rank) { do { if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) { next = opal_list_get_first(nodes); @@ -103,17 +104,17 @@ static int map_app_by_user_map( node = (orte_node_t*) cur_node_item; cur_node_item = next; if ( round_cnt == 2 ) { - opal_show_help("help-rmaps_rank_file.txt","bad-host", true,rankmap[num_alloc+vpid_start].node_name); + opal_show_help("help-rmaps_rank_file.txt","bad-host", true,rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].node_name); ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; } - } while ( strcmp(node->name, rankmap[num_alloc + vpid_start].node_name)); - node->slot_list = strdup(rankmap[num_alloc+vpid_start].slot_list); + } while ( strcmp(node->name, rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].node_name)); + node->slot_list = strdup(rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].slot_list); if (mca_rmaps_rank_file_component.debug) { opal_output(0, "rank_file RMAPS component: [%s:%d]->slot_list=%s\n", - rankmap[num_alloc + vpid_start].node_name,rankmap[num_alloc+vpid_start].rank, node->slot_list); + rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].node_name,rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].rank, node->slot_list); } - if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, rankmap[num_alloc+vpid_start].rank, app->idx, + if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, rankmap[orte_rmaps_rank_file_num_alloc+vpid_start].rank, app->idx, nodes, jdata->map->oversubscribe))) { /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this * really isn't an error - we just need to break from the loop @@ -126,7 +127,7 @@ static int map_app_by_user_map( } } } - ++num_alloc; + ++orte_rmaps_rank_file_num_alloc; } return ORTE_SUCCESS; } @@ -146,7 +147,6 @@ static int map_app_by_node( int rc = ORTE_SUCCESS; opal_list_item_t *next; orte_node_t *node; - orte_std_cntr_t num_alloc = 0; OPAL_TRACE(2); @@ -167,9 +167,9 @@ static int map_app_by_node( list, oversubscription is automatically taken care of via this logic. */ - while (num_alloc < app->num_procs) { - if ( -1 != rankmap[num_alloc + vpid_start].rank) { - ++num_alloc; + while (orte_rmaps_rank_file_num_alloc < app->num_procs) { + if ( -1 != rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].rank) { + ++orte_rmaps_rank_file_num_alloc; continue; } /** see if any nodes remain unused and available. We need to do this check @@ -199,7 +199,7 @@ static int map_app_by_node( strcpy(node->slot_list, orte_mca_rmaps_rank_file_slot_list); } } - if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx, + if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + orte_rmaps_rank_file_num_alloc, app->idx, nodes, jdata->map->oversubscribe))) { /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this * really isn't an error - we just need to break from the loop @@ -211,7 +211,7 @@ static int map_app_by_node( return rc; } } - ++num_alloc; + ++orte_rmaps_rank_file_num_alloc; cur_node_item = next; } @@ -229,7 +229,7 @@ static int map_app_by_slot( opal_list_t* nodes ) { int rc = ORTE_SUCCESS; - orte_std_cntr_t i, num_slots_to_take, num_alloc = 0; + orte_std_cntr_t i, num_slots_to_take; orte_node_t *node; opal_list_item_t *next; @@ -241,8 +241,7 @@ static int map_app_by_slot( have reached their soft limit and the user directed us to "no oversubscribe". If we still have processes that haven't been mapped yet, then it's an "out of resources" error. */ - - while ( num_alloc < app->num_procs) { + while ( orte_rmaps_rank_file_num_alloc < app->num_procs) { /** see if any nodes remain unused and available. We need to do this check * each time since we may remove nodes from the list (as they become fully * used) as we cycle through the loop */ @@ -300,8 +299,8 @@ static int map_app_by_slot( } for( i = 0; i < num_slots_to_take; ++i) { - if ( -1 != rankmap[num_alloc + vpid_start].rank) { - ++num_alloc; + if ( -1 != rankmap[orte_rmaps_rank_file_num_alloc + vpid_start].rank) { + ++orte_rmaps_rank_file_num_alloc; continue; } if ( NULL != orte_mca_rmaps_rank_file_slot_list){ @@ -310,7 +309,7 @@ static int map_app_by_slot( strcpy(node->slot_list, orte_mca_rmaps_rank_file_slot_list); } } - if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx, + if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + orte_rmaps_rank_file_num_alloc, app->idx, nodes, jdata->map->oversubscribe))) { /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this * really isn't an error - we just need to break from the loop @@ -323,12 +322,12 @@ static int map_app_by_slot( } } /* Update the number of procs allocated */ - ++num_alloc; + ++orte_rmaps_rank_file_num_alloc; /** if all the procs have been mapped OR we have fully used up this node, then * break from the loop */ - if(num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) { + if(orte_rmaps_rank_file_num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) { break; } } @@ -626,7 +625,7 @@ static int orte_rmaps_rank_file_parse(const char *rankfile, int np) node_name = strdup(argv[1]); } else { - opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true); + opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile); rc = ORTE_ERR_BAD_PARAM; goto unlock; } @@ -641,7 +640,12 @@ static int orte_rmaps_rank_file_parse(const char *rankfile, int np) } break; case ORTE_RANKFILE_SLOT: - rankmap[ival].slot_list = strdup(orte_rmaps_rank_file_parse_string_or_int()); + if ( NULL == (value = orte_rmaps_rank_file_parse_string_or_int())) { + opal_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile); + rc = ORTE_ERR_BAD_PARAM; + goto unlock; + } + rankmap[ival].slot_list = strdup(value); break; } }