1
1

Correct a couple of bugs in the rank_file mapper that caused vpids to be assigned incorrectly.

Add the ability to parse the rankfile for node information, instead of requiring both a hostfile and a rankfile in non-RM-managed environments. The rankfile is parsed for this purpose -only- if neither the hostfile nor the -host option is given; otherwise, those are used to establish the allocation info, as was done before this commit.

This commit was SVN r21815.
Этот коммит содержится в:
Ralph Castain 2009-08-13 16:08:43 +00:00
родитель ded58ae483
Коммит 0005e6e834
13 изменённых файлов: 170 добавлений и 48 удалений

Просмотреть файл

@ -315,10 +315,43 @@ int orte_ras_base_allocate(orte_job_t *jdata)
goto DISPLAY;
}
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
"%s ras:base:allocate nothing found in dash-host - checking for rankfile",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* Our next option is to look for a rankfile - if one was provided, we
* will use its nodes to create a default allocation pool
*/
if (NULL != orte_rankfile) {
/* check the rankfile for node information */
if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
&override_oversubscribed,
orte_rankfile))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&nodes);
return rc;
}
}
/* if something was found in rankfile, we use that as our global
* pool - set it and we are done
*/
if (!opal_list_is_empty(&nodes)) {
/* store the results in the global resource pool - this removes the
* list items
*/
if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
ORTE_ERROR_LOG(rc);
}
/* clear the oversubscribe_override flag on the jdata object - note that the override_oversubscribed value filled in above is not applied here */
jdata->oversubscribe_override = false;
/* cleanup */
OBJ_DESTRUCT(&nodes);
goto DISPLAY;
}
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
"%s ras:base:allocate nothing found in dash-host - inserting current node",
"%s ras:base:allocate nothing found in rankfile - inserting current node",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* if nothing was found by any of the above methods, then we have no

Просмотреть файл

@ -166,9 +166,13 @@ int orte_rmaps_base_open(void)
param = mca_base_param_reg_string_name("rmaps", "base_slot_list",
"List of processor IDs to bind MPI processes to (e.g., used in conjunction with rank files) [default=NULL]",
false, false, NULL, &orte_rmaps_base.slot_list);
/* ensure we flag mapping by user */
if (NULL != orte_rmaps_base.slot_list ||
NULL != orte_rankfile) {
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_BYUSER);
}
/* Should we schedule on the local node or not? */
mca_base_param_reg_int_name("rmaps", "base_no_schedule_local",
"If false, allow scheduling MPI applications on the same node as mpirun (default). If true, do not schedule any MPI applications on the same node as mpirun",
false, false, (int)false, &value);

Просмотреть файл

@ -386,7 +386,7 @@ int orte_rmaps_base_claim_slot(orte_job_t *jdata,
int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
{
orte_job_map_t *map;
orte_vpid_t vpid;
orte_vpid_t vpid, vpid_start=0;
int i, j;
orte_node_t *node;
orte_proc_t *proc;
@ -394,11 +394,9 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
map = jdata->map;
if (ORTE_MAPPING_BYSLOT & map->policy ||
ORTE_MAPPING_BYSOCKET & map->policy ||
ORTE_MAPPING_BYBOARD & map->policy) {
/* assign the ranks sequentially */
vpid = 0;
if (ORTE_MAPPING_BYUSER & map->policy) {
/* find the max vpid already assigned */
vpid_start = ORTE_VPID_MIN;
for (i=0; i < map->nodes->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
continue;
@ -407,7 +405,32 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
continue;
}
proc->name.vpid = vpid++;
if (ORTE_VPID_INVALID != proc->name.vpid &&
vpid_start < proc->name.vpid) {
vpid_start = proc->name.vpid;
}
}
}
/* we start one higher than the max found */
vpid_start++;
}
if (ORTE_MAPPING_BYSLOT & map->policy ||
ORTE_MAPPING_BYSOCKET & map->policy ||
ORTE_MAPPING_BYBOARD & map->policy) {
/* assign the ranks sequentially */
vpid = vpid_start;
for (i=0; i < map->nodes->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
continue;
}
for (j=0; j < node->procs->size; j++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
continue;
}
if (ORTE_VPID_INVALID == proc->name.vpid) {
proc->name.vpid = vpid++;
}
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs,
proc->name.vpid, proc))) {
ORTE_ERROR_LOG(rc);
@ -424,13 +447,15 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
continue;
}
vpid = i;
vpid = i + vpid_start;
for (j=0; j < node->procs->size; j++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
continue;
}
proc->name.vpid = vpid;
vpid += map->num_nodes;
if (ORTE_VPID_INVALID == proc->name.vpid) {
proc->name.vpid = vpid;
vpid += map->num_nodes;
}
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs,
proc->name.vpid, proc))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -48,7 +48,6 @@
static int orte_rmaps_rank_file_parse(const char *);
static char *orte_rmaps_rank_file_parse_string_or_int(void);
char *orte_rmaps_rank_file_path = NULL;
static const char *orte_rmaps_rank_file_name_cur = NULL;
static opal_mutex_t orte_rmaps_rank_file_mutex;
char *orte_rmaps_rank_file_slot_list;
@ -118,7 +117,8 @@ static int map_app_by_node(orte_app_context_t* app,
}
/* Allocate a slot on this node */
node = (orte_node_t*) cur_node_item;
/* pass the base slot list in case it was provided */
/* grab the slot - have a new proc object created */
proc = NULL;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, 1, app->idx,
nodes, jdata->map->oversubscribe, true, &proc))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
@ -227,7 +227,8 @@ static int map_app_by_slot(orte_app_context_t* app,
++num_alloc;
continue;
}
/* pass the base slot list in case it was provided */
/* grab the slot - have a new proc object created */
proc = NULL;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, 1, app->idx,
nodes, jdata->map->oversubscribe, true, &proc))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
@ -333,8 +334,8 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
OBJ_CONSTRUCT(&rankmap, opal_pointer_array_t);
/* parse the rankfile, storing its results in the rankmap */
if ( NULL != orte_rmaps_rank_file_path ) {
if ( ORTE_SUCCESS != (rc = orte_rmaps_rank_file_parse(orte_rmaps_rank_file_path))) {
if ( NULL != orte_rankfile ) {
if ( ORTE_SUCCESS != (rc = orte_rmaps_rank_file_parse(orte_rankfile))) {
ORTE_ERROR_LOG(rc);
goto error;
}
@ -467,6 +468,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
return rc;
}
}
proc->name.vpid = rank;
proc->slot_list = strdup(rfmap->slot_list);
jdata->num_procs++;
}

Просмотреть файл

@ -48,11 +48,8 @@ typedef struct orte_rmaps_rank_file_component_t orte_rmaps_rank_file_component_t
ORTE_MODULE_DECLSPEC extern orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component;
extern orte_rmaps_base_module_t orte_rmaps_rank_file_module;
extern char *orte_mca_rmaps_rank_file_slot_list;
extern char *orte_rmaps_rank_file_path;
typedef struct cpu_socket_t cpu_socket_t;
struct orte_rmaps_rank_file_map_t {

Просмотреть файл

@ -42,7 +42,6 @@
static int orte_rmaps_rank_file_open(void);
static int orte_rmaps_rank_file_close(void);
static int orte_rmaps_rank_file_query(mca_base_module_t **module, int *priority);
char *orte_mca_rmaps_rank_file_slot_list = NULL;
orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component = {
{
@ -75,18 +74,11 @@ static int orte_rmaps_rank_file_open(void)
{
mca_rmaps_rank_file_component.priority = 0;
mca_base_param_reg_string(&mca_rmaps_rank_file_component.super.base_version,
"path",
"The path to the rank mapping file",
false, false, NULL, &orte_rmaps_rank_file_path);
if (NULL != orte_rmaps_rank_file_path) {
if (NULL != orte_rankfile ||
NULL != orte_rmaps_base.slot_list) {
mca_rmaps_rank_file_component.priority = 100;
}
if (NULL != orte_rmaps_base.slot_list) {
mca_rmaps_rank_file_component.priority = 100;
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -88,7 +88,6 @@ int orte_startup_timeout;
int orte_timeout_usec_per_proc;
float orte_max_timeout;
char *orte_default_hostfile = NULL;
opal_buffer_t *orte_tree_launch_cmd = NULL;
@ -140,6 +139,8 @@ uint8_t orte_default_num_cores_per_socket;
/* allocation specification */
char *orte_default_cpu_set;
char *orte_default_hostfile = NULL;
char *orte_rankfile;
/* default rank assignment and binding policy */
orte_mapping_policy_t orte_default_mapping_policy = 0;

Просмотреть файл

@ -276,6 +276,7 @@ typedef uint16_t orte_mapping_policy_t;
#define ORTE_MAPPING_BYBOARD 0x1000
#define ORTE_MAPPING_NO_USE_LOCAL 0x2000
#define ORTE_MAPPING_NPERXXX 0x4000
#define ORTE_MAPPING_BYUSER 0x8000
/* nice macro for setting these */
#define ORTE_SET_MAPPING_POLICY(pol) \
orte_default_mapping_policy = (orte_default_mapping_policy & 0x00ff) | (pol);
@ -520,8 +521,6 @@ ORTE_DECLSPEC extern int orte_startup_timeout;
ORTE_DECLSPEC extern int orte_timeout_usec_per_proc;
ORTE_DECLSPEC extern float orte_max_timeout;
ORTE_DECLSPEC extern char *orte_default_hostfile;
ORTE_DECLSPEC extern opal_buffer_t *orte_tree_launch_cmd;
/* global arrays for data storage */
@ -571,6 +570,8 @@ ORTE_DECLSPEC extern uint8_t orte_default_num_cores_per_socket;
/* allocation specification */
ORTE_DECLSPEC extern char *orte_default_cpu_set;
ORTE_DECLSPEC extern char *orte_default_hostfile;
ORTE_DECLSPEC extern char *orte_rankfile;
/* default rank assignment and binding policy */
ORTE_DECLSPEC extern orte_mapping_policy_t orte_default_mapping_policy;

Просмотреть файл

@ -199,6 +199,12 @@ int orte_register_params(void)
mca_base_param_reg_string_name("orte", "default_hostfile",
"Name of the default hostfile (relative or absolute path)",
false, false, NULL, &orte_default_hostfile);
/* rankfile */
tmp = mca_base_param_reg_string_name("orte", "rankfile",
"Name of the rankfile to be used for mapping processes (relative or absolute path)",
false, false, NULL, NULL);
mca_base_param_reg_syn_name(tmp, "rmaps", "rank_file_path", true);
mca_base_param_lookup_string(tmp, &orte_rankfile);
/* whether or not to keep FQDN hostnames */

Просмотреть файл

@ -243,7 +243,7 @@ static opal_cmd_line_init_t cmd_line_init[] = {
NULL, OPAL_CMD_LINE_TYPE_STRING,
"Provide a cartography file" },
{ "rmaps_rank", "file", "path", '\0', "rf", "rankfile", 1,
{ "orte", "rankfile", NULL, '\0', "rf", "rankfile", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
"Provide a rankfile file" },

Просмотреть файл

@ -135,13 +135,13 @@ static int hostfile_parse_line(int token, opal_list_t* updates, opal_list_t* exc
char* username = NULL;
int cnt;
int number_of_slots = 0;
char buff[64];
if (ORTE_HOSTFILE_STRING == token ||
ORTE_HOSTFILE_HOSTNAME == token ||
ORTE_HOSTFILE_INT == token ||
ORTE_HOSTFILE_IPV4 == token ||
ORTE_HOSTFILE_IPV6 == token) {
char buff[64];
if(ORTE_HOSTFILE_INT == token) {
snprintf(buff, 64, "%d", orte_util_hostfile_value.ival);
@ -174,7 +174,7 @@ static int hostfile_parse_line(int token, opal_list_t* updates, opal_list_t* exc
}
node_name[len-1] = '\0'; /* truncate */
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
OPAL_OUTPUT_VERBOSE((0, orte_debug_output,
"%s hostfile: node %s is being excluded",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name));
@ -216,7 +216,7 @@ static int hostfile_parse_line(int token, opal_list_t* updates, opal_list_t* exc
node_name = strdup(orte_process_info.nodename);
}
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
OPAL_OUTPUT_VERBOSE((0, orte_debug_output,
"%s hostfile: node %s is being included - keep all is %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name,
keep_all ? "TRUE" : "FALSE"));
@ -241,16 +241,75 @@ static int hostfile_parse_line(int token, opal_list_t* updates, opal_list_t* exc
/* store this for later processing */
node = OBJ_NEW(orte_node_t);
node->name = strdup(orte_util_hostfile_value.sval);
} else if (ORTE_HOSTFILE_RANK == token) {
/* we can ignore the rank, but we need to extract the node name. we
* first need to shift over to the other side of the equal sign as
* this is where the node name will be
*/
while (!orte_util_hostfile_done &&
ORTE_HOSTFILE_EQUAL != token) {
token = orte_util_hostfile_lex();
}
if (orte_util_hostfile_done) {
/* bad syntax somewhere */
return ORTE_ERROR;
}
/* next position should be the node name */
token = orte_util_hostfile_lex();
if(ORTE_HOSTFILE_INT == token) {
snprintf(buff, 64, "%d", orte_util_hostfile_value.ival);
value = buff;
} else {
value = orte_util_hostfile_value.sval;
}
argv = opal_argv_split (value, '@');
cnt = opal_argv_count (argv);
if (1 == cnt) {
node_name = strdup(argv[0]);
} else if (2 == cnt) {
username = strdup(argv[0]);
node_name = strdup(argv[1]);
} else {
opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */
}
opal_argv_free (argv);
/* Do we need to make a new node object? First check to see
* if we are keeping everything or if it's already in the updates
* list. Because we check keep_all first, if that is set we will
* not do the hostfile_lookup call, and thus won't remove the
* pre-existing node from the updates list
*/
if (keep_all || NULL == (node = hostfile_lookup(updates, node_name))) {
node = OBJ_NEW(orte_node_t);
node->name = node_name;
}
/* add a slot */
node->slots++;
/* do we need to record an alias for this node? */
if (NULL != node_alias) {
/* add to list of aliases for this node - only add if unique */
opal_argv_append_unique_nosize(&node->alias, node_alias, false);
free(node_alias);
}
/* skip to end of line */
while (!orte_util_hostfile_done &&
ORTE_HOSTFILE_NEWLINE != token) {
token = orte_util_hostfile_lex();
}
opal_list_append(updates, &node->super);
return ORTE_SUCCESS;
} else {
hostfile_parse_error(token);
return ORTE_ERROR;
}
got_count = false;
while (!orte_util_hostfile_done) {
token = orte_util_hostfile_lex();
switch (token) {
switch (token) {
case ORTE_HOSTFILE_DONE:
goto done;
@ -417,19 +476,14 @@ static int hostfile_parse(const char *hostfile, opal_list_t* updates, opal_list_
case ORTE_HOSTFILE_HOSTNAME:
case ORTE_HOSTFILE_IPV4:
case ORTE_HOSTFILE_IPV6:
case ORTE_HOSTFILE_RELATIVE:
case ORTE_HOSTFILE_RANK:
rc = hostfile_parse_line(token, updates, exclude, keep_all);
if (ORTE_SUCCESS != rc) {
goto unlock;
}
break;
case ORTE_HOSTFILE_RELATIVE:
rc = hostfile_parse_line(token, updates, exclude, keep_all);
if (ORTE_SUCCESS != rc) {
goto unlock;
}
break;
default:
hostfile_parse_error(token);
goto unlock;

Просмотреть файл

@ -76,5 +76,7 @@ extern orte_hostfile_value_t orte_util_hostfile_value;
#define ORTE_HOSTFILE_SOCKETS_PER_BOARD 18
#define ORTE_HOSTFILE_CORES_PER_SOCKET 19
#define ORTE_HOSTFILE_CPU_SET 20
/* ensure we can handle a rank_file input */
#define ORTE_HOSTFILE_RANK 21
#endif

Просмотреть файл

@ -113,6 +113,11 @@ count_max { orte_util_hostfile_value.sval = yytext;
max_count { orte_util_hostfile_value.sval = yytext;
return ORTE_HOSTFILE_SLOTS_MAX; }
rank { orte_util_hostfile_value.sval = yytext;
return ORTE_HOSTFILE_RANK; }
slot { orte_util_hostfile_value.sval = yytext;
return ORTE_HOSTFILE_SLOT; }
username { orte_util_hostfile_value.sval = yytext;
return ORTE_HOSTFILE_USERNAME; }
"user-name" { orte_util_hostfile_value.sval = yytext;