1
1

Do not pass topologies during the tree spawn of daemons, as there is no way the HNP can know the backend topologies at that point. Any needed topologies will be sent along with the launch_apps command.

Do not pass param-file MCA params if the user has requested that no param files be read. This is required when trying to avoid launch-time penalties from large numbers of processes reading the default param files. The daemon picks the params up and passes them along anyway, so it isn't clear what value we gain from having every process read the defaults.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-04-25 21:24:21 -07:00
родитель ee4ce13e16
Коммит 180809f2ef
4 изменённых файлов: 84 добавлений и 78 удалений

Просмотреть файл

@ -1544,6 +1544,25 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
opal_argv_append(argc, argv, orte_xterm);
}
loc_id = mca_base_var_find("opal", "mca", "base", "param_files");
if (loc_id < 0) {
rc = OPAL_ERR_NOT_FOUND;
ORTE_ERROR_LOG(rc);
return rc;
}
tmp_value = NULL;
rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (NULL != tmp_value && NULL != tmp_value[0]) {
rc = strcmp(tmp_value[0], "none");
} else {
rc = 1;
}
if (0 != rc) {
/*
* Pass along the Aggregate MCA Parameter Sets
*/
@ -1633,6 +1652,7 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
opal_argv_append(argc, argv, tmp_value[0]);
}
}
}
/* pass along any cmd line MCA params provided to mpirun,
* being sure to "purge" any that would cause problems

Просмотреть файл

@ -800,15 +800,6 @@ static int remote_spawn(opal_buffer_t *launch)
goto cleanup;
}
/* extract and update the daemon map */
if (ORTE_SUCCESS != (rc = orte_util_decode_daemon_nodemap(launch))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* since we are tree-spawning, we need to update the routing plan */
orte_routed.update_routing_plan(NULL);
/* get the updated routing list */
rtmod = orte_rml.get_routed(orte_coll_conduit);
OBJ_CONSTRUCT(&coll, opal_list_t);
@ -1177,12 +1168,6 @@ static void launch_daemons(int fd, short args, void *cbdata)
OBJ_RELEASE(orte_tree_launch_cmd);
goto cleanup;
}
/* construct a nodemap of all daemons we know about */
if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(orte_tree_launch_cmd))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(orte_tree_launch_cmd);
goto cleanup;
}
/* get the orted job data object */
if (NULL == (jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {

Просмотреть файл

@ -447,7 +447,6 @@ int orte_util_nidmap_create(char **regex)
asprintf(&tmp2, "%s@%s", nodenames, tmp);
free(nodenames);
free(tmp);
*regex = tmp2;
return ORTE_SUCCESS;
}
@ -760,9 +759,10 @@ int orte_util_nidmap_parse(char *regex)
dvpids[n][strlen(dvpids[n])-2] = '\0'; // remove trailing paren
++ptr;
rng->cnt = strtoul(ptr, NULL, 10);
} else {
rng->cnt = 1;
}
/* convert the number - since it might be a range,
* save the remainder pointer */
/* convert the number */
rng->vpid = strtoul(dvpids[n], NULL, 10);
}
opal_argv_free(dvpids);
@ -797,16 +797,17 @@ int orte_util_nidmap_parse(char *regex)
nd->daemon = proc;
}
++cnt;
if (cnt == rng->cnt) {
if (rng->cnt <= cnt) {
rng = (orte_regex_range_t*)opal_list_get_next(&rng->super);
if (NULL == rng) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
cnt = 0;
}
}
/* unpdate num procs */
/* update num procs */
if (orte_process_info.num_procs != daemons->num_procs) {
orte_process_info.num_procs = daemons->num_procs;
/* need to update the routing plan */