Do not pass topologies during tree spawn of daemons, as there is no way the HNP can know the backend topologies at that point. Any needed topologies will be sent along with the launch_apps command.
Do not pass param file MCA params if the user has requested that no param files be read - this is required when trying to avoid launch-time penalties from large numbers of processes reading default param files. The daemon picks them up and passes them along anyway, so it isn't clear what value we gain from having them all read the defaults.
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
ee4ce13e16
Коммит
180809f2ef
@ -1544,6 +1544,25 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
|
|||||||
opal_argv_append(argc, argv, orte_xterm);
|
opal_argv_append(argc, argv, orte_xterm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
loc_id = mca_base_var_find("opal", "mca", "base", "param_files");
|
||||||
|
if (loc_id < 0) {
|
||||||
|
rc = OPAL_ERR_NOT_FOUND;
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
tmp_value = NULL;
|
||||||
|
rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL);
|
||||||
|
if (ORTE_SUCCESS != rc) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
if (NULL != tmp_value && NULL != tmp_value[0]) {
|
||||||
|
rc = strcmp(tmp_value[0], "none");
|
||||||
|
} else {
|
||||||
|
rc = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != rc) {
|
||||||
/*
|
/*
|
||||||
* Pass along the Aggregate MCA Parameter Sets
|
* Pass along the Aggregate MCA Parameter Sets
|
||||||
*/
|
*/
|
||||||
@ -1633,6 +1652,7 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
|
|||||||
opal_argv_append(argc, argv, tmp_value[0]);
|
opal_argv_append(argc, argv, tmp_value[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* pass along any cmd line MCA params provided to mpirun,
|
/* pass along any cmd line MCA params provided to mpirun,
|
||||||
* being sure to "purge" any that would cause problems
|
* being sure to "purge" any that would cause problems
|
||||||
|
@ -800,15 +800,6 @@ static int remote_spawn(opal_buffer_t *launch)
|
|||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* extract and update the daemon map */
|
|
||||||
if (ORTE_SUCCESS != (rc = orte_util_decode_daemon_nodemap(launch))) {
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* since we are tree-spawning, we need to update the routing plan */
|
|
||||||
orte_routed.update_routing_plan(NULL);
|
|
||||||
|
|
||||||
/* get the updated routing list */
|
/* get the updated routing list */
|
||||||
rtmod = orte_rml.get_routed(orte_coll_conduit);
|
rtmod = orte_rml.get_routed(orte_coll_conduit);
|
||||||
OBJ_CONSTRUCT(&coll, opal_list_t);
|
OBJ_CONSTRUCT(&coll, opal_list_t);
|
||||||
@ -1177,12 +1168,6 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
|||||||
OBJ_RELEASE(orte_tree_launch_cmd);
|
OBJ_RELEASE(orte_tree_launch_cmd);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
/* construct a nodemap of all daemons we know about */
|
|
||||||
if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(orte_tree_launch_cmd))) {
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
OBJ_RELEASE(orte_tree_launch_cmd);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* get the orted job data object */
|
/* get the orted job data object */
|
||||||
if (NULL == (jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
|
if (NULL == (jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
|
||||||
|
@ -447,7 +447,6 @@ int orte_util_nidmap_create(char **regex)
|
|||||||
asprintf(&tmp2, "%s@%s", nodenames, tmp);
|
asprintf(&tmp2, "%s@%s", nodenames, tmp);
|
||||||
free(nodenames);
|
free(nodenames);
|
||||||
free(tmp);
|
free(tmp);
|
||||||
|
|
||||||
*regex = tmp2;
|
*regex = tmp2;
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -760,9 +759,10 @@ int orte_util_nidmap_parse(char *regex)
|
|||||||
dvpids[n][strlen(dvpids[n])-2] = '\0'; // remove trailing paren
|
dvpids[n][strlen(dvpids[n])-2] = '\0'; // remove trailing paren
|
||||||
++ptr;
|
++ptr;
|
||||||
rng->cnt = strtoul(ptr, NULL, 10);
|
rng->cnt = strtoul(ptr, NULL, 10);
|
||||||
|
} else {
|
||||||
|
rng->cnt = 1;
|
||||||
}
|
}
|
||||||
/* convert the number - since it might be a range,
|
/* convert the number */
|
||||||
* save the remainder pointer */
|
|
||||||
rng->vpid = strtoul(dvpids[n], NULL, 10);
|
rng->vpid = strtoul(dvpids[n], NULL, 10);
|
||||||
}
|
}
|
||||||
opal_argv_free(dvpids);
|
opal_argv_free(dvpids);
|
||||||
@ -797,16 +797,17 @@ int orte_util_nidmap_parse(char *regex)
|
|||||||
nd->daemon = proc;
|
nd->daemon = proc;
|
||||||
}
|
}
|
||||||
++cnt;
|
++cnt;
|
||||||
if (cnt == rng->cnt) {
|
if (rng->cnt <= cnt) {
|
||||||
rng = (orte_regex_range_t*)opal_list_get_next(&rng->super);
|
rng = (orte_regex_range_t*)opal_list_get_next(&rng->super);
|
||||||
if (NULL == rng) {
|
if (NULL == rng) {
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||||
return ORTE_ERR_NOT_FOUND;
|
return ORTE_ERR_NOT_FOUND;
|
||||||
}
|
}
|
||||||
|
cnt = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* unpdate num procs */
|
/* update num procs */
|
||||||
if (orte_process_info.num_procs != daemons->num_procs) {
|
if (orte_process_info.num_procs != daemons->num_procs) {
|
||||||
orte_process_info.num_procs = daemons->num_procs;
|
orte_process_info.num_procs = daemons->num_procs;
|
||||||
/* need to update the routing plan */
|
/* need to update the routing plan */
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user