1
1

Do not pass topologies during tree spawn of daemons, as there is no way the HNP can know the backend topologies at that point. Any needed topologies will be sent along with the launch_apps command.

Do not pass param-file MCA params if the user has requested that no param files be read. This is required when trying to avoid launch-time penalties from large numbers of processes reading the default param files. The daemon picks the params up and passes them along anyway, so it isn't clear what value we gain from having every process read the defaults.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-04-25 21:24:21 -07:00
родитель ee4ce13e16
Коммит 180809f2ef
4 изменённых файлов: 84 добавлений и 78 удалений

Просмотреть файл

@ -1544,51 +1544,34 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
opal_argv_append(argc, argv, orte_xterm); opal_argv_append(argc, argv, orte_xterm);
} }
/* loc_id = mca_base_var_find("opal", "mca", "base", "param_files");
* Pass along the Aggregate MCA Parameter Sets
*/
/* Add the 'prefix' param */
tmp_value = NULL;
loc_id = mca_base_var_find("opal", "mca", "base", "envar_file_prefix");
if (loc_id < 0) { if (loc_id < 0) {
rc = OPAL_ERR_NOT_FOUND; rc = OPAL_ERR_NOT_FOUND;
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
tmp_value = NULL;
rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL); rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
if( NULL != tmp_value && NULL != tmp_value[0] ) { if (NULL != tmp_value && NULL != tmp_value[0]) {
/* Could also use the short version '-tune' rc = strcmp(tmp_value[0], "none");
* but being verbose has some value } else {
*/ rc = 1;
opal_argv_append(argc, argv, "-mca");
opal_argv_append(argc, argv, "mca_base_envar_file_prefix");
opal_argv_append(argc, argv, tmp_value[0]);
} }
tmp_value2 = NULL; if (0 != rc) {
loc_id = mca_base_var_find("opal", "mca", "base", "param_file_prefix"); /*
mca_base_var_get_value(loc_id, &tmp_value2, NULL, NULL); * Pass along the Aggregate MCA Parameter Sets
if( NULL != tmp_value2 && NULL != tmp_value2[0] ) {
/* Could also use the short version '-am'
* but being verbose has some value
*/ */
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); /* Add the 'prefix' param */
opal_argv_append(argc, argv, "mca_base_param_file_prefix");
opal_argv_append(argc, argv, tmp_value2[0]);
orte_show_help("help-plm-base.txt", "deprecated-amca", true);
}
if ((NULL != tmp_value && NULL != tmp_value[0])
|| (NULL != tmp_value2 && NULL != tmp_value2[0])) {
/* Add the 'path' param */
tmp_value = NULL; tmp_value = NULL;
loc_id = mca_base_var_find("opal", "mca", "base", "param_file_path");
loc_id = mca_base_var_find("opal", "mca", "base", "envar_file_prefix");
if (loc_id < 0) { if (loc_id < 0) {
rc = OPAL_ERR_NOT_FOUND;
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
@ -1598,39 +1581,76 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
return rc; return rc;
} }
if( NULL != tmp_value && NULL != tmp_value[0] ) { if( NULL != tmp_value && NULL != tmp_value[0] ) {
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); /* Could also use the short version '-tune'
opal_argv_append(argc, argv, "mca_base_param_file_path"); * but being verbose has some value
*/
opal_argv_append(argc, argv, "-mca");
opal_argv_append(argc, argv, "mca_base_envar_file_prefix");
opal_argv_append(argc, argv, tmp_value[0]); opal_argv_append(argc, argv, tmp_value[0]);
} }
/* Add the 'path' param */ tmp_value2 = NULL;
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); loc_id = mca_base_var_find("opal", "mca", "base", "param_file_prefix");
opal_argv_append(argc, argv, "mca_base_param_file_path_force"); mca_base_var_get_value(loc_id, &tmp_value2, NULL, NULL);
if( NULL != tmp_value2 && NULL != tmp_value2[0] ) {
/* Could also use the short version '-am'
* but being verbose has some value
*/
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(argc, argv, "mca_base_param_file_prefix");
opal_argv_append(argc, argv, tmp_value2[0]);
orte_show_help("help-plm-base.txt", "deprecated-amca", true);
}
tmp_value = NULL; if ((NULL != tmp_value && NULL != tmp_value[0])
loc_id = mca_base_var_find("opal", "mca", "base", "param_file_path_force"); || (NULL != tmp_value2 && NULL != tmp_value2[0])) {
if (loc_id < 0) { /* Add the 'path' param */
rc = OPAL_ERR_NOT_FOUND; tmp_value = NULL;
ORTE_ERROR_LOG(rc); loc_id = mca_base_var_find("opal", "mca", "base", "param_file_path");
return rc; if (loc_id < 0) {
} ORTE_ERROR_LOG(rc);
rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL); return rc;
if (OPAL_SUCCESS != rc) { }
ORTE_ERROR_LOG(rc); rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL);
return rc; if (ORTE_SUCCESS != rc) {
} ORTE_ERROR_LOG(rc);
if( NULL == tmp_value || NULL == tmp_value[0] ) { return rc;
/* Get the current working directory */ }
tmp_force = (char *) malloc(sizeof(char) * OPAL_PATH_MAX); if( NULL != tmp_value && NULL != tmp_value[0] ) {
if (NULL == getcwd(tmp_force, OPAL_PATH_MAX)) { opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
free(tmp_force); opal_argv_append(argc, argv, "mca_base_param_file_path");
tmp_force = strdup(""); opal_argv_append(argc, argv, tmp_value[0]);
} }
opal_argv_append(argc, argv, tmp_force); /* Add the 'path' param */
free(tmp_force); opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
} else { opal_argv_append(argc, argv, "mca_base_param_file_path_force");
opal_argv_append(argc, argv, tmp_value[0]);
tmp_value = NULL;
loc_id = mca_base_var_find("opal", "mca", "base", "param_file_path_force");
if (loc_id < 0) {
rc = OPAL_ERR_NOT_FOUND;
ORTE_ERROR_LOG(rc);
return rc;
}
rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL);
if (OPAL_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
if( NULL == tmp_value || NULL == tmp_value[0] ) {
/* Get the current working directory */
tmp_force = (char *) malloc(sizeof(char) * OPAL_PATH_MAX);
if (NULL == getcwd(tmp_force, OPAL_PATH_MAX)) {
free(tmp_force);
tmp_force = strdup("");
}
opal_argv_append(argc, argv, tmp_force);
free(tmp_force);
} else {
opal_argv_append(argc, argv, tmp_value[0]);
}
} }
} }

Просмотреть файл

@ -800,15 +800,6 @@ static int remote_spawn(opal_buffer_t *launch)
goto cleanup; goto cleanup;
} }
/* extract and update the daemon map */
if (ORTE_SUCCESS != (rc = orte_util_decode_daemon_nodemap(launch))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* since we are tree-spawning, we need to update the routing plan */
orte_routed.update_routing_plan(NULL);
/* get the updated routing list */ /* get the updated routing list */
rtmod = orte_rml.get_routed(orte_coll_conduit); rtmod = orte_rml.get_routed(orte_coll_conduit);
OBJ_CONSTRUCT(&coll, opal_list_t); OBJ_CONSTRUCT(&coll, opal_list_t);
@ -1177,12 +1168,6 @@ static void launch_daemons(int fd, short args, void *cbdata)
OBJ_RELEASE(orte_tree_launch_cmd); OBJ_RELEASE(orte_tree_launch_cmd);
goto cleanup; goto cleanup;
} }
/* construct a nodemap of all daemons we know about */
if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(orte_tree_launch_cmd))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(orte_tree_launch_cmd);
goto cleanup;
}
/* get the orted job data object */ /* get the orted job data object */
if (NULL == (jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) { if (NULL == (jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {

Просмотреть файл

@ -413,7 +413,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
* are getting for an initial map of a job, * are getting for an initial map of a job,
* then mark all nodes as unmapped * then mark all nodes as unmapped
*/ */
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
} }
if (NULL == nd || NULL == nd->daemon || if (NULL == nd || NULL == nd->daemon ||
NULL == node->daemon || NULL == node->daemon ||

Просмотреть файл

@ -447,7 +447,6 @@ int orte_util_nidmap_create(char **regex)
asprintf(&tmp2, "%s@%s", nodenames, tmp); asprintf(&tmp2, "%s@%s", nodenames, tmp);
free(nodenames); free(nodenames);
free(tmp); free(tmp);
*regex = tmp2; *regex = tmp2;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -760,9 +759,10 @@ int orte_util_nidmap_parse(char *regex)
dvpids[n][strlen(dvpids[n])-2] = '\0'; // remove trailing paren dvpids[n][strlen(dvpids[n])-2] = '\0'; // remove trailing paren
++ptr; ++ptr;
rng->cnt = strtoul(ptr, NULL, 10); rng->cnt = strtoul(ptr, NULL, 10);
} else {
rng->cnt = 1;
} }
/* convert the number - since it might be a range, /* convert the number */
* save the remainder pointer */
rng->vpid = strtoul(dvpids[n], NULL, 10); rng->vpid = strtoul(dvpids[n], NULL, 10);
} }
opal_argv_free(dvpids); opal_argv_free(dvpids);
@ -797,16 +797,17 @@ int orte_util_nidmap_parse(char *regex)
nd->daemon = proc; nd->daemon = proc;
} }
++cnt; ++cnt;
if (cnt == rng->cnt) { if (rng->cnt <= cnt) {
rng = (orte_regex_range_t*)opal_list_get_next(&rng->super); rng = (orte_regex_range_t*)opal_list_get_next(&rng->super);
if (NULL == rng) { if (NULL == rng) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND; return ORTE_ERR_NOT_FOUND;
} }
cnt = 0;
} }
} }
/* unpdate num procs */ /* update num procs */
if (orte_process_info.num_procs != daemons->num_procs) { if (orte_process_info.num_procs != daemons->num_procs) {
orte_process_info.num_procs = daemons->num_procs; orte_process_info.num_procs = daemons->num_procs;
/* need to update the routing plan */ /* need to update the routing plan */