1
1

Add support for the -v (verbose) option to prun and silence the "executing" and "completed" output otherwise.

Debounce "unreachable" notifications for tools when they disconnect.
Enable the -x command-line option for prun.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
(cherry picked from commit 0a5b36180a22959654461ac1303cec35313f8b4a)
Этот коммит содержится в:
Ralph Castain 2017-10-10 10:08:30 -07:00
родитель 1ae78e23fa
Коммит 388034c814
4 изменённых файла: 106 добавлений и 18 удалений

Просмотреть файл

@@ -815,7 +815,9 @@ static void _notify_client_event(int sd, short args, void *cbdata)
/* check for caching instructions */ /* check for caching instructions */
for (n=0; n < cd->ninfo; n++) { for (n=0; n < cd->ninfo; n++) {
if (0 == strncmp(cd->info[n].key, PMIX_EVENT_DO_NOT_CACHE, PMIX_MAX_KEYLEN)) { if (0 == strncmp(cd->info[n].key, PMIX_EVENT_DO_NOT_CACHE, PMIX_MAX_KEYLEN)) {
holdcd = PMIX_INFO_TRUE(&cd->info[n]); if (PMIX_INFO_TRUE(&cd->info[n])) {
holdcd = false;
}
break; break;
} }
} }

Просмотреть файл

@@ -140,7 +140,7 @@ void pmix_ptl_base_lost_connection(pmix_peer_t *peer, pmix_status_t err)
} }
} }
} }
if (!peer->finalized) { if (!peer->finalized && !PMIX_PROC_IS_TOOL(peer)) {
/* if this peer already called finalize, then /* if this peer already called finalize, then
* we are just seeing their connection go away * we are just seeing their connection go away
* when they terminate - so do not generate * when they terminate - so do not generate

Просмотреть файл

@@ -599,19 +599,18 @@ static void dvm_notify(int sd, short args, void *cbdata)
val->type = OPAL_STATUS; val->type = OPAL_STATUS;
val->data.status = ret; val->data.status = ret;
opal_list_append(info, &val->super); opal_list_append(info, &val->super);
/* if there was a problem, we need to send the requestor more info about what happened */ /* tell the requestor which job or proc */
if (ORTE_SUCCESS != ret) { val = OBJ_NEW(opal_value_t);
val = OBJ_NEW(opal_value_t); val->key = strdup(OPAL_PMIX_PROCID);
val->key = strdup(OPAL_PMIX_PROCID); val->type = OPAL_NAME;
val->type = OPAL_NAME; val->data.name.jobid = jdata->jobid;
val->data.name.jobid = jdata->jobid; if (NULL != pptr) {
if (NULL != pptr) { val->data.name.vpid = pptr->name.vpid;
val->data.name.vpid = pptr->name.vpid; } else {
} else { val->data.name.vpid = ORTE_VPID_WILDCARD;
val->data.name.vpid = ORTE_VPID_WILDCARD;
}
opal_list_append(info, &val->super);
} }
opal_list_append(info, &val->super);
/* setup the caddy */
mycaddy = (mycaddy_t*)malloc(sizeof(mycaddy_t)); mycaddy = (mycaddy_t*)malloc(sizeof(mycaddy_t));
mycaddy->info = info; mycaddy->info = info;
OBJ_RETAIN(jdata); OBJ_RETAIN(jdata);

Просмотреть файл

@@ -174,14 +174,19 @@ static void evhandler(int status,
void *cbdata) void *cbdata)
{ {
opal_value_t *val; opal_value_t *val;
int jobstatus=0;
orte_jobid_t jobid = ORTE_JOBID_INVALID;
if (NULL != info) { if (orte_cmd_options.verbose && NULL != info) {
OPAL_LIST_FOREACH(val, info, opal_value_t) { OPAL_LIST_FOREACH(val, info, opal_value_t) {
if (0 == strcmp(val->key, OPAL_PMIX_JOB_TERM_STATUS)) { if (0 == strcmp(val->key, OPAL_PMIX_JOB_TERM_STATUS)) {
opal_output(0, "JOB COMPLETED WITH STATUS %d", jobstatus = val->data.integer;
val->data.integer); } else if (0 == strcmp(val->key, OPAL_PMIX_PROCID)) {
jobid = val->data.name.jobid;
} }
} }
opal_output(0, "JOB %s COMPLETED WITH STATUS %d",
ORTE_JOBID_PRINT(jobid), jobstatus);
} }
if (NULL != cbfunc) { if (NULL != cbfunc) {
cbfunc(OPAL_SUCCESS, NULL, NULL, NULL, cbdata); cbfunc(OPAL_SUCCESS, NULL, NULL, NULL, cbdata);
@@ -622,7 +627,9 @@ int prun(int argc, char *argv[])
OPAL_LIST_DESTRUCT(&job_info); OPAL_LIST_DESTRUCT(&job_info);
OPAL_LIST_DESTRUCT(&apps); OPAL_LIST_DESTRUCT(&apps);
opal_output(0, "JOB %s EXECUTING", OPAL_JOBID_PRINT(jobid)); if (orte_cmd_options.verbose) {
opal_output(0, "JOB %s EXECUTING", OPAL_JOBID_PRINT(jobid));
}
while (active) { while (active) {
nanosleep(&tp, NULL); nanosleep(&tp, NULL);
@@ -788,6 +795,86 @@ static int create_app(int argc, char* argv[],
} }
} }
/* set necessary env variables for external usage from tune conf file*/
int set_from_file = 0;
char **vars = NULL;
if (OPAL_SUCCESS == mca_base_var_process_env_list_from_file(&vars) &&
NULL != vars) {
for (i=0; NULL != vars[i]; i++) {
value = strchr(vars[i], '=');
/* terminate the name of the param */
*value = '\0';
/* step over the equals */
value++;
/* overwrite any prior entry */
opal_setenv(vars[i], value, true, &app->env);
/* save it for any comm_spawn'd apps */
opal_setenv(vars[i], value, true, &orte_forwarded_envars);
}
set_from_file = 1;
opal_argv_free(vars);
}
/* Did the user request to export any environment variables on the cmd line? */
char *env_set_flag;
env_set_flag = getenv("OMPI_MCA_mca_base_env_list");
if (opal_cmd_line_is_taken(orte_cmd_line, "x")) {
if (NULL != env_set_flag) {
opal_show_help("help-orterun.txt", "orterun:conflict-env-set", false);
return ORTE_ERR_FATAL;
}
j = opal_cmd_line_get_ninsts(orte_cmd_line, "x");
for (i = 0; i < j; ++i) {
param = opal_cmd_line_get_param(orte_cmd_line, "x", i, 0);
if (NULL != (value = strchr(param, '='))) {
/* terminate the name of the param */
*value = '\0';
/* step over the equals */
value++;
/* overwrite any prior entry */
opal_setenv(param, value, true, &app->env);
/* save it for any comm_spawn'd apps */
opal_setenv(param, value, true, &orte_forwarded_envars);
} else {
value = getenv(param);
if (NULL != value) {
/* overwrite any prior entry */
opal_setenv(param, value, true, &app->env);
/* save it for any comm_spawn'd apps */
opal_setenv(param, value, true, &orte_forwarded_envars);
} else {
opal_output(0, "Warning: could not find environment variable \"%s\"\n", param);
}
}
}
} else if (NULL != env_set_flag) {
/* if mca_base_env_list was set, check if some of env vars were set via -x from a conf file.
* If this is the case, error out.
*/
if (!set_from_file) {
/* set necessary env variables for external usage */
vars = NULL;
if (OPAL_SUCCESS == mca_base_var_process_env_list(env_set_flag, &vars) &&
NULL != vars) {
for (i=0; NULL != vars[i]; i++) {
value = strchr(vars[i], '=');
/* terminate the name of the param */
*value = '\0';
/* step over the equals */
value++;
/* overwrite any prior entry */
opal_setenv(vars[i], value, true, &app->env);
/* save it for any comm_spawn'd apps */
opal_setenv(vars[i], value, true, &orte_forwarded_envars);
}
opal_argv_free(vars);
}
} else {
opal_show_help("help-orterun.txt", "orterun:conflict-env-set", false);
return ORTE_ERR_FATAL;
}
}
/* Did the user request a specific wdir? */ /* Did the user request a specific wdir? */
if (NULL != orte_cmd_options.wdir) { if (NULL != orte_cmd_options.wdir) {