From 70591bf4dcac9705bb855a047284f551fd1cc90d Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 11 Mar 2017 08:20:38 -0800 Subject: [PATCH] Enable parallel fork/exec of local procs by providing the option of multiple odls progress threads Signed-off-by: Ralph Castain --- orte/mca/iof/base/iof_base_setup.c | 5 - orte/mca/odls/base/odls_base_default_fns.c | 575 ++++++++++---------- orte/mca/odls/base/odls_base_frame.c | 46 ++ orte/mca/odls/base/odls_private.h | 31 +- orte/mca/odls/default/odls_default_module.c | 106 ++-- orte/mca/schizo/base/base.h | 3 +- orte/mca/schizo/base/schizo_base_stubs.c | 5 +- orte/mca/schizo/ompi/schizo_ompi.c | 30 +- orte/mca/schizo/schizo.h | 3 +- 9 files changed, 426 insertions(+), 378 deletions(-) diff --git a/orte/mca/iof/base/iof_base_setup.c b/orte/mca/iof/base/iof_base_setup.c index 83eba0ea52..36e4559299 100644 --- a/orte/mca/iof/base/iof_base_setup.c +++ b/orte/mca/iof/base/iof_base_setup.c @@ -219,11 +219,6 @@ orte_iof_base_setup_parent(const orte_process_name_t* name, { int ret; - close(opts->p_stdin[0]); - close(opts->p_stdout[1]); - close(opts->p_stderr[1]); - close(opts->p_internal[1]); - /* connect stdin endpoint */ if (opts->connect_stdin) { /* and connect the pty to stdin */ diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 8947ec2851..46cf89f67e 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -625,22 +625,186 @@ static int compute_num_procs_alive(orte_jobid_t job) return num_procs_alive; } +void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) +{ + orte_odls_spawn_caddy_t *cd = (orte_odls_spawn_caddy_t*)cbdata; + orte_job_t *jobdat = cd->jdata; + orte_app_context_t *app = cd->app; + orte_proc_t *child = cd->child; + char **env = NULL, **argv = NULL, *cmd = NULL; + int rc, i; + + /* thread-protect common values */ + env = opal_argv_copy(app->env); + + /* setup the pmix environment */ + if 
(OPAL_SUCCESS != (rc = opal_pmix.server_setup_fork(&child->name, &env))) { + ORTE_ERROR_LOG(rc); + goto errorout; + } + + /* ensure we clear any prior info regarding state or exit status in + * case this is a restart + */ + child->exit_code = 0; + ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_WAITPID); + /* if we are not forwarding output for this job, then + * flag iof as complete + */ + if (ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { + ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_IOF_COMPLETE); + } else { + ORTE_FLAG_SET(child, ORTE_PROC_FLAG_IOF_COMPLETE); + } + child->pid = 0; + if (NULL != child->rml_uri) { + free(child->rml_uri); + child->rml_uri = NULL; + } + + /* did the user request we display output in xterms? */ + if (NULL != orte_xterm) { + opal_list_item_t *nmitem; + orte_namelist_t *nm; + /* see if this rank is one of those requested */ + for (nmitem = opal_list_get_first(&orte_odls_globals.xterm_ranks); + nmitem != opal_list_get_end(&orte_odls_globals.xterm_ranks); + nmitem = opal_list_get_next(nmitem)) { + nm = (orte_namelist_t*)nmitem; + if (ORTE_VPID_WILDCARD == nm->name.vpid || + child->name.vpid == nm->name.vpid) { + /* we want this one - modify the app's command to include + * the orte xterm cmd that starts with the xtermcmd */ + argv = opal_argv_copy(orte_odls_globals.xtermcmd); + /* insert the rank into the correct place as a window title */ + free(argv[2]); + asprintf(&argv[2], "Rank %s", ORTE_VPID_PRINT(child->name.vpid)); + /* add in the argv from the app */ + for (i=0; NULL != app->argv[i]; i++) { + opal_argv_append_nosize(&argv, app->argv[i]); + } + /* use the xterm cmd as the app string */ + cmd = strdup(orte_odls_globals.xtermcmd[0]); + break; + } else if (jobdat->num_procs <= nm->name.vpid) { /* check for bozo case */ + /* can't be done! 
*/ + orte_show_help("help-orte-odls-base.txt", + "orte-odls-base:xterm-rank-out-of-bounds", + true, nm->name.vpid, jobdat->num_procs); + child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; + goto errorout; + } + } + } else if (NULL != orte_fork_agent) { + /* we were given a fork agent - use it */ + argv = opal_argv_copy(orte_fork_agent); + /* add in the argv from the app */ + for (i=0; NULL != app->argv[i]; i++) { + opal_argv_append_nosize(&argv, app->argv[i]); + } + /* the app exe name itself is in the argvsav array, so + * we can recover it from there later + */ + cmd = opal_path_findv(orte_fork_agent[0], X_OK, orte_launch_environ, NULL); + if (NULL == cmd) { + orte_show_help("help-orte-odls-base.txt", + "orte-odls-base:fork-agent-not-found", + true, orte_process_info.nodename, orte_fork_agent[0]); + child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; + goto errorout; + } + } else { + cmd = strdup(app->app); + argv = opal_argv_copy(app->argv); + } + + /* setup the rest of the environment with the proc-specific items - these + * will be overwritten for each child + */ + if (ORTE_SUCCESS != (rc = orte_schizo.setup_child(jobdat, child, app, &env))) { + ORTE_ERROR_LOG(rc); + child->exit_code = rc; + goto errorout; + } + + /* if we are indexing the argv by rank, do so now */ + if (cd->index_argv) { + char *param; + asprintf(&param, "%s-%d", argv[0], (int)child->name.vpid); + free(argv[0]); + argv[0] = param; + } + + if (5 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) { + opal_output(orte_odls_base_framework.framework_output, "%s odls:launch spawning child %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&child->name)); + + /* dump what is going to be exec'd */ + if (7 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) { + opal_dss.dump(orte_odls_base_framework.framework_output, app, ORTE_APP_CONTEXT); + } + } + + if (ORTE_SUCCESS != (rc = cd->fork_local(child, cmd, argv, env, jobdat, cd->opts))) { + 
child->exit_code = rc; /* error message already output */ + goto errorout; + } + if (ORTE_SUCCESS != rc) { + /* do NOT ERROR_LOG this error - it generates + * a message/node as most errors will be common + * across the entire cluster. Instead, we let orterun + * output a consolidated error message for us + */ + ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE); + child->exit_code = rc; /* error message already output */ + goto errorout; + } + + ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_RUNNING); + if (NULL != env) { + opal_argv_free(env); + } + if (NULL != argv) { + opal_argv_free(argv); + } + if (NULL != cmd) { + free(cmd); + } + OBJ_RELEASE(cd); + return; + + errorout: + ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); + if (NULL != env) { + opal_argv_free(env); + } + if (NULL != argv) { + opal_argv_free(argv); + } + if (NULL != cmd) { + free(cmd); + } + OBJ_RELEASE(cd); +} void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) { orte_app_context_t *app; orte_proc_t *child=NULL; int rc=ORTE_SUCCESS; - orte_std_cntr_t proc_rank; char basedir[MAXPATHLEN]; char **argvsav=NULL; - int inm, j, idx; + int j, idx; int total_num_local_procs = 0; orte_odls_launch_local_t *caddy = (orte_odls_launch_local_t*)cbdata; orte_job_t *jobdat; orte_jobid_t job = caddy->job; orte_odls_base_fork_local_proc_fn_t fork_local = caddy->fork_local; bool index_argv; + char *msg; + orte_odls_spawn_caddy_t *cd; + opal_event_base_t *evb; opal_output_verbose(5, orte_odls_base_framework.framework_output, "%s local:launch", @@ -671,32 +835,65 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) goto GETOUT; } -#if OPAL_ENABLE_FT_CR == 1 - /* - * Notify the local SnapC component regarding new job - */ - if( ORTE_SUCCESS != (rc = orte_snapc.setup_job(job) ) ) { - /* Silent Failure :/ JJH */ - ORTE_ERROR_LOG(rc); - } -#endif - -#if OPAL_ENABLE_FT_CR == 1 - for (j=0; j < jobdat->apps->size; j++) { - if (NULL == (app = 
(orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, j))) { - continue; - } - orte_sstore.fetch_app_deps(app); - } - orte_sstore.wait_all_deps(); -#endif - /* track if we are indexing argvs so we don't check every time */ index_argv = orte_get_attribute(&jobdat->attributes, ORTE_JOB_INDEX_ARGV, NULL, OPAL_BOOL); /* compute the total number of local procs currently alive and about to be launched */ total_num_local_procs = compute_num_procs_alive(job) + jobdat->num_local_procs; + /* check the system limits - if we are at our max allowed children, then + * we won't be allowed to do this anyway, so we may as well abort now. + * According to the documentation, num_procs = 0 is equivalent to + * no limit, so treat it as unlimited here. + */ + if (0 < opal_sys_limits.num_procs) { + OPAL_OUTPUT_VERBOSE((10, orte_odls_base_framework.framework_output, + "%s checking limit on num procs %d #children needed %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + opal_sys_limits.num_procs, total_num_local_procs)); + if (opal_sys_limits.num_procs < total_num_local_procs) { + if (2 < caddy->retries) { + /* if we have already tried too many times, then just give up */ + ORTE_ACTIVATE_JOB_STATE(jobdat, ORTE_JOB_STATE_FAILED_TO_LAUNCH); + goto ERROR_OUT; + } + /* set a timer event so we can retry later - this + * gives the system a chance to let other procs + * terminate, thus creating room for new ones + */ + ORTE_DETECT_TIMEOUT(1000, 1000, -1, timer_cb, caddy); + return; + } + } + + /* check to see if we have enough available file descriptors + * to launch these children - if not, then let's wait a little + * while to see if some come free. 
This can happen if we are + * in a tight loop over comm_spawn + */ + if (0 < opal_sys_limits.num_files) { + int limit; + limit = 4*total_num_local_procs + 6*jobdat->num_local_procs; + OPAL_OUTPUT_VERBOSE((10, orte_odls_base_framework.framework_output, + "%s checking limit on file descriptors %d need %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + opal_sys_limits.num_files, limit)); + if (opal_sys_limits.num_files < limit) { + if (2 < caddy->retries) { + /* tried enough - give up */ + child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; + ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); + goto ERROR_OUT; + } + /* don't have enough - wait a little time */ + ORTE_DETECT_TIMEOUT(1000, 1000, -1, timer_cb, caddy); + if (NULL != argvsav) { + opal_argv_free(argvsav); + } + return; + } + } + for (j=0; j < jobdat->apps->size; j++) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, j))) { continue; @@ -710,31 +907,6 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) continue; } - /* check the system limits - if we are at our max allowed children, then - * we won't be allowed to do this anyway, so we may as well abort now. - * According to the documentation, num_procs = 0 is equivalent to - * no limit, so treat it as unlimited here. 
- */ - if (0 < opal_sys_limits.num_procs) { - OPAL_OUTPUT_VERBOSE((10, orte_odls_base_framework.framework_output, - "%s checking limit on num procs %d #children needed %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - opal_sys_limits.num_procs, total_num_local_procs)); - if (opal_sys_limits.num_procs < total_num_local_procs) { - if (2 < caddy->retries) { - /* if we have already tried too many times, then just give up */ - ORTE_ACTIVATE_JOB_STATE(jobdat, ORTE_JOB_STATE_FAILED_TO_LAUNCH); - goto ERROR_OUT; - } - /* set a timer event so we can retry later - this - * gives the system a chance to let other procs - * terminate, thus creating room for new ones - */ - ORTE_DETECT_TIMEOUT(1000, 1000, -1, timer_cb, caddy); - return; - } - } - /* setup the environment for this app */ if (ORTE_SUCCESS != (rc = orte_schizo.setup_fork(jobdat, app))) { @@ -809,8 +981,30 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) goto GETOUT; } + /* tell all children that they are being launched via ORTE */ + opal_setenv(OPAL_MCA_PREFIX"orte_launch", "1", true, &app->env); + + /* if the user requested it, set the system resource limits */ + if (OPAL_SUCCESS != (rc = opal_util_init_sys_limits(&msg))) { + orte_show_help("help-orte-odls-default.txt", "set limit", true, + orte_process_info.nodename, app, + __FILE__, __LINE__, msg); + /* cycle through children to find those for this jobid */ + for (idx=0; idx < orte_local_children->size; idx++) { + if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) { + continue; + } + if (OPAL_EQUAL == opal_dss.compare(&job, &(child->name.jobid), ORTE_JOBID) && + j == (int)child->app_idx) { + child->exit_code = rc; + ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); + } + } + goto GETOUT; + } + /* okay, now let's launch all the local procs for this app using the provided fork_local fn */ - for (proc_rank = 0, idx=0; idx < orte_local_children->size; idx++) { + for (idx=0; idx < 
orte_local_children->size; idx++) { if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) { continue; } @@ -859,235 +1053,56 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name))); - /* setup the pmix environment */ - if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_fork(&child->name, &app->env))) { - ORTE_ERROR_LOG(rc); - continue; - } - /* tell the child that it is being launched via ORTE */ - opal_setenv(OPAL_MCA_PREFIX"orte_launch", "1", true, &app->env); + /* set the waitpid callback here for thread protection and + * to ensure we can capture the callback on shortlived apps */ + ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE); + orte_wait_cb(child, odls_base_default_wait_local_proc, NULL); - /* ensure we clear any prior info regarding state or exit status in - * case this is a restart - */ - child->exit_code = 0; - ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_WAITPID); - /* if we are not forwarding output for this job, then - * flag iof as complete - */ - if (ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { - ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_IOF_COMPLETE); + /* dispatch this child to the next available launch thread */ + cd = OBJ_NEW(orte_odls_spawn_caddy_t); + cd->jdata = jobdat; + cd->app = app; + cd->child = child; + cd->fork_local = fork_local; + cd->index_argv = index_argv; + /* setup any IOF */ + cd->opts.usepty = OPAL_ENABLE_PTY_SUPPORT; + + /* do we want to setup stdin? 
*/ + if (jobdat->stdin_target == ORTE_VPID_WILDCARD || + child->name.vpid == jobdat->stdin_target) { + cd->opts.connect_stdin = true; } else { - ORTE_FLAG_SET(child, ORTE_PROC_FLAG_IOF_COMPLETE); + cd->opts.connect_stdin = false; } - child->pid = 0; - if (NULL != child->rml_uri) { - free(child->rml_uri); - child->rml_uri = NULL; - } - - /* check to see if we have enough available file descriptors - * to launch another child - if not, then let's wait a little - * while to see if some come free. This can happen if we are - * in a tight loop over comm_spawn - */ - if (0 < opal_sys_limits.num_files) { - int limit; - limit = 4*total_num_local_procs + 6; - OPAL_OUTPUT_VERBOSE((10, orte_odls_base_framework.framework_output, - "%s checking limit on file descriptors %d need %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - opal_sys_limits.num_files, limit)); - if (opal_sys_limits.num_files < limit) { - if (2 < caddy->retries) { - /* tried enough - give up */ - child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); - continue; - } - /* don't have enough - wait a little time */ - ORTE_DETECT_TIMEOUT(1000, 1000, -1, timer_cb, caddy); - if (NULL != argvsav) { - opal_argv_free(argvsav); - } - return; - } - } - - /* did the user request we display output in xterms? */ - if (NULL != orte_xterm) { - opal_list_item_t *nmitem; - orte_namelist_t *nm; - /* see if this rank is one of those requested */ - for (nmitem = opal_list_get_first(&orte_odls_globals.xterm_ranks); - nmitem != opal_list_get_end(&orte_odls_globals.xterm_ranks); - nmitem = opal_list_get_next(nmitem)) { - nm = (orte_namelist_t*)nmitem; - if (ORTE_VPID_WILDCARD == nm->name.vpid || - child->name.vpid == nm->name.vpid) { - /* we want this one - modify the app's command to include - * the orte xterm cmd. Need to be careful, though, that we - * don't modify the app for ALL ranks that use it! 
So we - * will create a copy of the argv so we can restore it later - */ - argvsav = opal_argv_copy(app->argv); - /* free the argv */ - opal_argv_free(app->argv); - app->argv = NULL; - /* now create a new one that starts with the xtermcmd */ - for (inm=0; inm < opal_argv_count(orte_odls_globals.xtermcmd); inm++) { - opal_argv_append_nosize(&app->argv, orte_odls_globals.xtermcmd[inm]); - } - /* insert the rank into the correct place as a window title */ - free(app->argv[2]); - asprintf(&app->argv[2], "Rank %s", ORTE_VPID_PRINT(child->name.vpid)); - /* add back the original argv */ - for (inm=0; inm < opal_argv_count(argvsav); inm++) { - opal_argv_append_nosize(&app->argv, argvsav[inm]); - } - /* the app exe name itself is in the argvsav array, so - * we can recover it from there later - */ - free(app->app); - app->app = strdup(orte_odls_globals.xtermcmd[0]); - break; - } else if (jobdat->num_procs <= nm->name.vpid) { /* check for bozo case */ - /* can't be done! */ - orte_show_help("help-orte-odls-base.txt", - "orte-odls-base:xterm-rank-out-of-bounds", - true, nm->name.vpid, jobdat->num_procs); - child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); - continue; - } - - } - } else if (NULL != orte_fork_agent) { - /* we were given a fork agent - use it */ - argvsav = opal_argv_copy(app->argv); - /* free the argv */ - opal_argv_free(app->argv); - app->argv = NULL; - /* now create a new one that starts with the fork agent */ - app->argv = opal_argv_copy(orte_fork_agent); - /* add back the original argv */ - for (inm=0; NULL != argvsav[inm]; inm++) { - opal_argv_append_nosize(&app->argv, argvsav[inm]); - } - /* the app exe name itself is in the argvsav array, so - * we can recover it from there later - */ - free(app->app); - app->app = opal_path_findv(orte_fork_agent[0], X_OK, orte_launch_environ, NULL); - if (NULL == app->app) { - orte_show_help("help-orte-odls-base.txt", - 
"orte-odls-base:fork-agent-not-found", - true, orte_process_info.nodename, orte_fork_agent[0]); - child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); - continue; - } - } - - /* setup the rest of the environment with the proc-specific items - these - * will be overwritten for each child - */ - if (ORTE_SUCCESS != (rc = orte_schizo.setup_child(jobdat, child, app))) { + if (ORTE_SUCCESS != (rc = orte_iof_base_setup_prefork(&cd->opts))) { ORTE_ERROR_LOG(rc); child->exit_code = rc; + OBJ_RELEASE(cd); ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); - continue; + goto GETOUT; } - -#if OPAL_ENABLE_FT_CR == 1 - /* - * OPAL CRS components need the opportunity to take action before a process - * is forked. - * Needs access to: - * - Environment - * - Rank/ORTE Name - * - Binary to exec - */ - if( NULL != opal_crs.crs_prelaunch ) { - if( OPAL_SUCCESS != (rc = opal_crs.crs_prelaunch(child->name.vpid, - orte_sstore_base_prelaunch_location, - &(app->app), - &(app->cwd), - &(app->argv), - &(app->env) ) ) ) { + if (ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { + /* connect endpoints IOF */ + rc = orte_iof_base_setup_parent(&child->name, &cd->opts); + if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); - child->exit_code = rc; + OBJ_RELEASE(cd); ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); - continue; + goto GETOUT; } } -#endif - /* if we are indexing the argv by rank, do so now */ - if (index_argv) { - char *param; - asprintf(&param, "%s-%d", app->argv[0], (int)child->name.vpid); - free(app->argv[0]); - app->argv[0] = param; + ++orte_odls_globals.next_base; + if (orte_odls_globals.num_threads <= orte_odls_globals.next_base) { + orte_odls_globals.next_base = 0; } + evb = orte_odls_globals.ev_bases[orte_odls_globals.next_base]; + opal_event_set(evb, &cd->ev, -1, + OPAL_EV_WRITE, orte_odls_base_spawn_proc, cd); + opal_event_set_priority(&cd->ev, 
ORTE_MSG_PRI); + opal_event_active(&cd->ev, OPAL_EV_WRITE, 1); - if (5 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) { - opal_output(orte_odls_base_framework.framework_output, "%s odls:launch spawning child %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&child->name)); - - /* dump what is going to be exec'd */ - if (7 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) { - opal_dss.dump(orte_odls_base_framework.framework_output, app, ORTE_APP_CONTEXT); - } - } - - if (ORTE_SUCCESS != (rc = fork_local(app, child, app->env, jobdat))) { - child->exit_code = rc; /* error message already output */ - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); - continue; - } - orte_wait_cb(child, odls_base_default_wait_local_proc, NULL); - /* if we indexed the argv, we need to restore it to - * its original form - */ - if (index_argv) { - /* restore the argv[0] */ - char *param; - if (NULL == (param = strrchr(app->argv[0], '-'))) { - child->exit_code = ORTE_ERR_NOT_FOUND; - rc = ORTE_ERR_NOT_FOUND; - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); - continue; - } - *param = '\0'; - } - if (ORTE_SUCCESS != rc) { - /* do NOT ERROR_LOG this error - it generates - * a message/node as most errors will be common - * across the entire cluster. 
Instead, we let orterun - * output a consolidated error message for us - */ - child->exit_code = rc; /* error message already output */ - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); - continue; - } else { - ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE); - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_RUNNING); - } - /* move to next processor */ - proc_rank++; - /* reset the exe name, if necessary */ - if (NULL != argvsav) { - /* release the current argv array */ - opal_argv_free(app->argv); - /* restore the original one */ - app->argv = argvsav; - argvsav = NULL; - /* the app exe name itself is now in the argv[0] posn */ - free(app->app); - app->app = strdup(app->argv[0]); - } - } /* complete launching all children for this app */ + } /* reset our working directory back to our default location - if we * don't do this, then we will be looking for relative paths starting * from the last wdir option specified by the user. Thus, we would @@ -1097,18 +1112,15 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) */ chdir(basedir); } - if (NULL != argvsav) { - opal_argv_free(argvsav); - } - GETOUT: + GETOUT: /* tell the state machine that all local procs for this job * were launched so that it can do whatever it needs to do, * like send a state update message for all procs to the HNP */ ORTE_ACTIVATE_JOB_STATE(jobdat, ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE); - ERROR_OUT: + ERROR_OUT: /* ensure we reset our working directory back to our default location */ chdir(basedir); /* release the event */ @@ -1147,10 +1159,10 @@ int orte_odls_base_default_signal_local_procs(const orte_process_name_t *proc, i } /* we want it sent to some specified process, so find it */ - for (i=0; i < orte_local_children->size; i++) { - if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { - continue; - } + for (i=0; i < orte_local_children->size; i++) { + if (NULL == (child = 
(orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { + continue; + } if (OPAL_EQUAL == opal_dss.compare(&(child->name), (orte_process_name_t*)proc, ORTE_NAME)) { if (ORTE_SUCCESS != (rc = signal_local(child->pid, (int)signal))) { ORTE_ERROR_LOG(rc); @@ -1659,6 +1671,7 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child, orte_app_context_t *app; orte_job_t *jobdat; char basedir[MAXPATHLEN]; + orte_iof_base_io_conf_t opts; OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:restart_proc for proc %s", @@ -1690,7 +1703,7 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child, app = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, child->app_idx); /* reset envars to match this child */ - if (ORTE_SUCCESS != (rc = orte_schizo.setup_child(jobdat, child, app))) { + if (ORTE_SUCCESS != (rc = orte_schizo.setup_child(jobdat, child, app, &app->env))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } @@ -1701,12 +1714,24 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child, goto CLEANUP; } + /* setup any IOF */ + memset(&opts, 0, sizeof(orte_iof_base_io_conf_t)); + if (ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { + /* connect endpoints IOF */ + rc = orte_iof_base_setup_parent(&child->name, &opts); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); + goto CLEANUP; + } + } + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s restarting app %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->app)); orte_wait_cb(child, odls_base_default_wait_local_proc, NULL); - if (ORTE_SUCCESS != (rc = fork_local(app, child, app->env, jobdat))) { + if (ORTE_SUCCESS != (rc = fork_local(child, app->app, app->argv, app->env, jobdat, opts))) { orte_wait_cb_cancel(child); child->exit_code = ORTE_ERR_SILENT; /* error message already output */ ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); diff --git 
a/orte/mca/odls/base/odls_base_frame.c b/orte/mca/odls/base/odls_base_frame.c index 593a147028..c161ae731b 100644 --- a/orte/mca/odls/base/odls_base_frame.c +++ b/orte/mca/odls/base/odls_base_frame.c @@ -15,6 +15,7 @@ * All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,6 +33,7 @@ #include "orte/mca/mca.h" #include "opal/mca/base/base.h" #include "opal/mca/hwloc/hwloc-internal.h" +#include "opal/runtime/opal_progress_threads.h" #include "opal/util/output.h" #include "opal/util/path.h" #include "opal/util/argv.h" @@ -76,6 +78,14 @@ static int orte_odls_base_register(mca_base_register_flag_t flags) MCA_BASE_VAR_SCOPE_READONLY, &orte_odls_globals.timeout_before_sigkill); + orte_odls_globals.num_threads = 0; + (void) mca_base_var_register("orte", "odls", "base", "num_threads", + "Number of threads to use for spawning local procs", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &orte_odls_globals.num_threads); + return ORTE_SUCCESS; } @@ -99,6 +109,15 @@ static int orte_odls_base_close(void) } OBJ_RELEASE(orte_local_children); + if (0 < orte_odls_globals.num_threads) { + /* stop the progress threads */ + for (i=0; NULL != orte_odls_globals.ev_threads[i]; i++) { + opal_progress_thread_finalize(orte_odls_globals.ev_threads[i]); + } + } + free(orte_odls_globals.ev_bases); + opal_argv_free(orte_odls_globals.ev_threads); + return mca_base_framework_components_close(&orte_odls_base_framework, NULL); } @@ -174,6 +193,25 @@ static int orte_odls_base_open(mca_base_open_flag_t flags) opal_argv_append_nosize(&orte_odls_globals.xtermcmd, "-e"); } + /* setup the pool of worker threads */ + orte_odls_globals.ev_threads = NULL; + orte_odls_globals.next_base = 0; + if (0 == orte_odls_globals.num_threads) { + orte_odls_globals.ev_bases = 
(opal_event_base_t**)malloc(sizeof(opal_event_base_t*)); + /* use the default event base */ + orte_odls_globals.ev_bases[0] = orte_event_base; + } else { + orte_odls_globals.ev_bases = + (opal_event_base_t**)malloc(orte_odls_globals.num_threads * sizeof(opal_event_base_t*)); + for (i=0; i < orte_odls_globals.num_threads; i++) { + asprintf(&tmp, "ORTE-ODLS-%d", i); + orte_odls_globals.ev_bases[i] = opal_progress_thread_init(tmp); + opal_argv_append_nosize(&orte_odls_globals.ev_threads, tmp); + free(tmp); + } + + } + /* Open up all available components */ return mca_base_framework_components_open(&orte_odls_base_framework, flags); } @@ -197,3 +235,11 @@ OBJ_CLASS_INSTANCE(orte_odls_launch_local_t, opal_object_t, launch_local_const, launch_local_dest); + +static void sccon(orte_odls_spawn_caddy_t *p) +{ + memset(&p->opts, 0, sizeof(orte_iof_base_io_conf_t)); +} +OBJ_CLASS_INSTANCE(orte_odls_spawn_caddy_t, + opal_object_t, + sccon, NULL); diff --git a/orte/mca/odls/base/odls_private.h b/orte/mca/odls/base/odls_private.h index 2a26df98cc..e5e93a8c64 100644 --- a/orte/mca/odls/base/odls_private.h +++ b/orte/mca/odls/base/odls_private.h @@ -12,7 +12,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -36,6 +36,7 @@ #include "opal/class/opal_bitmap.h" #include "opal/dss/dss_types.h" +#include "orte/mca/iof/base/iof_base_setup.h" #include "orte/mca/rml/rml_types.h" #include "orte/runtime/orte_globals.h" @@ -56,11 +57,15 @@ typedef struct { opal_list_t xterm_ranks; /* the xterm cmd to be used */ char **xtermcmd; + /* thread pool */ + int num_threads; + opal_event_base_t **ev_bases; // event base array for progress threads + char** ev_threads; // event progress thread names + int next_base; // counter to load-level thread use } orte_odls_globals_t; ORTE_DECLSPEC extern orte_odls_globals_t orte_odls_globals; - /* * Default functions that are common to most environments - can * be overridden by specific environments if they need something @@ -74,11 +79,27 @@ ORTE_DECLSPEC int orte_odls_base_default_construct_child_list(opal_buffer_t *data, orte_jobid_t *job); +ORTE_DECLSPEC void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata); + /* define a function that will fork a local proc */ -typedef int (*orte_odls_base_fork_local_proc_fn_t)(orte_app_context_t *context, - orte_proc_t *child, +typedef int (*orte_odls_base_fork_local_proc_fn_t)(orte_proc_t *child, + char *app, char **argv, char **environ_copy, - orte_job_t *jdata); + orte_job_t *jdata, + orte_iof_base_io_conf_t opts); + +/* define an object for fork/exec the local proc */ +typedef struct { + opal_object_t super; + opal_event_t ev; + orte_job_t *jdata; + orte_app_context_t *app; + orte_proc_t *child; + bool index_argv; + orte_iof_base_io_conf_t opts; + orte_odls_base_fork_local_proc_fn_t fork_local; +} orte_odls_spawn_caddy_t; +OBJ_CLASS_DECLARATION(orte_odls_spawn_caddy_t); /* define an object for starting local launch */ typedef struct { diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 1641457c38..0e1683e1c9 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ 
b/orte/mca/odls/default/odls_default_module.c @@ -15,7 +15,7 @@ * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -144,8 +144,9 @@ static int orte_odls_default_restart_proc(orte_proc_t *child); static void send_error_show_help(int fd, int exit_status, const char *file, const char *topic, ...) __opal_attribute_noreturn__; -static int do_child(orte_app_context_t* context, - orte_proc_t *child, + +static int do_child(orte_proc_t *child, + char *cmd, char **argv, char **environ_copy, orte_job_t *jobdat, int write_fd, orte_iof_base_io_conf_t opts) @@ -318,16 +319,15 @@ static int close_open_file_descriptors(int write_fd, return ORTE_SUCCESS; } -static int do_child(orte_app_context_t* context, - orte_proc_t *child, +static int do_child(orte_proc_t *child, + char *app, char **argv, char **environ_copy, orte_job_t *jobdat, int write_fd, orte_iof_base_io_conf_t opts) { - int i, rc; + int i; sigset_t sigs; long fd, fdmax = sysconf(_SC_OPEN_MAX); - char *param, *msg; #if HAVE_SETPGID /* Set a new process group for this child, so that any @@ -359,7 +359,7 @@ static int do_child(orte_app_context_t* context, send_error_show_help(write_fd, 1, "help-orte-odls-default.txt", "iof setup failed", - orte_process_info.nodename, context->app); + orte_process_info.nodename, app); /* Does not return */ } } @@ -384,18 +384,6 @@ static int do_child(orte_app_context_t* context, close(fdnull); } - /* if the user requested it, set the system resource limits */ - if (OPAL_SUCCESS != (rc = opal_util_init_sys_limits(&msg))) { - send_error_show_help(write_fd, 1, "help-orte-odls-default.txt", - "set limit", - orte_process_info.nodename, context->app, - __FILE__, __LINE__, msg); - } - /* ensure we only do this once */ - (void) 
mca_base_var_env_name("opal_set_max_sys_limits", &param); - opal_unsetenv(param, &environ_copy); - free(param); - /* close all open file descriptors w/ exception of stdin/stdout/stderr, the pipe used for the IOF INTERNAL messages, and the pipe up to the parent. */ @@ -408,10 +396,10 @@ static int do_child(orte_app_context_t* context, } } - if (context->argv == NULL) { - context->argv = malloc(sizeof(char*)*2); - context->argv[0] = strdup(context->app); - context->argv[1] = NULL; + if (argv == NULL) { + argv = malloc(sizeof(char*)*2); + argv[0] = strdup(app); + argv[1] = NULL; } /* Set signal handlers back to the default. Do this close to @@ -436,16 +424,16 @@ static int do_child(orte_app_context_t* context, /* Exec the new executable */ - execve(context->app, context->argv, environ_copy); + execve(app, argv, environ_copy); send_error_show_help(write_fd, 1, "help-orte-odls-default.txt", "execve error", - orte_process_info.nodename, context->app, strerror(errno)); + orte_process_info.nodename, app, strerror(errno)); /* Does not return */ } -static int do_parent(orte_app_context_t* context, - orte_proc_t *child, +static int do_parent(orte_proc_t *child, + char *app, char **argv, char **environ_copy, orte_job_t *jobdat, int read_fd, orte_iof_base_io_conf_t opts) @@ -454,19 +442,10 @@ static int do_parent(orte_app_context_t* context, orte_odls_pipe_err_msg_t msg; char file[ORTE_ODLS_MAX_FILE_LEN + 1], topic[ORTE_ODLS_MAX_TOPIC_LEN + 1], *str = NULL; - if (NULL != child && ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { - /* connect endpoints IOF */ - rc = orte_iof_base_setup_parent(&child->name, &opts); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - close(read_fd); - - if (NULL != child) { - child->state = ORTE_PROC_STATE_UNDEF; - } - return rc; - } - } + close(opts.p_stdin[0]); + close(opts.p_stdout[1]); + close(opts.p_stderr[1]); + close(opts.p_internal[1]); /* Block reading a message from the pipe */ while (1) { @@ -503,7 +482,7 @@ static int 
do_parent(orte_app_context_t* context, if (OPAL_SUCCESS != rc) { orte_show_help("help-orte-odls-default.txt", "syscall fail", true, - orte_process_info.nodename, context->app, + orte_process_info.nodename, app, "opal_fd_read", __FILE__, __LINE__); if (NULL != child) { child->state = ORTE_PROC_STATE_UNDEF; @@ -517,7 +496,7 @@ static int do_parent(orte_app_context_t* context, if (OPAL_SUCCESS != rc) { orte_show_help("help-orte-odls-default.txt", "syscall fail", true, - orte_process_info.nodename, context->app, + orte_process_info.nodename, app, "opal_fd_read", __FILE__, __LINE__); if (NULL != child) { child->state = ORTE_PROC_STATE_UNDEF; @@ -531,7 +510,7 @@ static int do_parent(orte_app_context_t* context, if (NULL == str) { orte_show_help("help-orte-odls-default.txt", "syscall fail", true, - orte_process_info.nodename, context->app, + orte_process_info.nodename, app, "opal_fd_read", __FILE__, __LINE__); if (NULL != child) { child->state = ORTE_PROC_STATE_UNDEF; @@ -580,39 +559,16 @@ static int do_parent(orte_app_context_t* context, /** * Fork/exec the specified processes */ -static int odls_default_fork_local_proc(orte_app_context_t* context, - orte_proc_t *child, +static int odls_default_fork_local_proc(orte_proc_t *child, + char *app, + char **argv, char **environ_copy, - orte_job_t *jobdat) + orte_job_t *jobdat, + orte_iof_base_io_conf_t opts) { - orte_iof_base_io_conf_t opts = {0}; - int rc, p[2]; + int p[2]; pid_t pid; - if (NULL != child) { - /* should pull this information from MPIRUN instead of going with - default */ - opts.usepty = OPAL_ENABLE_PTY_SUPPORT; - - /* do we want to setup stdin? 
*/ - if (NULL != child && - (jobdat->stdin_target == ORTE_VPID_WILDCARD || - child->name.vpid == jobdat->stdin_target)) { - opts.connect_stdin = true; - } else { - opts.connect_stdin = false; - } - - if (ORTE_SUCCESS != (rc = orte_iof_base_setup_prefork(&opts))) { - ORTE_ERROR_LOG(rc); - if (NULL != child) { - child->state = ORTE_PROC_STATE_FAILED_TO_START; - child->exit_code = rc; - } - return rc; - } - } - /* A pipe is used to communicate between the parent and child to indicate whether the exec ultimately succeeded or failed. The child sets the pipe to be close-on-exec; the child only ever @@ -647,12 +603,12 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, if (pid == 0) { close(p[0]); - do_child(context, child, environ_copy, jobdat, p[1], opts); + do_child(child, app, argv, environ_copy, jobdat, p[1], opts); /* Does not return */ } close(p[1]); - return do_parent(context, child, environ_copy, jobdat, p[0], opts); + return do_parent(child, app, argv, environ_copy, jobdat, p[0], opts); } diff --git a/orte/mca/schizo/base/base.h b/orte/mca/schizo/base/base.h index 265d11569d..ad5d9ffc63 100644 --- a/orte/mca/schizo/base/base.h +++ b/orte/mca/schizo/base/base.h @@ -73,7 +73,8 @@ ORTE_DECLSPEC int orte_schizo_base_setup_fork(orte_job_t *jdata, orte_app_context_t *context); ORTE_DECLSPEC int orte_schizo_base_setup_child(orte_job_t *jobdat, orte_proc_t *child, - orte_app_context_t *app); + orte_app_context_t *app, + char ***env); ORTE_DECLSPEC orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void); ORTE_DECLSPEC long orte_schizo_base_get_remaining_time(void); ORTE_DECLSPEC void orte_schizo_base_finalize(void); diff --git a/orte/mca/schizo/base/schizo_base_stubs.c b/orte/mca/schizo/base/schizo_base_stubs.c index b9ab76511b..173ca1c2bf 100644 --- a/orte/mca/schizo/base/schizo_base_stubs.c +++ b/orte/mca/schizo/base/schizo_base_stubs.c @@ -128,14 +128,15 @@ int orte_schizo_base_setup_fork(orte_job_t *jdata, int 
orte_schizo_base_setup_child(orte_job_t *jdata, orte_proc_t *child, - orte_app_context_t *app) + orte_app_context_t *app, + char ***env) { int rc; orte_schizo_base_active_module_t *mod; OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) { if (NULL != mod->module->setup_child) { - rc = mod->module->setup_child(jdata, child, app); + rc = mod->module->setup_child(jdata, child, app, env); if (ORTE_SUCCESS != rc && ORTE_ERR_TAKE_NEXT_OPTION != rc) { ORTE_ERROR_LOG(rc); return rc; diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index 7b3cc72c63..1bd42f4e43 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -61,7 +61,8 @@ static int setup_fork(orte_job_t *jdata, orte_app_context_t *context); static int setup_child(orte_job_t *jobdat, orte_proc_t *child, - orte_app_context_t *app); + orte_app_context_t *app, + char ***env); orte_schizo_base_module_t orte_schizo_ompi_module = { .define_cli = define_cli, @@ -992,7 +993,8 @@ static int setup_fork(orte_job_t *jdata, static int setup_child(orte_job_t *jdata, orte_proc_t *child, - orte_app_context_t *app) + orte_app_context_t *app, + char ***env) { char *param, *value; int rc, i; @@ -1026,7 +1028,7 @@ static int setup_child(orte_job_t *jdata, ORTE_ERROR_LOG(rc); return rc; } - opal_setenv("OMPI_MCA_ess_base_jobid", value, true, &app->env); + opal_setenv("OMPI_MCA_ess_base_jobid", value, true, env); free(value); /* setup the vpid */ @@ -1034,7 +1036,7 @@ static int setup_child(orte_job_t *jdata, ORTE_ERROR_LOG(rc); return rc; } - opal_setenv("OMPI_MCA_ess_base_vpid", value, true, &app->env); + opal_setenv("OMPI_MCA_ess_base_vpid", value, true, env); /* although the vpid IS the process' rank within the job, users * would appreciate being given a public environmental variable @@ -1044,7 +1046,7 @@ static int setup_child(orte_job_t *jdata, * AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT. 
* We know - just live with it */ - opal_setenv("OMPI_COMM_WORLD_RANK", value, true, &app->env); + opal_setenv("OMPI_COMM_WORLD_RANK", value, true, env); free(value); /* done with this now */ /* users would appreciate being given a public environmental variable @@ -1060,7 +1062,7 @@ static int setup_child(orte_job_t *jdata, return rc; } asprintf(&value, "%lu", (unsigned long) child->local_rank); - opal_setenv("OMPI_COMM_WORLD_LOCAL_RANK", value, true, &app->env); + opal_setenv("OMPI_COMM_WORLD_LOCAL_RANK", value, true, env); free(value); /* users would appreciate being given a public environmental variable @@ -1076,9 +1078,9 @@ static int setup_child(orte_job_t *jdata, return rc; } asprintf(&value, "%lu", (unsigned long) child->node_rank); - opal_setenv("OMPI_COMM_WORLD_NODE_RANK", value, true, &app->env); + opal_setenv("OMPI_COMM_WORLD_NODE_RANK", value, true, env); /* set an mca param for it too */ - opal_setenv("OMPI_MCA_orte_ess_node_rank", value, true, &app->env); + opal_setenv("OMPI_MCA_orte_ess_node_rank", value, true, env); free(value); /* provide the identifier for the PMIx connection - the @@ -1087,7 +1089,7 @@ static int setup_child(orte_job_t *jdata, * process name are the same, it isn't necessarily * required */ orte_util_convert_process_name_to_string(&value, &child->name); - opal_setenv("PMIX_ID", value, true, &app->env); + opal_setenv("PMIX_ID", value, true, env); free(value); nrptr = &nrestarts; @@ -1097,14 +1099,14 @@ static int setup_child(orte_job_t *jdata, * restarted so they can take appropriate action */ asprintf(&value, "%d", nrestarts); - opal_setenv("OMPI_MCA_orte_num_restarts", value, true, &app->env); + opal_setenv("OMPI_MCA_orte_num_restarts", value, true, env); free(value); } /* if the proc should not barrier in orte_init, tell it */ if (orte_get_attribute(&child->attributes, ORTE_PROC_NOBARRIER, NULL, OPAL_BOOL) || 0 < nrestarts) { - opal_setenv("OMPI_MCA_orte_do_not_barrier", "1", true, &app->env); + 
opal_setenv("OMPI_MCA_orte_do_not_barrier", "1", true, env); } /* if the proc isn't going to forward IO, then we need to flag that @@ -1116,7 +1118,7 @@ static int setup_child(orte_job_t *jdata, /* pass an envar so the proc can find any files it had prepositioned */ param = orte_process_info.proc_session_dir; - opal_setenv("OMPI_FILE_LOCATION", param, true, &app->env); + opal_setenv("OMPI_FILE_LOCATION", param, true, env); /* if the user wanted the cwd to be the proc's session dir, then * switch to that location now @@ -1144,9 +1146,9 @@ static int setup_child(orte_job_t *jdata, * again not match getcwd! This is beyond our control - we are only * ensuring they start out matching. */ - opal_setenv("PWD", param, true, &app->env); + opal_setenv("PWD", param, true, env); /* update the initial wdir value too */ - opal_setenv("OMPI_MCA_initial_wdir", param, true, &app->env); + opal_setenv("OMPI_MCA_initial_wdir", param, true, env); } return ORTE_SUCCESS; } diff --git a/orte/mca/schizo/schizo.h b/orte/mca/schizo/schizo.h index 8d27770ebe..77b1782fc2 100644 --- a/orte/mca/schizo/schizo.h +++ b/orte/mca/schizo/schizo.h @@ -88,7 +88,8 @@ typedef int (*orte_schizo_base_module_setup_fork_fn_t)(orte_job_t *jdata, * proc upon execution */ typedef int (*orte_schizo_base_module_setup_child_fn_t)(orte_job_t *jdata, orte_proc_t *child, - orte_app_context_t *app); + orte_app_context_t *app, + char ***env); typedef enum {