2017-09-13 07:40:41 +03:00
|
|
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
|
|
/*
|
|
|
|
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
* Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved
|
|
|
|
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
|
|
|
|
* Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
|
|
|
|
* reserved.
|
2018-01-12 06:03:32 +03:00
|
|
|
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
2017-09-13 07:40:41 +03:00
|
|
|
* Copyright (c) 2015 Research Organization for Information Science
|
|
|
|
* and Technology (RIST). All rights reserved.
|
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "orte_config.h"
|
|
|
|
#include "orte/constants.h"
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#ifdef HAVE_STRINGS_H
|
|
|
|
#include <strings.h>
|
|
|
|
#endif /* HAVE_STRINGS_H */
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_SYS_PARAM_H
|
|
|
|
#include <sys/param.h>
|
|
|
|
#endif
|
|
|
|
#include <errno.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
#ifdef HAVE_SYS_TYPES_H
|
|
|
|
#include <sys/types.h>
|
|
|
|
#endif /* HAVE_SYS_TYPES_H */
|
|
|
|
#ifdef HAVE_SYS_WAIT_H
|
|
|
|
#include <sys/wait.h>
|
|
|
|
#endif /* HAVE_SYS_WAIT_H */
|
|
|
|
#ifdef HAVE_SYS_TIME_H
|
|
|
|
#include <sys/time.h>
|
|
|
|
#endif /* HAVE_SYS_TIME_H */
|
|
|
|
#include <fcntl.h>
|
|
|
|
#ifdef HAVE_SYS_STAT_H
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "opal/mca/event/event.h"
|
|
|
|
#include "opal/mca/installdirs/installdirs.h"
|
|
|
|
#include "opal/mca/pmix/base/base.h"
|
|
|
|
#include "opal/mca/base/base.h"
|
|
|
|
#include "opal/util/argv.h"
|
|
|
|
#include "opal/util/output.h"
|
|
|
|
#include "opal/util/basename.h"
|
|
|
|
#include "opal/util/cmd_line.h"
|
|
|
|
#include "opal/util/opal_environ.h"
|
|
|
|
#include "opal/util/opal_getcwd.h"
|
|
|
|
#include "opal/util/show_help.h"
|
|
|
|
#include "opal/util/fd.h"
|
|
|
|
#include "opal/sys/atomic.h"
|
|
|
|
|
|
|
|
#include "opal/version.h"
|
|
|
|
#include "opal/runtime/opal.h"
|
|
|
|
#include "opal/runtime/opal_info_support.h"
|
2017-09-15 16:50:58 +03:00
|
|
|
#include "opal/runtime/opal_progress_threads.h"
|
2017-09-13 07:40:41 +03:00
|
|
|
#include "opal/util/os_path.h"
|
|
|
|
#include "opal/util/path.h"
|
|
|
|
#include "opal/class/opal_pointer_array.h"
|
|
|
|
#include "opal/dss/dss.h"
|
|
|
|
|
2017-09-15 16:50:58 +03:00
|
|
|
#include "orte/runtime/runtime.h"
|
|
|
|
#include "orte/runtime/orte_globals.h"
|
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
2017-09-21 20:26:41 +03:00
|
|
|
#include "orte/mca/schizo/base/base.h"
|
2017-09-15 16:50:58 +03:00
|
|
|
#include "orte/mca/state/state.h"
|
2017-09-21 20:26:41 +03:00
|
|
|
#include "orte/orted/orted_submit.h"
|
2017-09-15 16:50:58 +03:00
|
|
|
|
2017-09-13 07:40:41 +03:00
|
|
|
/* ensure I can behave like a daemon */
|
|
|
|
#include "prun.h"
|
|
|
|
|
2018-02-23 20:57:19 +03:00
|
|
|
typedef struct {
|
|
|
|
opal_object_t super;
|
|
|
|
opal_pmix_lock_t lock;
|
|
|
|
opal_list_t info;
|
|
|
|
} myinfo_t;
|
|
|
|
/* Constructor for myinfo_t: set up the lock and the (empty) info list */
static void mcon(myinfo_t *obj)
{
    OPAL_PMIX_CONSTRUCT_LOCK(&obj->lock);
    OBJ_CONSTRUCT(&obj->info, opal_list_t);
}
|
|
|
|
/* Destructor for myinfo_t: tear down the lock, then release every
 * item still held on the info list along with the list itself */
static void mdes(myinfo_t *obj)
{
    OPAL_PMIX_DESTRUCT_LOCK(&obj->lock);
    OPAL_LIST_DESTRUCT(&obj->info);
}
|
|
|
|
/* Register myinfo_t with the OPAL object system, deriving from
 * opal_object_t and using mcon/mdes as constructor/destructor */
static OBJ_CLASS_INSTANCE(myinfo_t, opal_object_t, mcon, mdes);
|
|
|
|
|
2017-09-21 20:26:41 +03:00
|
|
|
/*
 * prun-specific command line options. Each member is the storage
 * target of one entry in the cmd_line_init table and is zeroed at
 * the start of prun().
 */
static struct {
    bool terminate_dvm;        /* --terminate */
    bool system_server_first;  /* --system-server-first */
    bool system_server_only;   /* --system-server-only */
    int  pid;                  /* --pid <n>; 0 means "not given" */
} myoptions;
|
|
|
|
|
2017-09-13 07:40:41 +03:00
|
|
|
static opal_list_t job_info;
|
2017-09-15 16:50:58 +03:00
|
|
|
static volatile bool active = false;
|
2018-01-12 06:03:32 +03:00
|
|
|
static orte_jobid_t myjobid = ORTE_JOBID_INVALID;
|
2018-02-23 20:57:19 +03:00
|
|
|
static myinfo_t myinfo;
|
2017-09-13 07:40:41 +03:00
|
|
|
|
|
|
|
static int create_app(int argc, char* argv[],
|
|
|
|
opal_list_t *jdata,
|
|
|
|
opal_pmix_app_t **app,
|
|
|
|
bool *made_app, char ***app_env);
|
|
|
|
static int parse_locals(opal_list_t *jdata, int argc, char* argv[]);
|
|
|
|
static void set_classpath_jar_file(opal_pmix_app_t *app, int index, char *jarfile);
|
2017-10-06 20:04:30 +03:00
|
|
|
static size_t evid = INT_MAX;
|
2017-09-13 07:40:41 +03:00
|
|
|
|
|
|
|
|
|
|
|
static opal_cmd_line_init_t cmd_line_init[] = {
|
|
|
|
/* tell the dvm to terminate */
|
|
|
|
{ NULL, '\0', "terminate", "terminate", 0,
|
2017-09-21 20:26:41 +03:00
|
|
|
&myoptions.terminate_dvm, OPAL_CMD_LINE_TYPE_BOOL,
|
2017-09-13 07:40:41 +03:00
|
|
|
"Terminate the DVM", OPAL_CMD_LINE_OTYPE_DVM },
|
|
|
|
|
2017-09-19 22:30:34 +03:00
|
|
|
/* look first for a system server */
|
|
|
|
{ NULL, '\0', "system-server-first", "system-server-first", 0,
|
2017-09-21 20:26:41 +03:00
|
|
|
&myoptions.system_server_first, OPAL_CMD_LINE_TYPE_BOOL,
|
2017-09-19 22:30:34 +03:00
|
|
|
"First look for a system server and connect to it if found", OPAL_CMD_LINE_OTYPE_DVM },
|
|
|
|
|
|
|
|
/* connect only to a system server */
|
|
|
|
{ NULL, '\0', "system-server-only", "system-server-only", 0,
|
2017-09-21 20:26:41 +03:00
|
|
|
&myoptions.system_server_only, OPAL_CMD_LINE_TYPE_BOOL,
|
2017-09-19 22:30:34 +03:00
|
|
|
"Connect only to a system-level server", OPAL_CMD_LINE_OTYPE_DVM },
|
|
|
|
|
|
|
|
/* provide a connection PID */
|
|
|
|
{ NULL, '\0', "pid", "pid", 1,
|
2017-09-21 20:26:41 +03:00
|
|
|
&myoptions.pid, OPAL_CMD_LINE_TYPE_INT,
|
2017-09-19 22:30:34 +03:00
|
|
|
"PID of the session-level daemon to which we should connect",
|
|
|
|
OPAL_CMD_LINE_OTYPE_DVM },
|
|
|
|
|
2017-09-13 07:40:41 +03:00
|
|
|
/* End of list */
|
|
|
|
{ NULL, '\0', NULL, NULL, 0,
|
|
|
|
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2017-09-15 16:50:58 +03:00
|
|
|
static void infocb(int status,
|
|
|
|
opal_list_t *info,
|
2017-09-13 07:40:41 +03:00
|
|
|
void *cbdata,
|
2017-09-15 16:50:58 +03:00
|
|
|
opal_pmix_release_cbfunc_t release_fn,
|
2017-09-13 07:40:41 +03:00
|
|
|
void *release_cbdata)
|
|
|
|
{
|
|
|
|
opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
|
|
|
|
OPAL_ACQUIRE_OBJECT(lock);
|
|
|
|
|
|
|
|
if (NULL != release_fn) {
|
|
|
|
release_fn(release_cbdata);
|
|
|
|
}
|
|
|
|
OPAL_PMIX_WAKEUP_THREAD(lock);
|
|
|
|
}
|
|
|
|
|
2017-09-15 16:50:58 +03:00
|
|
|
static void regcbfunc(int status, size_t ref, void *cbdata)
|
2017-09-13 07:40:41 +03:00
|
|
|
{
|
|
|
|
opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
|
|
|
|
OPAL_ACQUIRE_OBJECT(lock);
|
2017-10-06 20:04:30 +03:00
|
|
|
evid = ref;
|
2017-09-13 07:40:41 +03:00
|
|
|
OPAL_PMIX_WAKEUP_THREAD(lock);
|
|
|
|
}
|
|
|
|
|
2017-10-06 20:04:30 +03:00
|
|
|
static void opcbfunc(int status, void *cbdata)
|
2017-09-15 16:50:58 +03:00
|
|
|
{
|
2017-10-06 20:04:30 +03:00
|
|
|
opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
|
|
|
|
OPAL_ACQUIRE_OBJECT(lock);
|
|
|
|
OPAL_PMIX_WAKEUP_THREAD(lock);
|
2017-09-15 16:50:58 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static bool fired = false;
|
|
|
|
static void evhandler(int status,
|
|
|
|
const opal_process_name_t *source,
|
|
|
|
opal_list_t *info, opal_list_t *results,
|
|
|
|
opal_pmix_notification_complete_fn_t cbfunc,
|
2017-09-13 07:40:41 +03:00
|
|
|
void *cbdata)
|
|
|
|
{
|
2017-09-15 16:50:58 +03:00
|
|
|
opal_value_t *val;
|
2017-10-10 20:08:30 +03:00
|
|
|
int jobstatus=0;
|
|
|
|
orte_jobid_t jobid = ORTE_JOBID_INVALID;
|
2017-09-14 21:48:56 +03:00
|
|
|
|
2018-01-12 06:03:32 +03:00
|
|
|
/* we should always have info returned to us - if not, there is
|
|
|
|
* nothing we can do */
|
|
|
|
if (NULL != info) {
|
2017-09-15 16:50:58 +03:00
|
|
|
OPAL_LIST_FOREACH(val, info, opal_value_t) {
|
|
|
|
if (0 == strcmp(val->key, OPAL_PMIX_JOB_TERM_STATUS)) {
|
2017-10-10 20:08:30 +03:00
|
|
|
jobstatus = val->data.integer;
|
|
|
|
} else if (0 == strcmp(val->key, OPAL_PMIX_PROCID)) {
|
|
|
|
jobid = val->data.name.jobid;
|
2017-09-14 21:48:56 +03:00
|
|
|
}
|
|
|
|
}
|
2018-01-12 06:03:32 +03:00
|
|
|
if (orte_cmd_options.verbose && (myjobid != ORTE_JOBID_INVALID && jobid == myjobid)) {
|
|
|
|
opal_output(0, "JOB %s COMPLETED WITH STATUS %d",
|
|
|
|
ORTE_JOBID_PRINT(jobid), jobstatus);
|
|
|
|
}
|
2017-09-14 21:48:56 +03:00
|
|
|
}
|
2018-01-12 06:03:32 +03:00
|
|
|
|
|
|
|
/* only terminate if this was our job - keep in mind that we
|
|
|
|
* can get notifications of job termination prior to our spawn
|
|
|
|
* having completed! */
|
|
|
|
if (!fired && (myjobid != ORTE_JOBID_INVALID && jobid == myjobid)) {
|
2017-09-15 16:50:58 +03:00
|
|
|
fired = true;
|
2017-10-06 20:04:30 +03:00
|
|
|
active = false;
|
2017-09-13 07:40:41 +03:00
|
|
|
}
|
2018-01-26 23:09:24 +03:00
|
|
|
|
|
|
|
/* we _always_ have to execute the evhandler callback or
|
|
|
|
* else the event progress engine will hang */
|
|
|
|
if (NULL != cbfunc) {
|
|
|
|
cbfunc(OPAL_SUCCESS, NULL, NULL, NULL, cbdata);
|
|
|
|
}
|
2017-09-13 07:40:41 +03:00
|
|
|
}
|
|
|
|
|
2018-02-23 20:57:19 +03:00
|
|
|
typedef struct {
|
|
|
|
opal_pmix_lock_t lock;
|
|
|
|
opal_list_t list;
|
|
|
|
} mylock_t;
|
|
|
|
|
|
|
|
|
|
|
|
static void setupcbfunc(int status,
|
|
|
|
opal_list_t *info,
|
|
|
|
void *provided_cbdata,
|
|
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
|
|
|
{
|
|
|
|
mylock_t *mylock = (mylock_t*)provided_cbdata;
|
|
|
|
opal_value_t *kv;
|
|
|
|
|
|
|
|
if (NULL != info) {
|
|
|
|
/* cycle across the provided info */
|
|
|
|
while (NULL != (kv = (opal_value_t*)opal_list_remove_first(info))) {
|
|
|
|
opal_list_append(&mylock->list, &kv->super);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* release the caller */
|
|
|
|
if (NULL != cbfunc) {
|
|
|
|
cbfunc(OPAL_SUCCESS, cbdata);
|
|
|
|
}
|
|
|
|
|
|
|
|
OPAL_PMIX_WAKEUP_THREAD(&mylock->lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void launchhandler(int status,
|
|
|
|
const opal_process_name_t *source,
|
|
|
|
opal_list_t *info, opal_list_t *results,
|
|
|
|
opal_pmix_notification_complete_fn_t cbfunc,
|
|
|
|
void *cbdata)
|
|
|
|
{
|
|
|
|
opal_value_t *p;
|
|
|
|
|
|
|
|
/* the info list will include the launch directives, so
|
|
|
|
* transfer those to the myinfo_t for return to the main thread */
|
|
|
|
while (NULL != (p = (opal_value_t*)opal_list_remove_first(info))) {
|
|
|
|
opal_list_append(&myinfo.info, &p->super);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* we _always_ have to execute the evhandler callback or
|
|
|
|
* else the event progress engine will hang */
|
|
|
|
if (NULL != cbfunc) {
|
|
|
|
cbfunc(OPAL_SUCCESS, NULL, NULL, NULL, cbdata);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* now release the thread */
|
|
|
|
OPAL_PMIX_WAKEUP_THREAD(&myinfo.lock);
|
|
|
|
}
|
2017-09-13 07:40:41 +03:00
|
|
|
|
|
|
|
int prun(int argc, char *argv[])
|
|
|
|
{
|
|
|
|
int rc, i;
|
|
|
|
char *param;
|
|
|
|
opal_pmix_lock_t lock;
|
2018-02-23 20:57:19 +03:00
|
|
|
opal_list_t apps, *lt;
|
2017-09-21 20:26:41 +03:00
|
|
|
opal_pmix_app_t *app;
|
2018-02-23 20:57:19 +03:00
|
|
|
opal_value_t *val, *kv, *kv2;
|
|
|
|
opal_list_t info, codes;
|
2017-09-15 16:50:58 +03:00
|
|
|
struct timespec tp = {0, 100000};
|
2018-02-23 20:57:19 +03:00
|
|
|
mylock_t mylock;
|
2017-09-13 07:40:41 +03:00
|
|
|
|
|
|
|
/* init the globals */
|
|
|
|
memset(&orte_cmd_options, 0, sizeof(orte_cmd_options));
|
2017-09-21 20:26:41 +03:00
|
|
|
memset(&myoptions, 0, sizeof(myoptions));
|
2017-09-13 07:40:41 +03:00
|
|
|
OBJ_CONSTRUCT(&job_info, opal_list_t);
|
|
|
|
OBJ_CONSTRUCT(&apps, opal_list_t);
|
|
|
|
|
|
|
|
/* search the argv for MCA params */
|
|
|
|
for (i=0; NULL != argv[i]; i++) {
|
|
|
|
if (':' == argv[i][0] ||
|
|
|
|
NULL == argv[i+1] || NULL == argv[i+2]) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (0 == strncmp(argv[i], "-"OPAL_MCA_CMD_LINE_ID, strlen("-"OPAL_MCA_CMD_LINE_ID)) ||
|
|
|
|
0 == strncmp(argv[i], "--"OPAL_MCA_CMD_LINE_ID, strlen("--"OPAL_MCA_CMD_LINE_ID)) ||
|
|
|
|
0 == strncmp(argv[i], "-g"OPAL_MCA_CMD_LINE_ID, strlen("-g"OPAL_MCA_CMD_LINE_ID)) ||
|
|
|
|
0 == strncmp(argv[i], "--g"OPAL_MCA_CMD_LINE_ID, strlen("--g"OPAL_MCA_CMD_LINE_ID))) {
|
|
|
|
(void) mca_base_var_env_name (argv[i+1], ¶m);
|
|
|
|
opal_setenv(param, argv[i+2], true, &environ);
|
|
|
|
free(param);
|
|
|
|
} else if (0 == strcmp(argv[i], "-am") ||
|
|
|
|
0 == strcmp(argv[i], "--am")) {
|
|
|
|
(void)mca_base_var_env_name("mca_base_param_file_prefix", ¶m);
|
|
|
|
opal_setenv(param, argv[i+1], true, &environ);
|
|
|
|
free(param);
|
|
|
|
} else if (0 == strcmp(argv[i], "-tune") ||
|
|
|
|
0 == strcmp(argv[i], "--tune")) {
|
|
|
|
(void)mca_base_var_env_name("mca_base_envar_file_prefix", ¶m);
|
|
|
|
opal_setenv(param, argv[i+1], true, &environ);
|
|
|
|
free(param);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* init only the util portion of OPAL */
|
|
|
|
if (OPAL_SUCCESS != (rc = opal_init_util(&argc, &argv))) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2017-09-21 20:26:41 +03:00
|
|
|
/* set our proc type for schizo selection */
|
|
|
|
orte_process_info.proc_type = ORTE_PROC_TOOL;
|
|
|
|
|
|
|
|
/* open the SCHIZO framework so we can setup the command line */
|
|
|
|
if (ORTE_SUCCESS != (rc = mca_base_framework_open(&orte_schizo_base_framework, 0))) {
|
|
|
|
ORTE_ERROR_LOG(rc);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
if (ORTE_SUCCESS != (rc = orte_schizo_base_select())) {
|
|
|
|
ORTE_ERROR_LOG(rc);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2017-09-13 07:40:41 +03:00
|
|
|
/* setup our cmd line */
|
|
|
|
orte_cmd_line = OBJ_NEW(opal_cmd_line_t);
|
|
|
|
if (OPAL_SUCCESS != (rc = opal_cmd_line_add(orte_cmd_line, cmd_line_init))) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2017-09-21 20:26:41 +03:00
|
|
|
/* setup the rest of the cmd line only once */
|
|
|
|
if (OPAL_SUCCESS != (rc = orte_schizo.define_cli(orte_cmd_line))) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2017-09-13 07:40:41 +03:00
|
|
|
/* now that options have been defined, finish setup */
|
|
|
|
mca_base_cmd_line_setup(orte_cmd_line);
|
|
|
|
|
|
|
|
/* parse the result to get values */
|
|
|
|
if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(orte_cmd_line,
|
|
|
|
true, false, argc, argv)) ) {
|
|
|
|
if (OPAL_ERR_SILENT != rc) {
|
|
|
|
fprintf(stderr, "%s: command line error (%s)\n", argv[0],
|
|
|
|
opal_strerror(rc));
|
|
|
|
}
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* see if print version is requested. Do this before
|
|
|
|
* check for help so that --version --help works as
|
|
|
|
* one might expect. */
|
|
|
|
if (orte_cmd_options.version) {
|
|
|
|
char *str;
|
|
|
|
str = opal_info_make_version_str("all",
|
|
|
|
OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
|
|
|
|
OPAL_RELEASE_VERSION,
|
|
|
|
OPAL_GREEK_VERSION,
|
|
|
|
OPAL_REPO_REV);
|
|
|
|
if (NULL != str) {
|
|
|
|
fprintf(stdout, "%s (%s) %s\n\nReport bugs to %s\n",
|
|
|
|
"prun", "PMIx Reference Server", str, PACKAGE_BUGREPORT);
|
|
|
|
free(str);
|
|
|
|
}
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check if we are running as root - if we are, then only allow
|
|
|
|
* us to proceed if the allow-run-as-root flag was given. Otherwise,
|
|
|
|
* exit with a giant warning flag
|
|
|
|
*/
|
|
|
|
if (0 == geteuid() && !orte_cmd_options.run_as_root) {
|
|
|
|
/* show_help is not yet available, so print an error manually */
|
|
|
|
fprintf(stderr, "--------------------------------------------------------------------------\n");
|
|
|
|
if (orte_cmd_options.help) {
|
|
|
|
fprintf(stderr, "prun cannot provide the help message when run as root.\n\n");
|
|
|
|
} else {
|
|
|
|
fprintf(stderr, "prun has detected an attempt to run as root.\n\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(stderr, "Running as root is *strongly* discouraged as any mistake (e.g., in\n");
|
|
|
|
fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n");
|
|
|
|
fprintf(stderr, "file system, leaving your system in an unusable state.\n\n");
|
|
|
|
|
|
|
|
fprintf(stderr, "We strongly suggest that you run prun as a non-root user.\n\n");
|
|
|
|
|
|
|
|
fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n");
|
|
|
|
fprintf(stderr, "option to your command line. However, we reiterate our strong advice\n");
|
|
|
|
fprintf(stderr, "against doing so - please do so at your own risk.\n");
|
|
|
|
fprintf(stderr, "--------------------------------------------------------------------------\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* process any mca params */
|
|
|
|
rc = mca_base_cmd_line_process_args(orte_cmd_line, &environ, &environ);
|
|
|
|
if (ORTE_SUCCESS != rc) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2017-09-19 22:30:34 +03:00
|
|
|
/* Check for help request */
|
|
|
|
if (orte_cmd_options.help) {
|
|
|
|
char *str, *args = NULL;
|
|
|
|
args = opal_cmd_line_get_usage_msg(orte_cmd_line);
|
|
|
|
str = opal_show_help_string("help-orterun.txt", "orterun:usage", false,
|
|
|
|
"prun", "PSVR", OPAL_VERSION,
|
|
|
|
"prun", args,
|
|
|
|
PACKAGE_BUGREPORT);
|
|
|
|
if (NULL != str) {
|
|
|
|
printf("%s", str);
|
|
|
|
free(str);
|
|
|
|
}
|
|
|
|
free(args);
|
|
|
|
|
|
|
|
/* If someone asks for help, that should be all we do */
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
2017-10-14 18:24:57 +03:00
|
|
|
/* ensure we ONLY take the ess/tool component */
|
|
|
|
opal_setenv(OPAL_MCA_PREFIX"ess", "tool", true, &environ);
|
2017-09-21 20:26:41 +03:00
|
|
|
/* tell the ess/tool component how we want to connect */
|
|
|
|
if (myoptions.system_server_only) {
|
2017-09-19 22:30:34 +03:00
|
|
|
opal_setenv(OPAL_MCA_PREFIX"ess_tool_system_server_only", "1", true, &environ);
|
|
|
|
}
|
2017-09-21 20:26:41 +03:00
|
|
|
if (myoptions.system_server_first) {
|
2017-09-19 22:30:34 +03:00
|
|
|
opal_setenv(OPAL_MCA_PREFIX"ess_tool_system_server_first", "1", true, &environ);
|
|
|
|
}
|
|
|
|
/* if they specified the DVM's pid, then pass it along */
|
2017-09-21 20:26:41 +03:00
|
|
|
if (0 != myoptions.pid) {
|
|
|
|
asprintf(¶m, "%d", myoptions.pid);
|
2017-09-19 22:30:34 +03:00
|
|
|
opal_setenv(OPAL_MCA_PREFIX"ess_tool_server_pid", param, true, &environ);
|
|
|
|
free(param);
|
|
|
|
}
|
2017-10-06 20:04:30 +03:00
|
|
|
/* if they specified the URI, then pass it along */
|
|
|
|
if (NULL != orte_cmd_options.hnp) {
|
|
|
|
opal_setenv("PMIX_MCA_ptl_tcp_server_uri", orte_cmd_options.hnp, true, &environ);
|
|
|
|
}
|
2017-09-15 16:50:58 +03:00
|
|
|
|
|
|
|
/* now initialize ORTE */
|
|
|
|
if (OPAL_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_TOOL))) {
|
|
|
|
OPAL_ERROR_LOG(rc);
|
|
|
|
return rc;
|
2017-09-13 07:40:41 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* if the user just wants us to terminate a DVM, then do so */
|
2017-09-21 20:26:41 +03:00
|
|
|
if (myoptions.terminate_dvm) {
|
2017-09-15 16:50:58 +03:00
|
|
|
OBJ_CONSTRUCT(&info, opal_list_t);
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_JOB_CTRL_TERMINATE);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
|
|
|
opal_list_append(&info, &val->super);
|
2017-09-13 07:40:41 +03:00
|
|
|
fprintf(stderr, "TERMINATING DVM...");
|
|
|
|
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
|
2017-09-15 16:50:58 +03:00
|
|
|
rc = opal_pmix.job_control(NULL, &info, infocb, (void*)&lock);
|
2017-09-13 07:40:41 +03:00
|
|
|
OPAL_PMIX_WAIT_THREAD(&lock);
|
|
|
|
OPAL_PMIX_DESTRUCT_LOCK(&lock);
|
2017-09-15 16:50:58 +03:00
|
|
|
OPAL_LIST_DESTRUCT(&info);
|
2017-09-13 07:40:41 +03:00
|
|
|
fprintf(stderr, "DONE\n");
|
|
|
|
goto DONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get here if they want to run an application, so let's parse
|
|
|
|
* the cmd line to get it */
|
|
|
|
|
2017-09-15 16:50:58 +03:00
|
|
|
if (OPAL_SUCCESS != (rc = parse_locals(&apps, argc, argv))) {
|
|
|
|
OPAL_ERROR_LOG(rc);
|
2017-09-13 07:40:41 +03:00
|
|
|
OPAL_LIST_DESTRUCT(&apps);
|
|
|
|
goto DONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* bozo check */
|
2017-09-15 16:50:58 +03:00
|
|
|
if (0 == opal_list_get_size(&apps)) {
|
|
|
|
opal_output(0, "No application specified!");
|
2017-09-13 07:40:41 +03:00
|
|
|
goto DONE;
|
|
|
|
}
|
|
|
|
|
2017-09-15 16:50:58 +03:00
|
|
|
/* init flag */
|
|
|
|
active = true;
|
|
|
|
|
2017-09-13 07:40:41 +03:00
|
|
|
/* register for job terminations so we get notified when
|
|
|
|
* our job completes */
|
|
|
|
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
|
2017-09-15 16:50:58 +03:00
|
|
|
OBJ_CONSTRUCT(&info, opal_list_t);
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup("foo");
|
|
|
|
val->type = OPAL_INT;
|
|
|
|
val->data.integer = OPAL_ERR_JOB_TERMINATED;
|
|
|
|
opal_list_append(&info, &val->super);
|
|
|
|
opal_pmix.register_evhandler(&info, NULL, evhandler, regcbfunc, &lock);
|
2017-09-13 07:40:41 +03:00
|
|
|
OPAL_PMIX_WAIT_THREAD(&lock);
|
|
|
|
OPAL_PMIX_DESTRUCT_LOCK(&lock);
|
2017-09-15 16:50:58 +03:00
|
|
|
OPAL_LIST_DESTRUCT(&info);
|
2017-09-13 07:40:41 +03:00
|
|
|
|
2017-09-21 20:26:41 +03:00
|
|
|
/* we want to be notified upon job completion */
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_NOTIFY_COMPLETION);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
|
|
|
|
/* see if they specified the personality */
|
|
|
|
if (NULL != orte_cmd_options.personality) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_PERSONALITY);
|
|
|
|
val->type = OPAL_STRING;
|
|
|
|
val->data.string = strdup(orte_cmd_options.personality);
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check for stdout/err directives */
|
|
|
|
/* if we were asked to tag output, mark it so */
|
|
|
|
if (orte_cmd_options.tag_output) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_TAG_OUTPUT);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
/* if we were asked to timestamp output, mark it so */
|
|
|
|
if (orte_cmd_options.timestamp_output) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_TIMESTAMP_OUTPUT);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
/* if we were asked to output to files, pass it along */
|
|
|
|
if (NULL != orte_cmd_options.output_filename) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_OUTPUT_TO_FILE);
|
|
|
|
val->type = OPAL_STRING;
|
2018-02-23 20:57:19 +03:00
|
|
|
/* if the given filename isn't an absolute path, then
|
|
|
|
* convert it to one so the name will be relative to
|
|
|
|
* the directory where prun was given as that is what
|
|
|
|
* the user will have seen */
|
|
|
|
if (!opal_path_is_absolute(orte_cmd_options.output_filename)) {
|
|
|
|
char cwd[OPAL_PATH_MAX];
|
|
|
|
getcwd(cwd, sizeof(cwd));
|
|
|
|
val->data.string = opal_os_path(false, cwd, orte_cmd_options.output_filename, NULL);
|
|
|
|
} else {
|
|
|
|
val->data.string = strdup(orte_cmd_options.output_filename);
|
|
|
|
}
|
2017-09-21 20:26:41 +03:00
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
/* if we were asked to merge stderr to stdout, mark it so */
|
|
|
|
if (orte_cmd_options.merge) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_MERGE_STDERR_STDOUT);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check what user wants us to do with stdin */
|
|
|
|
if (NULL != orte_cmd_options.stdin_target) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_STDIN_TGT);
|
|
|
|
val->type = OPAL_UINT32;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
if (0 == strcmp(orte_cmd_options.stdin_target, "all")) {
|
|
|
|
val->data.uint32 = ORTE_VPID_WILDCARD;
|
|
|
|
} else if (0 == strcmp(orte_cmd_options.stdin_target, "none")) {
|
|
|
|
val->data.uint32 = ORTE_VPID_INVALID;
|
|
|
|
} else {
|
|
|
|
val->data.uint32 = strtoul(orte_cmd_options.stdin_target, NULL, 10);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if we want the argv's indexed, indicate that */
|
|
|
|
if (orte_cmd_options.index_argv) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_INDEX_ARGV);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (NULL != orte_cmd_options.mapping_policy) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_MAPBY);
|
|
|
|
val->type = OPAL_STRING;
|
|
|
|
val->data.string = strdup(orte_cmd_options.mapping_policy);
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
} else if (orte_cmd_options.pernode) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_PPR);
|
|
|
|
val->type = OPAL_STRING;
|
|
|
|
val->data.string = strdup("1:node");
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
} else if (0 < orte_cmd_options.npernode) {
|
|
|
|
/* define the ppr */
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_PPR);
|
|
|
|
val->type = OPAL_STRING;
|
|
|
|
(void)asprintf(&val->data.string, "%d:node", orte_cmd_options.npernode);
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
} else if (0 < orte_cmd_options.npersocket) {
|
|
|
|
/* define the ppr */
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_PPR);
|
|
|
|
val->type = OPAL_STRING;
|
|
|
|
(void)asprintf(&val->data.string, "%d:socket", orte_cmd_options.npernode);
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if the user specified cpus/rank, set it */
|
|
|
|
if (0 < orte_cmd_options.cpus_per_proc) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_CPUS_PER_PROC);
|
|
|
|
val->type = OPAL_UINT32;
|
|
|
|
val->data.uint32 = orte_cmd_options.cpus_per_proc;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if the user specified a ranking policy, then set it */
|
|
|
|
if (NULL != orte_cmd_options.ranking_policy) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_RANKBY);
|
|
|
|
val->type = OPAL_STRING;
|
|
|
|
val->data.string = strdup(orte_cmd_options.ranking_policy);
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if the user specified a binding policy, then set it */
|
|
|
|
if (NULL != orte_cmd_options.binding_policy) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_BINDTO);
|
|
|
|
val->type = OPAL_STRING;
|
|
|
|
val->data.string = strdup(orte_cmd_options.binding_policy);
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if they asked for nolocal, mark it so */
|
|
|
|
if (orte_cmd_options.nolocal) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_NO_PROCS_ON_HEAD);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
if (orte_cmd_options.no_oversubscribe) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_NO_OVERSUBSCRIBE);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
if (orte_cmd_options.oversubscribe) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_NO_OVERSUBSCRIBE);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = false;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
if (orte_cmd_options.report_bindings) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_REPORT_BINDINGS);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
if (NULL != orte_cmd_options.cpu_list) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_CPU_LIST);
|
|
|
|
val->type = OPAL_STRING;
|
|
|
|
val->data.string = strdup(orte_cmd_options.cpu_list);
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* mark if recovery was enabled on the cmd line */
|
|
|
|
if (orte_enable_recovery) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_JOB_RECOVERABLE);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
/* record the max restarts */
|
|
|
|
if (0 < orte_max_restarts) {
|
|
|
|
OPAL_LIST_FOREACH(app, &apps, opal_pmix_app_t) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_MAX_RESTARTS);
|
|
|
|
val->type = OPAL_UINT32;
|
|
|
|
val->data.uint32 = orte_max_restarts;
|
|
|
|
opal_list_append(&app->info, &val->super);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* if continuous operation was specified */
|
|
|
|
if (orte_cmd_options.continuous) {
|
|
|
|
/* mark this job as continuously operating */
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_JOB_CONTINUOUS);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
}
|
|
|
|
|
2018-02-23 20:57:19 +03:00
|
|
|
/* pickup any relevant envars */
|
|
|
|
if (NULL != opal_pmix.server_setup_application) {
|
|
|
|
OBJ_CONSTRUCT(&info, opal_list_t);
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_SETUP_APP_ENVARS);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
|
|
|
opal_list_append(&info, &val->super);
|
|
|
|
|
|
|
|
OPAL_PMIX_CONSTRUCT_LOCK(&mylock.lock);
|
|
|
|
OBJ_CONSTRUCT(&mylock.list, opal_list_t);
|
|
|
|
rc = opal_pmix.server_setup_application(ORTE_PROC_MY_NAME->jobid,
|
|
|
|
&info, setupcbfunc, &mylock);
|
|
|
|
if (OPAL_SUCCESS != rc) {
|
|
|
|
OPAL_LIST_DESTRUCT(&info);
|
|
|
|
OPAL_PMIX_DESTRUCT_LOCK(&mylock.lock);
|
|
|
|
OBJ_DESTRUCT(&mylock.list);
|
|
|
|
goto DONE;
|
|
|
|
}
|
|
|
|
OPAL_PMIX_WAIT_THREAD(&mylock.lock);
|
|
|
|
OPAL_PMIX_DESTRUCT_LOCK(&mylock.lock);
|
|
|
|
/* transfer any returned ENVARS to the job_info */
|
|
|
|
while (NULL != (val = (opal_value_t*)opal_list_remove_first(&mylock.list))) {
|
|
|
|
if (0 == strcmp(val->key, OPAL_PMIX_SET_ENVAR) ||
|
|
|
|
0 == strcmp(val->key, OPAL_PMIX_ADD_ENVAR) ||
|
|
|
|
0 == strcmp(val->key, OPAL_PMIX_UNSET_ENVAR) ||
|
|
|
|
0 == strcmp(val->key, OPAL_PMIX_PREPEND_ENVAR) ||
|
|
|
|
0 == strcmp(val->key, OPAL_PMIX_APPEND_ENVAR)) {
|
|
|
|
opal_list_append(&job_info, &val->super);
|
|
|
|
} else {
|
|
|
|
OBJ_RELEASE(val);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
OPAL_LIST_DESTRUCT(&mylock.list);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if we were launched by a tool wanting to direct our
|
|
|
|
* operation, then we need to pause here and give it
|
|
|
|
* a chance to tell us what we need to do */
|
|
|
|
if (NULL != (param = getenv("PMIX_LAUNCHER_PAUSE_FOR_TOOL")) &&
|
|
|
|
0 == strcmp(param, "1")) {
|
|
|
|
/* register for the PMIX_LAUNCH_DIRECTIVE event */
|
|
|
|
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
|
|
|
|
OBJ_CONSTRUCT(&codes, opal_list_t);
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup("foo");
|
|
|
|
val->type = OPAL_INT;
|
|
|
|
val->data.integer = OPAL_PMIX_LAUNCH_DIRECTIVE;
|
|
|
|
opal_list_append(&codes, &val->super);
|
|
|
|
/* setup the myinfo object to capture the returned
|
|
|
|
* values - must do so prior to registering in case
|
|
|
|
* the event has already arrived */
|
|
|
|
OBJ_CONSTRUCT(&myinfo, myinfo_t);
|
|
|
|
/* go ahead and register */
|
|
|
|
opal_pmix.register_evhandler(&codes, NULL, launchhandler, regcbfunc, &lock);
|
|
|
|
OPAL_PMIX_WAIT_THREAD(&lock);
|
|
|
|
OPAL_PMIX_DESTRUCT_LOCK(&lock);
|
|
|
|
OPAL_LIST_DESTRUCT(&codes);
|
|
|
|
/* now wait for the launch directives to arrive */
|
|
|
|
OPAL_PMIX_WAIT_THREAD(&myinfo.lock);
|
|
|
|
/* process the returned directives */
|
|
|
|
OPAL_LIST_FOREACH(val, &myinfo.info, opal_value_t) {
|
|
|
|
if (0 == strcmp(val->key, OPAL_PMIX_DEBUG_JOB_DIRECTIVES)) {
|
|
|
|
/* there will be a pointer to a list containing the directives */
|
|
|
|
lt = (opal_list_t*)val->data.ptr;
|
|
|
|
while (NULL != (kv = (opal_value_t*)opal_list_remove_first(lt))) {
|
|
|
|
opal_output(0, "JOB DIRECTIVE: %s", kv->key);
|
|
|
|
opal_list_append(&job_info, &kv->super);
|
|
|
|
}
|
|
|
|
} else if (0 == strcmp(val->key, OPAL_PMIX_DEBUG_APP_DIRECTIVES)) {
|
|
|
|
/* there will be a pointer to a list containing the directives */
|
|
|
|
lt = (opal_list_t*)val->data.ptr;
|
|
|
|
OPAL_LIST_FOREACH(kv, lt, opal_value_t) {
|
|
|
|
opal_output(0, "APP DIRECTIVE: %s", kv->key);
|
|
|
|
OPAL_LIST_FOREACH(app, &apps, opal_pmix_app_t) {
|
|
|
|
/* the value can only be on one list at a time, so replicate it */
|
|
|
|
kv2 = OBJ_NEW(opal_value_t);
|
|
|
|
opal_value_xfer(kv2, kv);
|
|
|
|
opal_list_append(&app->info, &kv2->super);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 06:03:32 +03:00
|
|
|
if (OPAL_SUCCESS != (rc = opal_pmix.spawn(&job_info, &apps, &myjobid))) {
|
2017-09-15 16:50:58 +03:00
|
|
|
opal_output(0, "Job failed to spawn: %s", opal_strerror(rc));
|
|
|
|
goto DONE;
|
2017-09-13 07:40:41 +03:00
|
|
|
}
|
|
|
|
OPAL_LIST_DESTRUCT(&job_info);
|
|
|
|
OPAL_LIST_DESTRUCT(&apps);
|
|
|
|
|
2017-10-10 20:08:30 +03:00
|
|
|
if (orte_cmd_options.verbose) {
|
2018-01-12 06:03:32 +03:00
|
|
|
opal_output(0, "JOB %s EXECUTING", OPAL_JOBID_PRINT(myjobid));
|
2017-10-10 20:08:30 +03:00
|
|
|
}
|
2017-09-15 16:50:58 +03:00
|
|
|
|
|
|
|
while (active) {
|
|
|
|
nanosleep(&tp, NULL);
|
2017-09-13 07:40:41 +03:00
|
|
|
}
|
2017-10-06 20:04:30 +03:00
|
|
|
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
|
|
|
|
opal_pmix.deregister_evhandler(evid, opcbfunc, &lock);
|
|
|
|
OPAL_PMIX_WAIT_THREAD(&lock);
|
|
|
|
OPAL_PMIX_DESTRUCT_LOCK(&lock);
|
2017-09-13 07:40:41 +03:00
|
|
|
|
2017-09-15 16:50:58 +03:00
|
|
|
DONE:
|
2017-09-13 07:40:41 +03:00
|
|
|
/* cleanup and leave */
|
2017-09-15 16:50:58 +03:00
|
|
|
orte_finalize();
|
2017-09-13 07:40:41 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Split the command line into per-application segments and build one
 * application per segment.
 *
 * argv[1..argc-1] is scanned for ":" separators.  Each run of arguments
 * between separators is prefixed with argv[0] and handed to create_app();
 * every app that create_app() reports as made is appended to jdata.
 * Segments that contain nothing beyond argv[0] are ignored.
 *
 * @param jdata  list that receives the created opal_pmix_app_t objects
 * @param argc   number of entries in argv
 * @param argv   full command line, argv[0] being the program name
 *
 * @return ORTE_SUCCESS.  If create_app() fails the process exits with
 *         status 1 (create_app() is assumed to have printed the error).
 */
static int parse_locals(opal_list_t *jdata, int argc, char* argv[])
{
    int i, rc;
    int temp_argc;
    char **temp_argv, **env;
    opal_pmix_app_t *app;
    bool made_app;

    /* Make the apps */
    temp_argc = 0;
    temp_argv = NULL;
    opal_argv_append(&temp_argc, &temp_argv, argv[0]);

    /* NOTE: This bogus env variable is necessary in the calls to
       create_app(), below.  See comment immediately before the
       create_app() function for an explanation. */

    env = NULL;
    for (i = 1; i < argc; ++i) {
        if (0 != strcmp(argv[i], ":")) {
            /* ordinary argument - accumulate it in the current segment */
            opal_argv_append(&temp_argc, &temp_argv, argv[i]);
            continue;
        }

        /* Hit a separator: make an app with the argv collected so far,
           unless the segment is empty (only argv[0]) */
        if (opal_argv_count(temp_argv) > 1) {
            if (NULL != env) {
                opal_argv_free(env);
                env = NULL;
            }
            app = NULL;
            rc = create_app(temp_argc, temp_argv, jdata, &app, &made_app, &env);
            if (OPAL_SUCCESS != rc) {
                /* Assume that the error message has already been
                   printed; no need to cleanup -- we can just
                   exit */
                exit(1);
            }
            if (made_app) {
                opal_list_append(jdata, &app->super);
            }

            /* Reset the temps.  The segment's argv is released first:
               it is no longer needed once create_app() has returned
               (the final segment is released the same way below), and
               dropping the pointer without freeing it leaked one array
               per segment. */
            opal_argv_free(temp_argv);
            temp_argc = 0;
            temp_argv = NULL;
            opal_argv_append(&temp_argc, &temp_argv, argv[0]);
        }
    }

    /* Handle the trailing segment (the one not followed by a ":") */
    if (opal_argv_count(temp_argv) > 1) {
        app = NULL;
        rc = create_app(temp_argc, temp_argv, jdata, &app, &made_app, &env);
        if (OPAL_SUCCESS != rc) {
            /* Assume that the error message has already been printed;
               no need to cleanup -- we can just exit */
            exit(1);
        }
        if (made_app) {
            opal_list_append(jdata, &app->super);
        }
    }
    if (NULL != env) {
        opal_argv_free(env);
    }
    opal_argv_free(temp_argv);

    /* All done */

    return ORTE_SUCCESS;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This function takes a "char ***app_env" parameter to handle the
|
|
|
|
* specific case:
|
|
|
|
*
|
|
|
|
* orterun --mca foo bar -app appfile
|
|
|
|
*
|
|
|
|
* That is, we'll need to keep foo=bar, but the presence of the app
|
|
|
|
* file will cause an invocation of parse_appfile(), which will cause
|
|
|
|
* one or more recursive calls back to create_app(). Since the
|
|
|
|
* foo=bar value applies globally to all apps in the appfile, we need
|
|
|
|
* to pass in the "base" environment (that contains the foo=bar value)
|
|
|
|
* when we parse each line in the appfile.
|
|
|
|
*
|
|
|
|
* This is really just a special case -- when we have a simple case like:
|
|
|
|
*
|
|
|
|
* orterun --mca foo bar -np 4 hostname
|
|
|
|
*
|
|
|
|
* Then the upper-level function (parse_locals()) calls create_app()
|
|
|
|
* with a NULL value for app_env, meaning that there is no "base"
|
|
|
|
* environment that the app needs to be created from.
|
|
|
|
*/
|
|
|
|
static int create_app(int argc, char* argv[],
|
|
|
|
opal_list_t *jdata,
|
|
|
|
opal_pmix_app_t **app_ptr,
|
|
|
|
bool *made_app, char ***app_env)
|
|
|
|
{
|
|
|
|
char cwd[OPAL_PATH_MAX];
|
|
|
|
int i, j, count, rc;
|
|
|
|
char *param, *value;
|
|
|
|
opal_pmix_app_t *app = NULL;
|
|
|
|
bool found = false;
|
|
|
|
char *appname = NULL;
|
|
|
|
opal_value_t *val;
|
|
|
|
|
|
|
|
*made_app = false;
|
|
|
|
|
|
|
|
/* parse the cmd line - do this every time thru so we can
|
|
|
|
* repopulate the globals */
|
|
|
|
if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(orte_cmd_line, true, false,
|
|
|
|
argc, argv)) ) {
|
|
|
|
if (OPAL_ERR_SILENT != rc) {
|
|
|
|
fprintf(stderr, "%s: command line error (%s)\n", argv[0],
|
|
|
|
opal_strerror(rc));
|
|
|
|
}
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Setup application context */
|
|
|
|
app = OBJ_NEW(opal_pmix_app_t);
|
|
|
|
opal_cmd_line_get_tail(orte_cmd_line, &count, &app->argv);
|
|
|
|
|
|
|
|
/* See if we have anything left */
|
|
|
|
if (0 == count) {
|
|
|
|
opal_show_help("help-orterun.txt", "orterun:executable-not-specified",
|
|
|
|
true, "prun", "prun");
|
|
|
|
rc = OPAL_ERR_NOT_FOUND;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Grab all MCA environment variables */
|
|
|
|
app->env = opal_argv_copy(*app_env);
|
|
|
|
for (i=0; NULL != environ[i]; i++) {
|
2017-10-24 02:27:41 +03:00
|
|
|
if (0 == strncmp("PMIX_", environ[i], 5) ||
|
|
|
|
0 == strncmp("OMPI_", environ[i], 5)) {
|
2017-09-13 07:40:41 +03:00
|
|
|
/* check for duplicate in app->env - this
|
|
|
|
* would have been placed there by the
|
|
|
|
* cmd line processor. By convention, we
|
|
|
|
* always let the cmd line override the
|
|
|
|
* environment
|
|
|
|
*/
|
|
|
|
param = strdup(environ[i]);
|
|
|
|
value = strchr(param, '=');
|
|
|
|
*value = '\0';
|
|
|
|
value++;
|
|
|
|
opal_setenv(param, value, false, &app->env);
|
|
|
|
free(param);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-10 20:08:30 +03:00
|
|
|
/* set necessary env variables for external usage from tune conf file*/
|
|
|
|
int set_from_file = 0;
|
|
|
|
char **vars = NULL;
|
|
|
|
if (OPAL_SUCCESS == mca_base_var_process_env_list_from_file(&vars) &&
|
|
|
|
NULL != vars) {
|
|
|
|
for (i=0; NULL != vars[i]; i++) {
|
|
|
|
value = strchr(vars[i], '=');
|
|
|
|
/* terminate the name of the param */
|
|
|
|
*value = '\0';
|
|
|
|
/* step over the equals */
|
|
|
|
value++;
|
|
|
|
/* overwrite any prior entry */
|
|
|
|
opal_setenv(vars[i], value, true, &app->env);
|
|
|
|
/* save it for any comm_spawn'd apps */
|
|
|
|
opal_setenv(vars[i], value, true, &orte_forwarded_envars);
|
|
|
|
}
|
|
|
|
set_from_file = 1;
|
|
|
|
opal_argv_free(vars);
|
|
|
|
}
|
|
|
|
/* Did the user request to export any environment variables on the cmd line? */
|
|
|
|
char *env_set_flag;
|
|
|
|
env_set_flag = getenv("OMPI_MCA_mca_base_env_list");
|
|
|
|
if (opal_cmd_line_is_taken(orte_cmd_line, "x")) {
|
|
|
|
if (NULL != env_set_flag) {
|
|
|
|
opal_show_help("help-orterun.txt", "orterun:conflict-env-set", false);
|
|
|
|
return ORTE_ERR_FATAL;
|
|
|
|
}
|
|
|
|
j = opal_cmd_line_get_ninsts(orte_cmd_line, "x");
|
|
|
|
for (i = 0; i < j; ++i) {
|
|
|
|
param = opal_cmd_line_get_param(orte_cmd_line, "x", i, 0);
|
|
|
|
|
|
|
|
if (NULL != (value = strchr(param, '='))) {
|
|
|
|
/* terminate the name of the param */
|
|
|
|
*value = '\0';
|
|
|
|
/* step over the equals */
|
|
|
|
value++;
|
|
|
|
/* overwrite any prior entry */
|
|
|
|
opal_setenv(param, value, true, &app->env);
|
|
|
|
/* save it for any comm_spawn'd apps */
|
|
|
|
opal_setenv(param, value, true, &orte_forwarded_envars);
|
|
|
|
} else {
|
|
|
|
value = getenv(param);
|
|
|
|
if (NULL != value) {
|
|
|
|
/* overwrite any prior entry */
|
|
|
|
opal_setenv(param, value, true, &app->env);
|
|
|
|
/* save it for any comm_spawn'd apps */
|
|
|
|
opal_setenv(param, value, true, &orte_forwarded_envars);
|
|
|
|
} else {
|
|
|
|
opal_output(0, "Warning: could not find environment variable \"%s\"\n", param);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (NULL != env_set_flag) {
|
|
|
|
/* if mca_base_env_list was set, check if some of env vars were set via -x from a conf file.
|
|
|
|
* If this is the case, error out.
|
|
|
|
*/
|
|
|
|
if (!set_from_file) {
|
|
|
|
/* set necessary env variables for external usage */
|
|
|
|
vars = NULL;
|
|
|
|
if (OPAL_SUCCESS == mca_base_var_process_env_list(env_set_flag, &vars) &&
|
|
|
|
NULL != vars) {
|
|
|
|
for (i=0; NULL != vars[i]; i++) {
|
|
|
|
value = strchr(vars[i], '=');
|
|
|
|
/* terminate the name of the param */
|
|
|
|
*value = '\0';
|
|
|
|
/* step over the equals */
|
|
|
|
value++;
|
|
|
|
/* overwrite any prior entry */
|
|
|
|
opal_setenv(vars[i], value, true, &app->env);
|
|
|
|
/* save it for any comm_spawn'd apps */
|
|
|
|
opal_setenv(vars[i], value, true, &orte_forwarded_envars);
|
|
|
|
}
|
|
|
|
opal_argv_free(vars);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
opal_show_help("help-orterun.txt", "orterun:conflict-env-set", false);
|
|
|
|
return ORTE_ERR_FATAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-09-13 07:40:41 +03:00
|
|
|
/* Did the user request a specific wdir? */
|
|
|
|
|
|
|
|
if (NULL != orte_cmd_options.wdir) {
|
|
|
|
/* if this is a relative path, convert it to an absolute path */
|
|
|
|
if (opal_path_is_absolute(orte_cmd_options.wdir)) {
|
|
|
|
app->cwd = strdup(orte_cmd_options.wdir);
|
|
|
|
} else {
|
|
|
|
/* get the cwd */
|
|
|
|
if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {
|
|
|
|
opal_show_help("help-orterun.txt", "orterun:init-failure",
|
|
|
|
true, "get the cwd", rc);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
/* construct the absolute path */
|
|
|
|
app->cwd = opal_os_path(false, cwd, orte_cmd_options.wdir, NULL);
|
|
|
|
}
|
|
|
|
} else if (orte_cmd_options.set_cwd_to_session_dir) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_SET_SESSION_CWD);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
2017-09-21 20:26:41 +03:00
|
|
|
opal_list_append(&app->info, &val->super);
|
2017-09-13 07:40:41 +03:00
|
|
|
} else {
|
|
|
|
if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {
|
|
|
|
opal_show_help("help-orterun.txt", "orterun:init-failure",
|
|
|
|
true, "get the cwd", rc);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
app->cwd = strdup(cwd);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Did the user specify a hostfile. Need to check for both
|
|
|
|
* hostfile and machine file.
|
|
|
|
* We can only deal with one hostfile per app context, otherwise give an error.
|
|
|
|
*/
|
|
|
|
found = false;
|
|
|
|
if (0 < (j = opal_cmd_line_get_ninsts(orte_cmd_line, "hostfile"))) {
|
|
|
|
if (1 < j) {
|
|
|
|
opal_show_help("help-orterun.txt", "orterun:multiple-hostfiles",
|
|
|
|
true, "prun", NULL);
|
|
|
|
return ORTE_ERR_FATAL;
|
|
|
|
} else {
|
|
|
|
value = opal_cmd_line_get_param(orte_cmd_line, "hostfile", 0, 0);
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_HOSTFILE);
|
|
|
|
val->type = OPAL_STRING;
|
|
|
|
val->data.string = value;
|
2017-09-21 20:26:41 +03:00
|
|
|
opal_list_append(&app->info, &val->super);
|
2017-09-13 07:40:41 +03:00
|
|
|
found = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (0 < (j = opal_cmd_line_get_ninsts(orte_cmd_line, "machinefile"))) {
|
|
|
|
if (1 < j || found) {
|
|
|
|
opal_show_help("help-orterun.txt", "orterun:multiple-hostfiles",
|
|
|
|
true, "prun", NULL);
|
|
|
|
return ORTE_ERR_FATAL;
|
|
|
|
} else {
|
|
|
|
value = opal_cmd_line_get_param(orte_cmd_line, "machinefile", 0, 0);
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_HOSTFILE);
|
|
|
|
val->type = OPAL_STRING;
|
|
|
|
val->data.string = value;
|
2017-09-21 20:26:41 +03:00
|
|
|
opal_list_append(&app->info, &val->super);
|
2017-09-13 07:40:41 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Did the user specify any hosts? */
|
|
|
|
if (0 < (j = opal_cmd_line_get_ninsts(orte_cmd_line, "host"))) {
|
|
|
|
char **targ=NULL, *tval;
|
|
|
|
for (i = 0; i < j; ++i) {
|
|
|
|
value = opal_cmd_line_get_param(orte_cmd_line, "host", i, 0);
|
|
|
|
opal_argv_append_nosize(&targ, value);
|
|
|
|
}
|
|
|
|
tval = opal_argv_join(targ, ',');
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_HOST);
|
|
|
|
val->type = OPAL_STRING;
|
|
|
|
val->data.string = tval;
|
2017-09-21 20:26:41 +03:00
|
|
|
opal_list_append(&app->info, &val->super);
|
2017-09-13 07:40:41 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* check for bozo error */
|
|
|
|
if (0 > orte_cmd_options.num_procs) {
|
|
|
|
opal_show_help("help-orterun.txt", "orterun:negative-nprocs",
|
|
|
|
true, "prun", app->argv[0],
|
|
|
|
orte_cmd_options.num_procs, NULL);
|
|
|
|
return ORTE_ERR_FATAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
app->maxprocs = orte_cmd_options.num_procs;
|
|
|
|
|
|
|
|
/* see if we need to preload the binary to
|
|
|
|
* find the app - don't do this for java apps, however, as we
|
|
|
|
* can't easily find the class on the cmd line. Java apps have to
|
|
|
|
* preload their binary via the preload_files option
|
|
|
|
*/
|
|
|
|
if (NULL == strstr(app->argv[0], "java")) {
|
|
|
|
if (orte_cmd_options.preload_binaries) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_SET_SESSION_CWD);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
2017-09-21 20:26:41 +03:00
|
|
|
opal_list_append(&app->info, &val->super);
|
2017-09-13 07:40:41 +03:00
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_PRELOAD_BIN);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
2017-09-21 20:26:41 +03:00
|
|
|
opal_list_append(&app->info, &val->super);
|
2017-09-13 07:40:41 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (NULL != orte_cmd_options.preload_files) {
|
|
|
|
val = OBJ_NEW(opal_value_t);
|
|
|
|
val->key = strdup(OPAL_PMIX_PRELOAD_FILES);
|
|
|
|
val->type = OPAL_BOOL;
|
|
|
|
val->data.flag = true;
|
2017-09-21 20:26:41 +03:00
|
|
|
opal_list_append(&app->info, &val->super);
|
2017-09-13 07:40:41 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Do not try to find argv[0] here -- the starter is responsible
|
|
|
|
for that because it may not be relevant to try to find it on
|
|
|
|
the node where orterun is executing. So just strdup() argv[0]
|
|
|
|
into app. */
|
|
|
|
|
|
|
|
app->cmd = strdup(app->argv[0]);
|
|
|
|
if (NULL == app->cmd) {
|
|
|
|
opal_show_help("help-orterun.txt", "orterun:call-failed",
|
|
|
|
true, "prun", "library", "strdup returned NULL", errno);
|
|
|
|
rc = ORTE_ERR_NOT_FOUND;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if this is a Java application, we have a bit more work to do. Such
|
|
|
|
* applications actually need to be run under the Java virtual machine
|
|
|
|
* and the "java" command will start the "executable". So we need to ensure
|
|
|
|
* that all the proper java-specific paths are provided
|
|
|
|
*/
|
|
|
|
appname = opal_basename(app->cmd);
|
|
|
|
if (0 == strcmp(appname, "java")) {
|
|
|
|
/* see if we were given a library path */
|
|
|
|
found = false;
|
|
|
|
for (i=1; NULL != app->argv[i]; i++) {
|
|
|
|
if (NULL != strstr(app->argv[i], "java.library.path")) {
|
|
|
|
char *dptr;
|
|
|
|
/* find the '=' that delineates the option from the path */
|
|
|
|
if (NULL == (dptr = strchr(app->argv[i], '='))) {
|
|
|
|
/* that's just wrong */
|
|
|
|
rc = ORTE_ERR_BAD_PARAM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
/* step over the '=' */
|
|
|
|
++dptr;
|
|
|
|
/* yep - but does it include the path to the mpi libs? */
|
|
|
|
found = true;
|
|
|
|
if (NULL == strstr(app->argv[i], opal_install_dirs.libdir)) {
|
|
|
|
/* doesn't appear to - add it to be safe */
|
|
|
|
if (':' == app->argv[i][strlen(app->argv[i]-1)]) {
|
|
|
|
asprintf(&value, "-Djava.library.path=%s%s", dptr, opal_install_dirs.libdir);
|
|
|
|
} else {
|
|
|
|
asprintf(&value, "-Djava.library.path=%s:%s", dptr, opal_install_dirs.libdir);
|
|
|
|
}
|
|
|
|
free(app->argv[i]);
|
|
|
|
app->argv[i] = value;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!found) {
|
|
|
|
/* need to add it right after the java command */
|
|
|
|
asprintf(&value, "-Djava.library.path=%s", opal_install_dirs.libdir);
|
|
|
|
opal_argv_insert_element(&app->argv, 1, value);
|
|
|
|
free(value);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* see if we were given a class path */
|
|
|
|
found = false;
|
|
|
|
for (i=1; NULL != app->argv[i]; i++) {
|
|
|
|
if (NULL != strstr(app->argv[i], "cp") ||
|
|
|
|
NULL != strstr(app->argv[i], "classpath")) {
|
|
|
|
/* yep - but does it include the path to the mpi libs? */
|
|
|
|
found = true;
|
|
|
|
/* check if mpi.jar exists - if so, add it */
|
|
|
|
value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL);
|
|
|
|
if (access(value, F_OK ) != -1) {
|
|
|
|
set_classpath_jar_file(app, i+1, "mpi.jar");
|
|
|
|
}
|
|
|
|
free(value);
|
|
|
|
/* check for oshmem support */
|
|
|
|
value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL);
|
|
|
|
if (access(value, F_OK ) != -1) {
|
|
|
|
set_classpath_jar_file(app, i+1, "shmem.jar");
|
|
|
|
}
|
|
|
|
free(value);
|
|
|
|
/* always add the local directory */
|
|
|
|
asprintf(&value, "%s:%s", app->cwd, app->argv[i+1]);
|
|
|
|
free(app->argv[i+1]);
|
|
|
|
app->argv[i+1] = value;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!found) {
|
|
|
|
/* check to see if CLASSPATH is in the environment */
|
|
|
|
found = false; // just to be pedantic
|
|
|
|
for (i=0; NULL != environ[i]; i++) {
|
|
|
|
if (0 == strncmp(environ[i], "CLASSPATH", strlen("CLASSPATH"))) {
|
|
|
|
value = strchr(environ[i], '=');
|
|
|
|
++value; /* step over the = */
|
|
|
|
opal_argv_insert_element(&app->argv, 1, value);
|
|
|
|
/* check for mpi.jar */
|
|
|
|
value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL);
|
|
|
|
if (access(value, F_OK ) != -1) {
|
|
|
|
set_classpath_jar_file(app, 1, "mpi.jar");
|
|
|
|
}
|
|
|
|
free(value);
|
|
|
|
/* check for shmem.jar */
|
|
|
|
value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL);
|
|
|
|
if (access(value, F_OK ) != -1) {
|
|
|
|
set_classpath_jar_file(app, 1, "shmem.jar");
|
|
|
|
}
|
|
|
|
free(value);
|
|
|
|
/* always add the local directory */
|
|
|
|
(void)asprintf(&value, "%s:%s", app->cwd, app->argv[1]);
|
|
|
|
free(app->argv[1]);
|
|
|
|
app->argv[1] = value;
|
|
|
|
opal_argv_insert_element(&app->argv, 1, "-cp");
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!found) {
|
|
|
|
/* need to add it right after the java command - have
|
|
|
|
* to include the working directory and trust that
|
|
|
|
* the user set cwd if necessary
|
|
|
|
*/
|
|
|
|
char *str, *str2;
|
|
|
|
/* always start with the working directory */
|
|
|
|
str = strdup(app->cwd);
|
|
|
|
/* check for mpi.jar */
|
|
|
|
value = opal_os_path(false, opal_install_dirs.libdir, "mpi.jar", NULL);
|
|
|
|
if (access(value, F_OK ) != -1) {
|
|
|
|
(void)asprintf(&str2, "%s:%s", str, value);
|
|
|
|
free(str);
|
|
|
|
str = str2;
|
|
|
|
}
|
|
|
|
free(value);
|
|
|
|
/* check for shmem.jar */
|
|
|
|
value = opal_os_path(false, opal_install_dirs.libdir, "shmem.jar", NULL);
|
|
|
|
if (access(value, F_OK ) != -1) {
|
|
|
|
asprintf(&str2, "%s:%s", str, value);
|
|
|
|
free(str);
|
|
|
|
str = str2;
|
|
|
|
}
|
|
|
|
free(value);
|
|
|
|
opal_argv_insert_element(&app->argv, 1, str);
|
|
|
|
free(str);
|
|
|
|
opal_argv_insert_element(&app->argv, 1, "-cp");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* try to find the actual command - may not be perfect */
|
|
|
|
for (i=1; i < opal_argv_count(app->argv); i++) {
|
|
|
|
if (NULL != strstr(app->argv[i], "java.library.path")) {
|
|
|
|
continue;
|
|
|
|
} else if (NULL != strstr(app->argv[i], "cp") ||
|
|
|
|
NULL != strstr(app->argv[i], "classpath")) {
|
|
|
|
/* skip the next field */
|
|
|
|
i++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
/* declare this the winner */
|
|
|
|
opal_setenv("OMPI_COMMAND", app->argv[i], true, &app->env);
|
|
|
|
/* collect everything else as the cmd line */
|
|
|
|
if ((i+1) < opal_argv_count(app->argv)) {
|
|
|
|
value = opal_argv_join(&app->argv[i+1], ' ');
|
|
|
|
opal_setenv("OMPI_ARGV", value, true, &app->env);
|
|
|
|
free(value);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* add the cmd to the environment for MPI_Info to pickup */
|
|
|
|
opal_setenv("OMPI_COMMAND", appname, true, &app->env);
|
|
|
|
if (1 < opal_argv_count(app->argv)) {
|
|
|
|
value = opal_argv_join(&app->argv[1], ' ');
|
|
|
|
opal_setenv("OMPI_ARGV", value, true, &app->env);
|
|
|
|
free(value);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*app_ptr = app;
|
|
|
|
app = NULL;
|
|
|
|
*made_app = true;
|
|
|
|
|
|
|
|
/* All done */
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
if (NULL != app) {
|
|
|
|
OBJ_RELEASE(app);
|
|
|
|
}
|
|
|
|
if (NULL != appname) {
|
|
|
|
free(appname);
|
|
|
|
}
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Make sure the classpath stored in app->argv[index] mentions jarfile.
 *
 * If the entry does not already contain jarfile, it is replaced by
 * "<entry>:<libdir>/<jarfile>", where libdir is opal_install_dirs.libdir
 * and the ':' is omitted when the entry already ends in one.  The old
 * string is freed and the slot takes ownership of the new one.
 *
 * @param app      application whose argv is edited in place
 * @param index    position in app->argv of the classpath value
 * @param jarfile  jar file name to look for / append
 *
 * The entry is left untouched if it is NULL or if the new string
 * cannot be allocated.
 */
static void set_classpath_jar_file(opal_pmix_app_t *app, int index, char *jarfile)
{
    char *entry = app->argv[index];
    char *str = NULL;
    const char *sep;
    size_t len;

    if (NULL == entry || NULL != strstr(entry, jarfile)) {
        /* no classpath value here, or the jar is already listed */
        return;
    }

    /* nope - need to add it.  Look at the LAST character of the entry:
     * the length is taken first and 1 subtracted from it, never from
     * the pointer itself. */
    len = strlen(entry);
    sep = (0 < len && ':' == entry[len - 1]) ? "" : ":";

    if (0 > asprintf(&str, "%s%s%s/%s", entry, sep,
                     opal_install_dirs.libdir, jarfile)) {
        /* allocation failed - keep the existing entry */
        return;
    }
    free(entry);
    app->argv[index] = str;
}
|