1
1

Fix the tool connection logic so that we properly search for the default session server, perform the specified number of retries, etc.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
(cherry picked from commit 7c755e01004f8b86c71f1729662979ea45ab1adb)
Этот коммит содержится в:
Ralph Castain 2017-09-19 12:30:34 -07:00
родитель 16de607607
Коммит e575c4d6f9
14 изменённых файлов: 244 добавлений и 91 удалений

Просмотреть файл

@ -125,6 +125,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
char myhost[PMIX_MAXHOSTNAMELEN];
bool system_level = false;
bool system_level_only = false;
pid_t pid = 0;
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"ptl:tcp: connecting to server");
@ -224,12 +225,17 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
system_level = info[n].value.data.flag;
}
} else if (0 == strcmp(info[n].key, PMIX_SERVER_PIDINFO)) {
mca_ptl_tcp_component.tool_pid = info[n].value.data.pid;
pid = info[n].value.data.pid;
pmix_output(0, "GOT PID %d", (int)pid);
} else if (0 == strcmp(info[n].key, PMIX_SERVER_URI)) {
if (NULL == mca_ptl_tcp_component.super.uri) {
free(mca_ptl_tcp_component.super.uri);
}
mca_ptl_tcp_component.super.uri = strdup(info[n].value.data.string);
} else if (0 == strcmp(info[n].key, PMIX_CONNECT_RETRY_DELAY)) {
mca_ptl_tcp_component.wait_to_connect = info[n].value.data.uint32;
} else if (0 == strcmp(info[n].key, PMIX_CONNECT_MAX_RETRIES)) {
mca_ptl_tcp_component.max_retries = info[n].value.data.uint32;
}
}
}
@ -263,6 +269,29 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
goto complete;
}
/* if they gave us a pid, then look for it */
if (0 != pid) {
if (0 > asprintf(&filename, "pmix.%s.tool.%d", myhost, pid)) {
return PMIX_ERR_NOMEM;
}
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"ptl:tcp:tool searching for given session server %s",
filename);
nspace = NULL;
rc = df_search(mca_ptl_tcp_component.system_tmpdir,
filename, &sd, &nspace, &rank);
free(filename);
if (PMIX_SUCCESS == rc) {
goto complete;
}
if (NULL != nspace) {
free(nspace);
}
/* since they gave us a specific pid and we couldn't
* connect to it, return an error */
return PMIX_ERR_UNREACH;
}
/* if they asked for system-level, we start there */
if (system_level || system_level_only) {
@ -297,31 +326,6 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
return PMIX_ERR_UNREACH;
}
/* now try the session-level connection - if they gave us a pid, then
* look for it */
if (0 != mca_ptl_tcp_component.tool_pid) {
if (0 > asprintf(&filename, "pmix.%s.tool.%d",
myhost, mca_ptl_tcp_component.tool_pid)) {
return PMIX_ERR_NOMEM;
}
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"ptl:tcp:tool searching for given session server %s",
filename);
nspace = NULL;
rc = df_search(mca_ptl_tcp_component.system_tmpdir,
filename, &sd, &nspace, &rank);
free(filename);
if (PMIX_SUCCESS == rc) {
goto complete;
}
if (NULL != nspace) {
free(nspace);
}
/* since they gave us a specific pid and we couldn't
* connect to it, return an error */
return PMIX_ERR_UNREACH;
}
/* they didn't give us a pid, so we will search to see what session-level
* tools are available to this user. We will take the first connection
* that succeeds - this is based on the likelihood that there is only
@ -441,6 +445,11 @@ static pmix_status_t send_oneway(struct pmix_peer_t *peer,
return PMIX_SUCCESS;
}
/* Timer-event callback: signals the lock handed in through cbdata so
 * that the thread waiting on that lock can continue. The sd and args
 * parameters are not used by this function. */
static void timeout(int sd, short args, void *cbdata)
{
    pmix_lock_t *waiter = (pmix_lock_t *) cbdata;

    PMIX_WAKEUP_THREAD(waiter);
}
/**** SUPPORTING FUNCTIONS ****/
static pmix_status_t parse_uri_file(char *filename,
@ -450,14 +459,48 @@ static pmix_status_t parse_uri_file(char *filename,
{
FILE *fp;
char *srvr, *p, *p2;
pmix_lock_t lock;
pmix_event_t ev;
struct timeval tv;
int retries;
fp = fopen(filename, "r");
if (NULL == fp) {
/* if we cannot open the file, then the server must not
* be configured to support tool connections, or this
* user isn't authorized to access it */
* user isn't authorized to access it - or it may just
* not exist yet! Check for existence */
if (0 != access(filename, R_OK)) {
if (ENOENT == errno) {
/* the file does not exist, so give it
* a little time to see if the server
* is still starting up */
retries = 0;
do {
++retries;
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"WAITING FOR CONNECTION FILE");
PMIX_CONSTRUCT_LOCK(&lock);
tv.tv_sec = mca_ptl_tcp_component.wait_to_connect;
tv.tv_usec = 0;
pmix_event_evtimer_set(pmix_globals.evbase, &ev,
timeout, &lock);
pmix_event_evtimer_add(&ev, &tv);
PMIX_WAIT_THREAD(&lock);
PMIX_DESTRUCT_LOCK(&lock);
fp = fopen(filename, "r");
if (NULL != fp) {
/* we found it! */
goto process;
}
} while (retries < mca_ptl_tcp_component.max_retries);
/* otherwise, mark it as unreachable */
}
}
return PMIX_ERR_UNREACH;
}
process:
/* get the URI */
srvr = pmix_getline(fp);
if (NULL == srvr) {
@ -916,8 +959,9 @@ static pmix_status_t df_search(char *dirname, char *prefix,
char *suri, *nsp, *newdir;
pmix_rank_t rk;
pmix_status_t rc;
struct stat buf;
DIR *cur_dirp;
struct dirent * dir_entry;
struct dirent *dir_entry;
if (NULL == (cur_dirp = opendir(dirname))) {
return PMIX_ERR_NOT_FOUND;
@ -933,9 +977,12 @@ static pmix_status_t df_search(char *dirname, char *prefix,
0 == strcmp(dir_entry->d_name, "..")) {
continue;
}
newdir = pmix_os_path(false, dirname, dir_entry->d_name, NULL);
if (-1 == stat(newdir, &buf)) {
continue;
}
/* if it is a directory, down search */
if (DT_DIR == dir_entry->d_type) {
newdir = pmix_os_path(false, dirname, dir_entry->d_name, NULL);
if (S_ISDIR(buf.st_mode)) {
rc = df_search(newdir, prefix, sd, nspace, rank);
free(newdir);
if (PMIX_SUCCESS == rc) {
@ -944,22 +991,14 @@ static pmix_status_t df_search(char *dirname, char *prefix,
}
continue;
}
/* if it isn't a regular file, ignore it */
if (DT_REG != dir_entry->d_type) {
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"pmix:tcp: ignoring %s", dir_entry->d_name);
continue;
}
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"pmix:tcp: checking %s vs %s", dir_entry->d_name, prefix);
/* see if it starts with our prefix */
if (0 == strncmp(dir_entry->d_name, prefix, strlen(prefix))) {
/* try to read this file */
newdir = pmix_os_path(false, dirname, dir_entry->d_name, NULL);
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"pmix:tcp: reading file %s", newdir);
rc = parse_uri_file(newdir, &suri, &nsp, &rk);
free(newdir);
if (PMIX_SUCCESS == rc) {
if (NULL != mca_ptl_tcp_component.super.uri) {
free(mca_ptl_tcp_component.super.uri);
@ -972,11 +1011,13 @@ static pmix_status_t df_search(char *dirname, char *prefix,
(*nspace) = nsp;
*rank = rk;
closedir(cur_dirp);
free(newdir);
return PMIX_SUCCESS;
}
free(nsp);
}
}
free(newdir);
}
closedir(cur_dirp);
return PMIX_ERR_NOT_FOUND;

Просмотреть файл

@ -47,7 +47,8 @@ typedef struct {
struct sockaddr_storage connection;
char *session_filename;
char *system_filename;
pid_t tool_pid;
int wait_to_connect;
int max_retries;
} pmix_ptl_tcp_component_t;
extern pmix_ptl_tcp_component_t mca_ptl_tcp_component;

Просмотреть файл

@ -113,7 +113,8 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo,
.disable_ipv6_family = true,
.session_filename = NULL,
.system_filename = NULL,
.tool_pid = 0
.wait_to_connect = 4,
.max_retries = 2
};
static char **split_and_resolve(char **orig_str, char *name);
@ -132,13 +133,6 @@ static int component_register(void)
PMIX_MCA_BASE_VAR_SCOPE_LOCAL,
&mca_ptl_tcp_component.super.uri);
(void)pmix_mca_base_component_var_register(component, "tool_pid",
"pid of a tool we are to connect to",
PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
PMIX_INFO_LVL_2,
PMIX_MCA_BASE_VAR_SCOPE_LOCAL,
&mca_ptl_tcp_component.tool_pid);
(void)pmix_mca_base_component_var_register(component, "if_include",
"Comma-delimited list of devices and/or CIDR notation of TCP networks (e.g., \"eth0,192.168.0.0/16\"). Mutually exclusive with ptl_tcp_if_exclude.",
PMIX_MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
@ -192,6 +186,20 @@ static int component_register(void)
PMIX_MCA_BASE_VAR_SCOPE_READONLY,
&mca_ptl_tcp_component.disable_ipv6_family);
(void)pmix_mca_base_component_var_register(component, "connection_wait_time",
"Number of seconds to wait for the server connection file to appear",
PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
PMIX_INFO_LVL_4,
PMIX_MCA_BASE_VAR_SCOPE_READONLY,
&mca_ptl_tcp_component.wait_to_connect);
(void)pmix_mca_base_component_var_register(component, "max_retries",
"Number of times to look for the connection file before quitting",
PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
PMIX_INFO_LVL_4,
PMIX_MCA_BASE_VAR_SCOPE_READONLY,
&mca_ptl_tcp_component.max_retries);
return PMIX_SUCCESS;
}

Просмотреть файл

@ -111,7 +111,7 @@ static int rte_init(void)
if (ORTE_PROC_IS_TOOL) {
/* otherwise, if I am a tool proc, use that procedure */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(0))) {
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(NULL))) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_tool_setup";
goto fn_fail;

Просмотреть файл

@ -65,7 +65,7 @@ ORTE_DECLSPEC int orte_ess_base_app_setup(bool db_restrict_local);
ORTE_DECLSPEC int orte_ess_base_app_finalize(void);
ORTE_DECLSPEC void orte_ess_base_app_abort(int status, bool report);
ORTE_DECLSPEC int orte_ess_base_tool_setup(uint8_t flags);
ORTE_DECLSPEC int orte_ess_base_tool_setup(opal_list_t *flags);
ORTE_DECLSPEC int orte_ess_base_tool_finalize(void);
ORTE_DECLSPEC int orte_ess_base_orted_setup(void);

Просмотреть файл

@ -90,7 +90,7 @@ static void infocb(int status,
OPAL_PMIX_WAKEUP_THREAD(lock);
}
int orte_ess_base_tool_setup(uint8_t flags)
int orte_ess_base_tool_setup(opal_list_t *flags)
{
int ret;
char *error = NULL;
@ -98,7 +98,7 @@ int orte_ess_base_tool_setup(uint8_t flags)
orte_jobid_t jobid;
orte_vpid_t vpid;
opal_list_t info;
opal_value_t *kv, val;
opal_value_t *kv, *knext, val;
opal_pmix_query_t *q;
opal_pmix_lock_t lock;
opal_buffer_t *buf;
@ -181,26 +181,12 @@ int orte_ess_base_tool_setup(uint8_t flags)
kv->data.name.vpid = ORTE_PROC_MY_NAME->vpid;
kv->type = OPAL_VPID;
opal_list_append(&info, &kv->super);
if (0 != flags) {
/* instruct the PMIx layer on if/how to connect */
kv = OBJ_NEW(opal_value_t);
if (0x01 == flags) {
kv->key = strdup(OPAL_PMIX_TOOL_DO_NOT_CONNECT);
} else if (0x02 == flags) {
kv->key = strdup(OPAL_PMIX_CONNECT_SYSTEM_FIRST);
} else if (0x04 == flags) {
kv->key = strdup(OPAL_PMIX_CONNECT_TO_SYSTEM);
} else {
opal_output(0, "UNKNOWN CONNECTION FLAG %0x", flags);
error = "unknown connection flags";
ret = ORTE_ERR_BAD_PARAM;
OPAL_LIST_DESTRUCT(&info);
OBJ_RELEASE(kv);
goto error;
if (NULL != flags) {
/* pass along any directives */
OPAL_LIST_FOREACH_SAFE(kv, knext, flags, opal_value_t) {
opal_list_remove_item(flags, &kv->super);
opal_list_append(&info, &kv->super);
}
kv->data.flag = true;
kv->type = OPAL_BOOL;
opal_list_append(&info, &kv->super);
}
if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(&info))) {
ORTE_ERROR_LOG(ret);

Просмотреть файл

@ -92,7 +92,7 @@ static int rte_init(void)
if (ORTE_PROC_IS_TOOL) {
/* otherwise, if I am a tool proc, use that procedure */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(0))) {
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(NULL))) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_tool_setup";
goto error;

Просмотреть файл

@ -87,7 +87,7 @@ static int rte_init(void)
if (ORTE_PROC_IS_TOOL) {
/* otherwise, if I am a tool proc, use that procedure */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(0))) {
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(NULL))) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_tool_setup";
goto error;

Просмотреть файл

@ -91,7 +91,7 @@ static int rte_init(void)
if (ORTE_PROC_IS_TOOL) {
/* otherwise, if I am a tool proc, use that procedure */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(0))) {
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(NULL))) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_tool_setup";
goto error;

Просмотреть файл

@ -35,6 +35,9 @@ typedef struct {
bool system_server_first;
bool system_server_only;
bool do_not_connect;
int wait_to_connect;
int num_retries;
int pid;
} orte_ess_tool_component_t;
ORTE_MODULE_DECLSPEC extern orte_ess_tool_component_t mca_ess_tool_component;

Просмотреть файл

@ -63,21 +63,24 @@ orte_ess_tool_component_t mca_ess_tool_component = {
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
},
.async = false
.async = false,
.system_server_first = false,
.system_server_only = false,
.wait_to_connect = 0,
.num_retries = 0,
.pid = 0
};
static int tool_component_register(void)
{
mca_base_component_t *c = &mca_ess_tool_component.super.base_version;
mca_ess_tool_component.async = false;
(void) mca_base_component_var_register (c, "async_progress", "Setup an async progress thread",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_2,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_ess_tool_component.async);
mca_ess_tool_component.do_not_connect = false;
(void) mca_base_component_var_register (c, "do_not_connect",
"Do not connect to a PMIx server",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
@ -85,7 +88,6 @@ static int tool_component_register(void)
MCA_BASE_VAR_SCOPE_READONLY,
&mca_ess_tool_component.do_not_connect);
mca_ess_tool_component.system_server_first = false;
(void) mca_base_component_var_register (c, "system_server_first",
"Look for a system PMIx server first",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
@ -93,13 +95,33 @@ static int tool_component_register(void)
MCA_BASE_VAR_SCOPE_READONLY,
&mca_ess_tool_component.system_server_first);
mca_ess_tool_component.system_server_only = false;
(void) mca_base_component_var_register (c, "system_server_only",
"Only connect to a system server (and not an mpirun)",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_2,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_ess_tool_component.system_server_only);
(void) mca_base_component_var_register (c, "wait_to_connect",
"Time in seconds to wait before retrying connection to server",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_2,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_ess_tool_component.wait_to_connect);
(void) mca_base_component_var_register (c, "num_retries",
"Number of times to retry connecting to server",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_2,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_ess_tool_component.num_retries);
(void) mca_base_component_var_register (c, "server_pid",
"PID of the server to which we are to connect",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_2,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_ess_tool_component.pid);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -32,6 +32,7 @@
#endif
#include "opal/runtime/opal_progress_threads.h"
#include "opal/mca/pmix/pmix_types.h"
#include "orte/util/show_help.h"
#include "orte/mca/plm/base/base.h"
@ -63,7 +64,8 @@ static int rte_init(void)
{
int ret;
char *error = NULL;
uint8_t flags;
opal_list_t flags;
opal_value_t *val;
/* run the prolog */
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
@ -81,21 +83,57 @@ static int rte_init(void)
}
/* setup the tool connection flags */
flags = 0;
OBJ_CONSTRUCT(&flags, opal_list_t);
if (mca_ess_tool_component.do_not_connect) {
flags = 0x01;
val = OBJ_NEW(opal_value_t);
val->key = strdup(OPAL_PMIX_TOOL_DO_NOT_CONNECT);
val->type = OPAL_BOOL;
val->data.flag = true;
opal_list_append(&flags, &val->super);
} else if (mca_ess_tool_component.system_server_first) {
flags = 0x02;
val = OBJ_NEW(opal_value_t);
val->key = strdup(OPAL_PMIX_CONNECT_SYSTEM_FIRST);
val->type = OPAL_BOOL;
val->data.flag = true;
opal_list_append(&flags, &val->super);
} else if (mca_ess_tool_component.system_server_only) {
flags = 0x04;
val = OBJ_NEW(opal_value_t);
val->key = strdup(OPAL_PMIX_CONNECT_TO_SYSTEM);
val->type = OPAL_BOOL;
val->data.flag = true;
opal_list_append(&flags, &val->super);
}
if (0 < mca_ess_tool_component.wait_to_connect) {
val = OBJ_NEW(opal_value_t);
val->key = strdup(OPAL_PMIX_CONNECT_RETRY_DELAY);
val->type = OPAL_UINT32;
val->data.uint32 = mca_ess_tool_component.wait_to_connect;
opal_list_append(&flags, &val->super);
}
if (0 < mca_ess_tool_component.num_retries) {
val = OBJ_NEW(opal_value_t);
val->key = strdup(OPAL_PMIX_CONNECT_MAX_RETRIES);
val->type = OPAL_UINT32;
val->data.uint32 = mca_ess_tool_component.num_retries;
opal_list_append(&flags, &val->super);
}
if (0 < mca_ess_tool_component.pid) {
val = OBJ_NEW(opal_value_t);
val->key = strdup(OPAL_PMIX_SERVER_PIDINFO);
val->type = OPAL_PID;
val->data.pid = mca_ess_tool_component.pid;
opal_list_append(&flags, &val->super);
}
/* do the standard tool init */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(flags))) {
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(&flags))) {
ORTE_ERROR_LOG(ret);
OPAL_LIST_DESTRUCT(&flags);
error = "orte_ess_base_tool_setup";
goto error;
}
OPAL_LIST_DESTRUCT(&flags);
return ORTE_SUCCESS;

Просмотреть файл

@ -105,6 +105,7 @@ static struct {
bool run_as_root;
bool set_sid;
bool daemonize;
bool system_server;
} myglobals;
static opal_cmd_line_init_t cmd_line_init[] = {
@ -165,6 +166,10 @@ static opal_cmd_line_init_t cmd_line_init[] = {
NULL, OPAL_CMD_LINE_TYPE_STRING,
"List of hosts to invoke processes on" },
{ NULL, '\0', "system-server", "system-server", 0,
&myglobals.system_server, OPAL_CMD_LINE_TYPE_BOOL,
"Provide a system-level server connection point - only one allowed per node" },
/* End of list */
{ NULL, '\0', NULL, NULL, 0,
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
@ -280,10 +285,12 @@ int main(int argc, char *argv[])
exit(0);
}
/* we should act as system-level PMIx server */
opal_setenv("OMPI_MCA_pmix_system_server", "1", true, &environ);
/* and as session-level PMIx server */
opal_setenv("OMPI_MCA_pmix_session_server", "1", true, &environ);
if (myglobals.system_server) {
/* we should act as system-level PMIx server */
opal_setenv(OPAL_MCA_PREFIX"pmix_system_server", "1", true, &environ);
}
/* always act as session-level PMIx server */
opal_setenv(OPAL_MCA_PREFIX"pmix_session_server", "1", true, &environ);
/* Setup MCA params */
orte_register_params();

Просмотреть файл

@ -139,6 +139,9 @@ struct orte_cmd_options_t {
int timeout;
bool report_state_on_timeout;
bool get_stack_traces;
int pid;
bool system_server_only;
bool system_server_first;
};
typedef struct orte_cmd_options_t orte_cmd_options_t;
static orte_cmd_options_t orte_cmd_options = {0};
@ -471,6 +474,22 @@ static opal_cmd_line_init_t cmd_line_init[] = {
&orte_cmd_options.terminate_dvm, OPAL_CMD_LINE_TYPE_BOOL,
"Terminate the DVM", OPAL_CMD_LINE_OTYPE_DVM },
/* look first for a system server */
{ NULL, '\0', "system-server-first", "system-server-first", 0,
&orte_cmd_options.system_server_first, OPAL_CMD_LINE_TYPE_BOOL,
"First look for a system server and connect to it if found", OPAL_CMD_LINE_OTYPE_DVM },
/* connect only to a system server */
{ NULL, '\0', "system-server-only", "system-server-only", 0,
&orte_cmd_options.system_server_only, OPAL_CMD_LINE_TYPE_BOOL,
"Connect only to a system-level server", OPAL_CMD_LINE_OTYPE_DVM },
/* provide a connection PID */
{ NULL, '\0', "pid", "pid", 1,
&orte_cmd_options.pid, OPAL_CMD_LINE_TYPE_INT,
"PID of the session-level daemon to which we should connect",
OPAL_CMD_LINE_OTYPE_DVM },
/* End of list */
{ NULL, '\0', NULL, NULL, 0,
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
@ -647,9 +666,38 @@ int prun(int argc, char *argv[])
return rc;
}
/* tell the ess/tool component that we want to connect to a system-level
/* Check for help request */
if (orte_cmd_options.help) {
char *str, *args = NULL;
args = opal_cmd_line_get_usage_msg(orte_cmd_line);
str = opal_show_help_string("help-orterun.txt", "orterun:usage", false,
"prun", "PSVR", OPAL_VERSION,
"prun", args,
PACKAGE_BUGREPORT);
if (NULL != str) {
printf("%s", str);
free(str);
}
free(args);
/* If someone asks for help, that should be all we do */
exit(0);
}
/* tell the ess/tool component that we want to connect only to a system-level
* PMIx server */
opal_setenv("OMPI_MCA_ess_tool_system_server_only", "1", true, &environ);
if (orte_cmd_options.system_server_only) {
opal_setenv(OPAL_MCA_PREFIX"ess_tool_system_server_only", "1", true, &environ);
}
if (orte_cmd_options.system_server_first) {
opal_setenv(OPAL_MCA_PREFIX"ess_tool_system_server_first", "1", true, &environ);
}
/* if they specified the DVM's pid, then pass it along */
if (0 != orte_cmd_options.pid) {
asprintf(&param, "%d", orte_cmd_options.pid);
opal_setenv(OPAL_MCA_PREFIX"ess_tool_server_pid", param, true, &environ);
free(param);
}
/* now initialize ORTE */
if (OPAL_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_TOOL))) {
@ -665,7 +713,6 @@ int prun(int argc, char *argv[])
val->type = OPAL_BOOL;
val->data.flag = true;
opal_list_append(&info, &val->super);
fprintf(stderr, "TERMINATING DVM...");
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
rc = opal_pmix.job_control(NULL, &info, infocb, (void*)&lock);