A bit of cleanup and a couple of bug fixes for remote orted launching
using orteprobe. Created a header file for orte_setup_hnp [HNP = Head Node Process]. General cleanup and added a bit of documentation in orte_setup_hnp.c. Also fixed a cellid tokens issue (circa line 285). Changed the launched scope from private to public. In orteprobe: - added a reference to orted.h to avoid duplicating header contents in orteprobe.h - removed the version tag, and put in a verbose argument - fixed a buffer packing problem that was preventing the parent from receiving the proper contact information for the new daemon. This commit was SVN r6802.
Этот коммит содержится в:
родитель
b405316075
Коммит
afe7e687cb
@ -63,19 +63,10 @@
|
||||
#include "mca/errmgr/errmgr.h"
|
||||
|
||||
#include "runtime/runtime.h"
|
||||
#include "runtime/orte_setup_hnp.h"
|
||||
|
||||
extern char **environ;
|
||||
|
||||
/*
|
||||
* Local data structure
|
||||
*/
|
||||
typedef struct {
|
||||
char *target_cluster;
|
||||
char *headnode;
|
||||
orte_process_name_t *name;
|
||||
orte_jobid_t jobid;
|
||||
} orte_setup_hnp_cb_data_t;
|
||||
|
||||
/* Local condition variables and mutex
|
||||
*/
|
||||
static opal_mutex_t orte_setup_hnp_mutex;
|
||||
@ -85,7 +76,7 @@ static int orte_setup_hnp_rc;
|
||||
/* Local uri storage */
|
||||
static char *orte_setup_hnp_orted_uri;
|
||||
|
||||
static orte_setup_hnp_cb_data_t orte_setup_hnp_cbdata = {NULL, NULL, NULL, 0};
|
||||
static orte_setup_hnp_cb_data_t orte_setup_hnp_cbdata;
|
||||
|
||||
/*
|
||||
* NON-BLOCKING RECEIVER
|
||||
@ -161,20 +152,24 @@ int orte_setup_hnp(char *target_cluster, char *headnode, char *username)
|
||||
goto MOVEON;
|
||||
|
||||
} else { /* lookup the headnode's cellid */
|
||||
hn = strdup(headnode);
|
||||
hn = strdup(headnode);
|
||||
keys[0] = ORTE_RDS_FE_NAME;
|
||||
keys[1] = ORTE_RDS_FE_SSH;
|
||||
keys[2] = ORTE_CELLID_KEY;
|
||||
keys[3] = NULL;
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.get(ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR,
|
||||
ORTE_RESOURCE_SEGMENT,
|
||||
NULL, keys, &cnt, &values))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
|
||||
rc = orte_gpr.get(ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR,
|
||||
ORTE_RESOURCE_SEGMENT,
|
||||
NULL, keys, &cnt, &values);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if (0 == cnt || 0 == values[0]->cnt) { /* nothing found */
|
||||
/* Nothing found */
|
||||
if (0 == cnt || 0 == values[0]->cnt) {
|
||||
goto MOVEON;
|
||||
}
|
||||
|
||||
on_gpr = true;
|
||||
for (i=0; i < cnt; i++) {
|
||||
keyvals = values[i]->keyvals;
|
||||
@ -202,7 +197,8 @@ int orte_setup_hnp(char *target_cluster, char *headnode, char *username)
|
||||
|
||||
MOVEON:
|
||||
if (NULL != values) {
|
||||
for (i=0; i < cnt; i++) OBJ_RELEASE(values[i]);
|
||||
for (i=0; i < cnt; i++)
|
||||
OBJ_RELEASE(values[i]);
|
||||
free(values);
|
||||
}
|
||||
|
||||
@ -214,6 +210,7 @@ MOVEON:
|
||||
* synonymous with the headnode name), a headnode name (on a named or
|
||||
* unnamed target_cluster), or both.
|
||||
*/
|
||||
|
||||
/* get new cellid for this site/resource */
|
||||
if (NULL != target_cluster) {
|
||||
cellname = strdup(target_cluster);
|
||||
@ -223,28 +220,33 @@ MOVEON:
|
||||
*/
|
||||
cellname = strdup(headnode);
|
||||
}
|
||||
|
||||
/* can't know the site name, so it becomes "unknown" */
|
||||
if (ORTE_SUCCESS != (rc = orte_ns.create_cellid(&cellid, "UNKNOWN",
|
||||
cellname))) {
|
||||
rc = orte_ns.create_cellid(&cellid, "unknown", cellname);
|
||||
if (ORTE_SUCCESS != rc ) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(cellname);
|
||||
return rc;
|
||||
}
|
||||
/* now store the cell info on the resource segment of the registry */
|
||||
|
||||
/*
|
||||
* Store the cell info on the resource segment of the registry
|
||||
*/
|
||||
value = OBJ_NEW(orte_gpr_value_t);
|
||||
if (NULL == value) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
value->addr_mode = ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR;
|
||||
value->segment = strdup(ORTE_RESOURCE_SEGMENT);
|
||||
value->segment = strdup(ORTE_RESOURCE_SEGMENT);
|
||||
|
||||
value->cnt = 4;
|
||||
value->cnt = 4;
|
||||
value->keyvals = (orte_gpr_keyval_t**)malloc(value->cnt * sizeof(orte_gpr_keyval_t*));
|
||||
if (NULL == value->keyvals) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
for (i=0; i < value->cnt; i++) {
|
||||
value->keyvals[i] = OBJ_NEW(orte_gpr_keyval_t);
|
||||
if (NULL == value->keyvals[i]) {
|
||||
@ -252,21 +254,29 @@ MOVEON:
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
value->keyvals[0]->key = strdup(ORTE_RDS_NAME);
|
||||
value->keyvals[0]->type = ORTE_STRING;
|
||||
|
||||
/* Set Cell Name */
|
||||
value->keyvals[0]->key = strdup(ORTE_RDS_NAME);
|
||||
value->keyvals[0]->type = ORTE_STRING;
|
||||
value->keyvals[0]->value.strptr = strdup(cellname);
|
||||
value->keyvals[1]->key = strdup(ORTE_CELLID_KEY);
|
||||
value->keyvals[1]->type = ORTE_CELLID;
|
||||
|
||||
/* Set Cell ID */
|
||||
value->keyvals[1]->key = strdup(ORTE_CELLID_KEY);
|
||||
value->keyvals[1]->type = ORTE_CELLID;
|
||||
value->keyvals[1]->value.cellid = cellid;
|
||||
value->keyvals[2]->key = strdup(ORTE_RDS_FE_NAME);
|
||||
|
||||
/* Set Front End Name */
|
||||
value->keyvals[2]->key = strdup(ORTE_RDS_FE_NAME);
|
||||
value->keyvals[2]->type = ORTE_STRING;
|
||||
if (NULL == headnode) {
|
||||
value->keyvals[2]->value.strptr = strdup(cellname);
|
||||
} else {
|
||||
value->keyvals[2]->value.strptr = strdup(headnode);
|
||||
}
|
||||
value->keyvals[3]->key = strdup(ORTE_RDS_FE_SSH);
|
||||
value->keyvals[3]->type = ORTE_BOOL;
|
||||
|
||||
/* Assume ability to ssh to front end node */
|
||||
value->keyvals[3]->key = strdup(ORTE_RDS_FE_SSH);
|
||||
value->keyvals[3]->type = ORTE_BOOL;
|
||||
value->keyvals[3]->value.tf_flag = true;
|
||||
|
||||
value->num_tokens = 3;
|
||||
@ -275,25 +285,27 @@ MOVEON:
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_ns.convert_cellid_to_string(&(value->tokens[0]), cellid))) {
|
||||
|
||||
rc = orte_schema.get_node_tokens(&value->tokens, &value->num_tokens, cellid, cellname);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
value->tokens[1] = strdup("UNKNOWN"); /* site name is unknown */
|
||||
value->tokens[2] = strdup(cellname);
|
||||
|
||||
if (ORTE_SUCCESS != orte_gpr.put(1, &value)) {
|
||||
/* Place tokens in GPR */
|
||||
rc = orte_gpr.put(1, &value);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(value);
|
||||
return rc;
|
||||
}
|
||||
|
||||
OBJ_RELEASE(value);
|
||||
free(cellname);
|
||||
|
||||
can_launch = true;
|
||||
}
|
||||
|
||||
orte_gpr.dump_segment(NULL, 0);
|
||||
|
||||
if (!can_launch || ORTE_CELLID_MAX == cellid) {
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
@ -312,28 +324,40 @@ MOVEON:
|
||||
OBJ_CONSTRUCT(&orte_setup_hnp_condition, opal_condition_t);
|
||||
|
||||
/* get a jobid for the probe */
|
||||
if (ORTE_SUCCESS != (rc = orte_ns.create_jobid(&jobid))) {
|
||||
rc = orte_ns.create_jobid(&jobid);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* get a vpid for the probe */
|
||||
if (ORTE_SUCCESS != (rc = orte_ns.reserve_range(jobid, 1, &vpid))) {
|
||||
rc = orte_ns.reserve_range(jobid, 1, &vpid);
|
||||
if (ORTE_SUCCESS != rc ) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* initialize probe's process name... */
|
||||
rc = orte_ns.create_process_name(&(orte_setup_hnp_cbdata.name), cellid, jobid, vpid);
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* ...and get string representation */
|
||||
if(ORTE_SUCCESS != (rc = orte_ns.get_proc_name_string(&name_string, orte_setup_hnp_cbdata.name))) {
|
||||
rc = orte_ns.get_proc_name_string(&name_string, orte_setup_hnp_cbdata.name);
|
||||
if (ORTE_SUCCESS != rc ) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
/* setup callback data on sigchild */
|
||||
orte_setup_hnp_cbdata.target_cluster = strdup(target_cluster);
|
||||
if (NULL != target_cluster) {
|
||||
orte_setup_hnp_cbdata.target_cluster = strdup(target_cluster);
|
||||
} else {
|
||||
orte_setup_hnp_cbdata.target_cluster = NULL;
|
||||
}
|
||||
|
||||
orte_setup_hnp_cbdata.headnode = strdup(headnode);
|
||||
orte_setup_hnp_cbdata.jobid = jobid;
|
||||
|
||||
@ -405,7 +429,7 @@ MOVEON:
|
||||
/* pass along any parameters for the head node process
|
||||
* in case one needs to be created
|
||||
*/
|
||||
id = mca_base_param_register_string("scope",NULL,NULL,NULL,"private");
|
||||
id = mca_base_param_register_string("scope",NULL,NULL,NULL,"public");
|
||||
mca_base_param_lookup_string(id, &param);
|
||||
opal_argv_append(&argc, &argv, "--scope");
|
||||
opal_argv_append(&argc, &argv, param);
|
||||
@ -459,25 +483,29 @@ MOVEON:
|
||||
* utilities, though, or we will lose all of our MCA parameters
|
||||
*/
|
||||
orte_system_finalize();
|
||||
|
||||
/*
|
||||
* now set the relevant MCA parameters to point us at the remote daemon...
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = opal_setenv("OMPI_MCA_gpr_replica_uri",
|
||||
orte_setup_hnp_orted_uri, true, &environ))) {
|
||||
rc = opal_setenv("OMPI_MCA_gpr_replica_uri",
|
||||
orte_setup_hnp_orted_uri, true, &environ);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
fprintf(stderr, "orte_setup_hnp: could not set gpr_replica_uri in environ\n");
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = opal_setenv("OMPI_MCA_ns_replica_uri",
|
||||
orte_setup_hnp_orted_uri, true, &environ))) {
|
||||
rc = opal_setenv("OMPI_MCA_ns_replica_uri",
|
||||
orte_setup_hnp_orted_uri, true, &environ);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
fprintf(stderr, "orte_setup_hnp: could not set ns_replica_uri in environ\n");
|
||||
return rc;
|
||||
}
|
||||
|
||||
opal_unsetenv("OMPI_MCA_seed", &environ);
|
||||
|
||||
if (ORTE_SUCCESS != (rc = opal_setenv("OMPI_MCA_universe_uri",
|
||||
orte_setup_hnp_orted_uri, true, &environ))) {
|
||||
rc = opal_setenv("OMPI_MCA_universe_uri",
|
||||
orte_setup_hnp_orted_uri, true, &environ);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
fprintf(stderr, "orte_setup_hnp: could not set universe_uri in environ\n");
|
||||
return rc;
|
||||
}
|
||||
@ -485,10 +513,12 @@ MOVEON:
|
||||
/*
|
||||
* ...re-init ourselves...
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_system_init())) {
|
||||
rc = orte_system_init();
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* ...and we are now ready to go!
|
||||
*/
|
||||
|
39
orte/runtime/orte_setup_hnp.h
Обычный файл
39
orte/runtime/orte_setup_hnp.h
Обычный файл
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* Establish a Head Node Process on a cluster's front end
|
||||
*/
|
||||
|
||||
#ifndef ORTE_SETUP_HNP_H
|
||||
#define ORTE_SETUP_HNP_H
|
||||
|
||||
/*
|
||||
* Local data structure
|
||||
*/
|
||||
typedef struct { /* data orte_setup_hnp() records about the probe it launches, for use by its callback */
|
||||
char *target_cluster; /* strdup'd copy of the target_cluster argument, or NULL when none was given */
|
||||
char *headnode; /* strdup'd copy of the headnode argument */
|
||||
orte_process_name_t *name; /* process name created for the probe (orte_ns.create_process_name) */
|
||||
orte_jobid_t jobid; /* jobid obtained for the probe (orte_ns.create_jobid) */
|
||||
} orte_setup_hnp_cb_data_t;
|
||||
|
||||
|
||||
/*
 * Establish a Head Node Process (HNP) on a cluster's front end.
 *
 * target_cluster  name of the target cluster; may be NULL when only a
 *                 headnode is specified
 * headnode        name of the cluster's head node; the implementation
 *                 checks it against NULL, so it appears to be optional
 *                 when a target_cluster is given -- TODO confirm
 * username        presumably the account used to reach the head node;
 *                 its use is not visible here -- verify against the
 *                 implementation
 *
 * Returns an ORTE status code: error paths return the failing call's
 * code, ORTE_ERR_OUT_OF_RESOURCE on allocation failure, or
 * ORTE_ERR_UNREACH when the remote launch cannot be attempted.
 * NOTE(review): the success value is presumably ORTE_SUCCESS -- confirm.
 */
int orte_setup_hnp(char *target_cluster, char *headnode, char *username);
|
||||
|
||||
#endif
|
@ -78,9 +78,9 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
|
||||
&orteprobe_globals.help, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"This help message" },
|
||||
|
||||
{ NULL, NULL, NULL, '\0', NULL, "version", 0,
|
||||
&orteprobe_globals.version, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Show the orteprobe version" },
|
||||
{ NULL, NULL, NULL, NULL, NULL, "verbose", 0,
|
||||
&orteprobe_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Toggle Verbosity" },
|
||||
|
||||
{ NULL, NULL, NULL, 'd', NULL, "debug", 0,
|
||||
&orteprobe_globals.debug, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
@ -144,12 +144,13 @@ int main(int argc, char *argv[])
|
||||
memset(&orteprobe_globals, 0, sizeof(orteprobe_globals));
|
||||
cmd_line = OBJ_NEW(opal_cmd_line_t);
|
||||
opal_cmd_line_create(cmd_line, orte_cmd_line_opts);
|
||||
if (OMPI_SUCCESS != (ret = opal_cmd_line_parse(cmd_line, true,
|
||||
argc, argv))) {
|
||||
|
||||
ret = opal_cmd_line_parse(cmd_line, true, argc, argv);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* check for help and version requests */
|
||||
/* check for help request */
|
||||
if (orteprobe_globals.help) {
|
||||
char *args = NULL;
|
||||
args = opal_cmd_line_get_usage_msg(cmd_line);
|
||||
@ -159,12 +160,6 @@ int main(int argc, char *argv[])
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (orteprobe_globals.version) {
|
||||
/* show version message */
|
||||
printf("...showing off my version!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to parse the probe's name and save in proc_info
|
||||
*/
|
||||
@ -172,7 +167,7 @@ int main(int argc, char *argv[])
|
||||
ret = orte_ns_base_convert_string_to_process_name(
|
||||
&orte_process_info.my_name, orteprobe_globals.name_string);
|
||||
if(ORTE_SUCCESS != ret) {
|
||||
fprintf(stderr, "Couldn't convert environmental string to probe's process name\n");
|
||||
fprintf(stderr, "orteprobe: Couldn't convert environmental string to probe's process name\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -297,16 +292,16 @@ int main(int argc, char *argv[])
|
||||
if (orteprobe_globals.requestor_string) {
|
||||
if(ORTE_SUCCESS != (ret = orte_rml.parse_uris(
|
||||
orteprobe_globals.requestor_string, &requestor, NULL))) {
|
||||
fprintf(stderr, "Couldn't parse environmental string for requestor's contact info\n");
|
||||
fprintf(stderr, "orteprobe: Couldn't parse environmental string for requestor's contact info\n");
|
||||
return 1;
|
||||
}
|
||||
/* set the contact info */
|
||||
if (ORTE_SUCCESS != (ret = orte_rml.set_uri(orteprobe_globals.requestor_string))) {
|
||||
fprintf(stderr, "Couldn't set contact info for requestor\n");
|
||||
fprintf(stderr, "orteprobe: Couldn't set contact info for requestor\n");
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "No contact info received for requestor\n");
|
||||
fprintf(stderr, "orteprobe: No contact info received for requestor\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -315,29 +310,37 @@ int main(int argc, char *argv[])
|
||||
*/
|
||||
if (ORTE_SUCCESS == (ret = orte_universe_exists(&univ))) {
|
||||
/* universe is here! send info back and die */
|
||||
fprintf(stderr, "contacted existing universe - sending contact info back\n");
|
||||
if(orteprobe_globals.verbose)
|
||||
fprintf(stderr, "orteprobe: Contacted existing universe - sending contact info back\n");
|
||||
|
||||
OBJ_CONSTRUCT(&buffer, orte_buffer_t);
|
||||
orted_uri_ptr = &(univ.seed_uri);
|
||||
|
||||
if (ORTE_SUCCESS != (ret = orte_dps.pack(&buffer, &orted_uri_ptr, 1, ORTE_STRING))) {
|
||||
fprintf(stderr, "orteprobe: failed to pack contact info for existing universe\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (0 > orte_rml.send_buffer(&requestor, &buffer, ORTE_RML_TAG_PROBE, 0)) {
|
||||
fprintf(stderr, "orteprobe: comm failure when sending contact info for existing univ back to requestor\n");
|
||||
OBJ_DESTRUCT(&buffer);
|
||||
exit(1);
|
||||
}
|
||||
OBJ_DESTRUCT(&buffer);
|
||||
|
||||
OBJ_DESTRUCT(&buffer);
|
||||
} else {
|
||||
/* existing universe is not here or does not allow contact.
|
||||
* ensure we have a unique universe name, fork/exec an appropriate
|
||||
* daemon, and then tell whomever spawned us how to talk to the new
|
||||
* daemon
|
||||
*/
|
||||
fprintf(stderr, "could not connect to existing universe\n");
|
||||
if(orteprobe_globals.verbose)
|
||||
fprintf(stderr, "orteprobe: Could not connect to existing universe\n");
|
||||
|
||||
if (ORTE_ERR_NOT_FOUND != ret) {
|
||||
fprintf(stderr, "existing universe did not respond\n");
|
||||
if(orteprobe_globals.verbose)
|
||||
fprintf(stderr, "orteprobe: Existing universe did not respond\n");
|
||||
|
||||
/* if it exists but no contact could be established,
|
||||
* define unique name based on current one.
|
||||
*/
|
||||
@ -345,11 +348,13 @@ fprintf(stderr, "existing universe did not respond\n");
|
||||
free(orte_universe_info.name);
|
||||
orte_universe_info.name = NULL;
|
||||
pid = getpid();
|
||||
|
||||
if (0 > asprintf(&orte_universe_info.name, "%s-%d", universe, pid)) {
|
||||
fprintf(stderr, "orteprobe: failed to create unique universe name");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
/* setup to fork/exec the new universe */
|
||||
/* setup the pipe to get the contact info back */
|
||||
if (pipe(orted_pipe)) {
|
||||
@ -361,7 +366,8 @@ fprintf(stderr, "existing universe did not respond\n");
|
||||
id = mca_base_param_register_string("orted",NULL,NULL,NULL,"orted");
|
||||
mca_base_param_lookup_string(id, &orted);
|
||||
|
||||
fprintf(stderr, "using %s for orted command\n", orted);
|
||||
if(orteprobe_globals.verbose)
|
||||
fprintf(stderr, "orteprobe: Using \"%s\" for orted command\n", orted);
|
||||
|
||||
/* Initialize the argv array */
|
||||
ortedargv = opal_argv_split(orted, ' ');
|
||||
@ -374,7 +380,8 @@ fprintf(stderr, "using %s for orted command\n", orted);
|
||||
/* setup the path */
|
||||
path = opal_path_findv(ortedargv[0], 0, environ, NULL);
|
||||
|
||||
fprintf(stderr, "path setup as %s\n", path);
|
||||
if(orteprobe_globals.verbose)
|
||||
fprintf(stderr, "orteprobe: Path setup as \"%s\"\n", path);
|
||||
|
||||
/* tell the daemon it's the seed */
|
||||
opal_argv_append(&ortedargc, &ortedargv, "--seed");
|
||||
@ -394,7 +401,8 @@ fprintf(stderr, "path setup as %s\n", path);
|
||||
opal_argv_append(&ortedargc, &ortedargv, param);
|
||||
free(param);
|
||||
|
||||
fprintf(stderr, "forking now\n");
|
||||
if(orteprobe_globals.verbose)
|
||||
fprintf(stderr, "orteprobe: Forking now\n");
|
||||
|
||||
/* Create the child process. */
|
||||
pid = fork ();
|
||||
@ -412,7 +420,8 @@ fprintf(stderr, "forking now\n");
|
||||
/* This is the parent process.
|
||||
Close write end first. */
|
||||
|
||||
fprintf(stderr, "attempting to read from daemon\n");
|
||||
if(orteprobe_globals.verbose)
|
||||
fprintf(stderr, "orteprobe: Attempting to read from daemon\n");
|
||||
|
||||
read(orted_pipe[0], orted_uri, 255);
|
||||
close(orted_pipe[0]);
|
||||
@ -421,26 +430,34 @@ fprintf(stderr, "attempting to read from daemon\n");
|
||||
OBJ_CONSTRUCT(&buffer, orte_buffer_t);
|
||||
param = orted_uri;
|
||||
orted_uri_ptr = &param;
|
||||
if (ORTE_SUCCESS != (ret = orte_dps.pack(&buffer, &orted_uri_ptr, 1, ORTE_STRING))) {
|
||||
|
||||
if (ORTE_SUCCESS != (ret = orte_dps.pack(&buffer, &orted_uri_ptr[0], 1, ORTE_STRING))) {
|
||||
fprintf(stderr, "orteprobe: failed to pack daemon uri\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (0 > orte_rml.send_buffer(&requestor, &buffer, ORTE_RML_TAG_PROBE, 0)) {
|
||||
fprintf(stderr, "orteprobe: could not send daemon uri info back to probe\n");
|
||||
OBJ_DESTRUCT(&buffer);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&buffer);
|
||||
}
|
||||
}
|
||||
|
||||
if(orteprobe_globals.verbose)
|
||||
fprintf(stderr, "orteprobe: All finished!\n");
|
||||
|
||||
/* cleanup */
|
||||
if (NULL != contact_path) {
|
||||
unlink(contact_path);
|
||||
}
|
||||
|
||||
if (NULL != log_path) {
|
||||
unlink(log_path);
|
||||
}
|
||||
|
||||
/* finalize the system */
|
||||
orte_finalize();
|
||||
|
||||
|
@ -27,28 +27,15 @@
|
||||
|
||||
#include "opal/util/cmd_line.h"
|
||||
#include "mca/mca.h"
|
||||
|
||||
/*
|
||||
* Definitions needed for communication
|
||||
*/
|
||||
#define ORTE_DAEMON_CMD ORTE_INT16
|
||||
|
||||
#define ORTE_DAEMON_HOSTFILE_CMD 0x01
|
||||
#define ORTE_DAEMON_SCRIPTFILE_CMD 0x02
|
||||
#define ORTE_DAEMON_CONTACT_QUERY_CMD 0x03
|
||||
#define ORTE_DAEMON_HEARTBEAT_CMD 0xfe
|
||||
#define ORTE_DAEMON_EXIT_CMD 0xff
|
||||
|
||||
#include "tools/orted/orted.h"
|
||||
|
||||
/*
|
||||
* Globals
|
||||
*/
|
||||
|
||||
typedef uint16_t orte_daemon_cmd_flag_t;
|
||||
|
||||
typedef struct {
|
||||
bool help;
|
||||
bool version;
|
||||
bool verbose;
|
||||
bool debug;
|
||||
char* name_string;
|
||||
char* requestor_string;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user