The Windows PLS is now able to spawn processes locally.
This commit was SVN r13074.
Этот коммит содержится в:
родитель
d2921a9d42
Коммит
c8222b57eb
@ -69,10 +69,6 @@ struct orte_pls_process_component_t {
|
||||
bool force_process;
|
||||
int delay;
|
||||
int priority;
|
||||
char *agent_param;
|
||||
char** agent_argv;
|
||||
int agent_argc;
|
||||
char* agent_path;
|
||||
char* orted;
|
||||
orte_std_cntr_t num_children;
|
||||
orte_std_cntr_t num_concurrent;
|
||||
|
@ -115,9 +115,6 @@ int orte_pls_process_component_open(void)
|
||||
OBJ_CONSTRUCT(&mca_pls_process_component.lock, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&mca_pls_process_component.cond, opal_condition_t);
|
||||
mca_pls_process_component.num_children = 0;
|
||||
mca_pls_process_component.agent_argv = NULL;
|
||||
mca_pls_process_component.agent_argc = 0;
|
||||
mca_pls_process_component.agent_path = NULL;
|
||||
|
||||
/* lookup parameters */
|
||||
mca_base_param_reg_int(c, "debug",
|
||||
@ -181,11 +178,6 @@ int orte_pls_process_component_open(void)
|
||||
false, false, 1, &tmp);
|
||||
mca_pls_process_component.assume_same_shell = OPAL_INT_TO_BOOL(tmp);
|
||||
|
||||
mca_base_param_reg_string(c, "agent",
|
||||
"The command used to launch executables on remote nodes (typically either \"ssh\" or \"process\")",
|
||||
false, false, "ssh : process",
|
||||
&mca_pls_process_component.agent_param);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -196,56 +188,11 @@ extern char **environ;
|
||||
|
||||
orte_pls_base_module_t *orte_pls_process_component_init(int *priority)
|
||||
{
|
||||
char *bname;
|
||||
size_t i;
|
||||
|
||||
/* if we are not an HNP, then don't select us */
|
||||
if (!orte_process_info.seed) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Take the string that was given to us by the pla_process_agent MCA
|
||||
param and search for it */
|
||||
mca_pls_process_component.agent_argv =
|
||||
search(mca_pls_process_component.agent_param);
|
||||
mca_pls_process_component.agent_argc =
|
||||
opal_argv_count(mca_pls_process_component.agent_argv);
|
||||
mca_pls_process_component.agent_path = NULL;
|
||||
if (mca_pls_process_component.agent_argc > 0) {
|
||||
/* If the agent is ssh, and debug was not selected, then
|
||||
automatically add "-x" */
|
||||
|
||||
bname = opal_basename(mca_pls_process_component.agent_argv[0]);
|
||||
if (NULL != bname && 0 == strcmp(bname, "ssh") &&
|
||||
mca_pls_process_component.debug == 0) {
|
||||
for (i = 1; NULL != mca_pls_process_component.agent_argv[i]; ++i) {
|
||||
if (0 == strcasecmp("-x",
|
||||
mca_pls_process_component.agent_argv[i])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL == mca_pls_process_component.agent_argv[i]) {
|
||||
opal_argv_append(&mca_pls_process_component.agent_argc,
|
||||
&mca_pls_process_component.agent_argv, "-x");
|
||||
}
|
||||
}
|
||||
if (NULL != bname) {
|
||||
free(bname);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we didn't find the agent in the path, then don't use this
|
||||
component */
|
||||
if (NULL == mca_pls_process_component.agent_argv ||
|
||||
NULL == mca_pls_process_component.agent_argv[0]) {
|
||||
return NULL;
|
||||
}
|
||||
mca_pls_process_component.agent_path =
|
||||
opal_path_findv(mca_pls_process_component.agent_argv[0], X_OK,
|
||||
environ, NULL);
|
||||
if (NULL == mca_pls_process_component.agent_path) {
|
||||
return NULL;
|
||||
}
|
||||
*priority = mca_pls_process_component.priority;
|
||||
|
||||
return &orte_pls_process_module;
|
||||
@ -260,15 +207,6 @@ int orte_pls_process_component_close(void)
|
||||
if (NULL != mca_pls_process_component.orted) {
|
||||
free(mca_pls_process_component.orted);
|
||||
}
|
||||
if (NULL != mca_pls_process_component.agent_param) {
|
||||
free(mca_pls_process_component.agent_param);
|
||||
}
|
||||
if (NULL != mca_pls_process_component.agent_argv) {
|
||||
opal_argv_free(mca_pls_process_component.agent_argv);
|
||||
}
|
||||
if (NULL != mca_pls_process_component.agent_path) {
|
||||
free(mca_pls_process_component.agent_path);
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -93,12 +93,11 @@
|
||||
#include "orte/mca/pls/base/pls_private.h"
|
||||
#include "orte/mca/pls/process/pls_process.h"
|
||||
|
||||
_CRTIMP extern char **environ; //daniel
|
||||
//_CRTIMP extern char **environ; //daniel
|
||||
//extern char **environ;
|
||||
|
||||
|
||||
#define rindex(a,b) strrchr((a),(b))
|
||||
//daniel
|
||||
#define rindex(a,b) strrchr((a),(b)) //daniel
|
||||
|
||||
#if OMPI_HAVE_POSIX_THREADS && OMPI_THREADS_HAVE_DIFFERENT_PIDS && OMPI_ENABLE_PROGRESS_THREADS
|
||||
static int orte_pls_process_launch_threaded(orte_jobid_t jobid);
|
||||
@ -139,7 +138,8 @@ static const char * orte_pls_process_shell_name[] = {
|
||||
"ksh",
|
||||
"sh",
|
||||
"unknown"
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/* local global storage of timing variables */
|
||||
static unsigned long mintime=999999999, miniter, maxtime=0, maxiter;
|
||||
@ -158,9 +158,8 @@ static opal_list_t active_daemons;
|
||||
static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_shell * shell)
|
||||
{
|
||||
char ** argv;
|
||||
int argc, rc, nfds, i;
|
||||
int rc, nfds;
|
||||
int fd[2];
|
||||
pid_t pid; //daniel
|
||||
|
||||
HANDLE myPipeFd[2];
|
||||
SECURITY_ATTRIBUTES securityAttr;
|
||||
@ -179,24 +178,13 @@ static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_sh
|
||||
/*
|
||||
* Build argv array
|
||||
*/
|
||||
argv = opal_argv_copy(mca_pls_process_component.agent_argv);
|
||||
argc = mca_pls_process_component.agent_argc;
|
||||
opal_argv_append(&argc, &argv, node->nodename);
|
||||
opal_argv_append(&argc, &argv, "echo $SHELL");
|
||||
|
||||
/* daniel *******************
|
||||
*/
|
||||
|
||||
/*
|
||||
if (pipe(fd)) {
|
||||
opal_output(0, "pls:process: pipe failed with errno=%d\n", errno);
|
||||
return ORTE_ERR_IN_ERRNO;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
securityAttr.nLength = sizeof(SECURITY_ATTRIBUTES); // Size of struct
|
||||
securityAttr.lpSecurityDescriptor = NULL; // Default descriptor
|
||||
securityAttr.bInheritHandle = TRUE; // Inheritable
|
||||
|
||||
// Create the pipe
|
||||
if (CreatePipe(&myPipeFd[0], &myPipeFd[1], &securityAttr, 0)) {
|
||||
|
||||
@ -230,7 +218,7 @@ static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_sh
|
||||
|
||||
// Start the child process.
|
||||
if( !CreateProcess( argv[0], //module name NULL,
|
||||
(LPSTR) _tcsdup(TEXT((const char *)argv)), // Command line szCmdline,
|
||||
NULL, //(LPSTR)(const char *) argv,
|
||||
NULL, // Process handle not inheritable
|
||||
NULL, // Thread handle not inheritable
|
||||
TRUE, // Set handle inheritance to TRUE;
|
||||
@ -324,7 +312,7 @@ static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_sh
|
||||
}
|
||||
|
||||
/* Search for the substring of known shell-names */
|
||||
for (i = 0; i < (int)(sizeof (orte_pls_process_shell_name)/
|
||||
/* for (i = 0; i < (int)(sizeof (orte_pls_process_shell_name)/
|
||||
sizeof(orte_pls_process_shell_name[0])); i++) {
|
||||
char *sh_name = NULL;
|
||||
|
||||
@ -334,7 +322,7 @@ static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_sh
|
||||
|
||||
/* We cannot use "echo -n $SHELL" because -n is not portable. Therefore
|
||||
* we have to remove the "\n" */
|
||||
if ( sh_name[strlen(sh_name)-1] == '\n' ) {
|
||||
/* if ( sh_name[strlen(sh_name)-1] == '\n' ) {
|
||||
sh_name[strlen(sh_name)-1] = '\0';
|
||||
}
|
||||
if ( 0 == strcmp(sh_name, orte_pls_process_shell_name[i]) ) {
|
||||
@ -343,6 +331,7 @@ static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_sh
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
if (mca_pls_process_component.debug) {
|
||||
opal_output(0, "pls:process: node:%s has SHELL: %s\n",
|
||||
node->nodename, orte_pls_process_shell_name[*shell]);
|
||||
@ -523,7 +512,6 @@ int orte_pls_process_launch(orte_jobid_t jobid)
|
||||
orte_mapped_node_t *rmaps_node;
|
||||
orte_std_cntr_t num_nodes;
|
||||
orte_vpid_t vpid;
|
||||
int node_name_index1;
|
||||
int node_name_index2;
|
||||
int proc_name_index;
|
||||
int local_exec_index, local_exec_index_end;
|
||||
@ -531,7 +519,7 @@ int orte_pls_process_launch(orte_jobid_t jobid)
|
||||
char *uri, *param;
|
||||
char **argv = NULL;
|
||||
char *prefix_dir;
|
||||
int argc;
|
||||
int argc = 0;
|
||||
int rc;
|
||||
char *lib_base = NULL, *bin_base = NULL;
|
||||
orte_pls_daemon_info_t *dmn;
|
||||
@ -640,9 +628,6 @@ int orte_pls_process_launch(orte_jobid_t jobid)
|
||||
/*
|
||||
* Build argv array
|
||||
*/
|
||||
argv = opal_argv_copy(mca_pls_process_component.agent_argv);
|
||||
argc = mca_pls_process_component.agent_argc;
|
||||
node_name_index1 = argc;
|
||||
opal_argv_append(&argc, &argv, "<template>");
|
||||
|
||||
/* add the daemon command (as specified by user) */
|
||||
@ -775,15 +760,6 @@ int orte_pls_process_launch(orte_jobid_t jobid)
|
||||
opal_list_append(&active_daemons, &dmn->super);
|
||||
|
||||
/* setup node name */
|
||||
free(argv[node_name_index1]);
|
||||
if (NULL != rmaps_node->username &&
|
||||
0 != strlen (rmaps_node->username)) {
|
||||
asprintf (&argv[node_name_index1], "%s@%s",
|
||||
rmaps_node->username, rmaps_node->nodename);
|
||||
} else {
|
||||
argv[node_name_index1] = strdup(rmaps_node->nodename);
|
||||
}
|
||||
|
||||
free(argv[node_name_index2]);
|
||||
argv[node_name_index2] = strdup(rmaps_node->nodename);
|
||||
|
||||
@ -812,18 +788,18 @@ int orte_pls_process_launch(orte_jobid_t jobid)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
pid = fork();
|
||||
/* pid = fork();
|
||||
if (pid < 0) {
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
*/
|
||||
/* child */
|
||||
if (pid == 0) {
|
||||
/*if (pid == 0)*/ {
|
||||
char* name_string;
|
||||
char** env;
|
||||
char* var;
|
||||
long fd, fdmax = sysconf(_SC_OPEN_MAX);
|
||||
int fdmax = sysconf(_SC_OPEN_MAX);
|
||||
|
||||
if (mca_pls_process_component.debug) {
|
||||
opal_output(0, "pls:process: launching on node %s\n",
|
||||
@ -902,7 +878,7 @@ int orte_pls_process_launch(orte_jobid_t jobid)
|
||||
opal_output(0, "pls:process: reset PATH: %s", newenv);
|
||||
}
|
||||
free(newenv);
|
||||
|
||||
#if 0
|
||||
/* Reset LD_LIBRARY_PATH */
|
||||
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
|
||||
oldenv = getenv("LD_LIBRARY_PATH");
|
||||
@ -918,6 +894,7 @@ int orte_pls_process_launch(orte_jobid_t jobid)
|
||||
newenv);
|
||||
}
|
||||
free(newenv);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Since this is a local execution, we need to
|
||||
@ -958,7 +935,8 @@ int orte_pls_process_launch(orte_jobid_t jobid)
|
||||
rmaps_node->nodename);
|
||||
}
|
||||
exec_argv = argv;
|
||||
exec_path = strdup(mca_pls_process_component.agent_path);
|
||||
//exec_path = strdup(mca_pls_process_component.agent_path);
|
||||
|
||||
}
|
||||
|
||||
/* setup process name */
|
||||
@ -970,17 +948,6 @@ int orte_pls_process_launch(orte_jobid_t jobid)
|
||||
free(argv[proc_name_index]);
|
||||
argv[proc_name_index] = strdup(name_string);
|
||||
|
||||
if (!mca_pls_process_component.debug) {
|
||||
/* setup stdin */
|
||||
int fd = open("/dev/null", O_RDWR);
|
||||
dup2(fd, 0);
|
||||
close(fd);
|
||||
}
|
||||
|
||||
/* close all file descriptors w/ exception of stdin/stdout/stderr */
|
||||
for(fd=3; fd<fdmax; fd++)
|
||||
close(fd);
|
||||
|
||||
/* Set signal handlers back to the default. Do this close
|
||||
to the execve() because the event library may (and likely
|
||||
will) reset them. If we don't do this, the event
|
||||
@ -1017,11 +984,12 @@ int orte_pls_process_launch(orte_jobid_t jobid)
|
||||
free(param);
|
||||
}
|
||||
}
|
||||
execve(exec_path, exec_argv, env);
|
||||
opal_output(0, "pls:process: execv failed with errno=%d\n", errno);
|
||||
exit(-1);
|
||||
|
||||
} else { /* father */
|
||||
//execve(exec_path, exec_argv, env);
|
||||
pid = _spawnve( _P_DETACH, exec_path, exec_argv, env); //daniel
|
||||
|
||||
opal_output(0, "pls:process: execv hopefully started (pid %llx)\n", pid);
|
||||
#if 0
|
||||
} /*else*/ { /* father */
|
||||
OPAL_THREAD_LOCK(&mca_pls_process_component.lock);
|
||||
/* JJH Bug:
|
||||
* If we are in '--debug-daemons' we keep the ssh connection
|
||||
@ -1039,7 +1007,7 @@ int orte_pls_process_launch(orte_jobid_t jobid)
|
||||
opal_condition_wait(&mca_pls_process_component.cond, &mca_pls_process_component.lock);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mca_pls_process_component.lock);
|
||||
|
||||
#endif
|
||||
/* setup callback on sigchild - wait until setup above is complete
|
||||
* as the callback can occur in the call to orte_wait_cb
|
||||
*/
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user