1
1

The Windows PLS is now able to spawn processes locally.

This commit was SVN r13074.
Этот коммит содержится в:
George Bosilca 2007-01-11 00:16:58 +00:00
родитель d2921a9d42
Коммит c8222b57eb
3 изменённых файлов: 29 добавлений и 127 удалений

Просмотреть файл

@ -69,10 +69,6 @@ struct orte_pls_process_component_t {
bool force_process; bool force_process;
int delay; int delay;
int priority; int priority;
char *agent_param;
char** agent_argv;
int agent_argc;
char* agent_path;
char* orted; char* orted;
orte_std_cntr_t num_children; orte_std_cntr_t num_children;
orte_std_cntr_t num_concurrent; orte_std_cntr_t num_concurrent;

Просмотреть файл

@ -115,9 +115,6 @@ int orte_pls_process_component_open(void)
OBJ_CONSTRUCT(&mca_pls_process_component.lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_pls_process_component.lock, opal_mutex_t);
OBJ_CONSTRUCT(&mca_pls_process_component.cond, opal_condition_t); OBJ_CONSTRUCT(&mca_pls_process_component.cond, opal_condition_t);
mca_pls_process_component.num_children = 0; mca_pls_process_component.num_children = 0;
mca_pls_process_component.agent_argv = NULL;
mca_pls_process_component.agent_argc = 0;
mca_pls_process_component.agent_path = NULL;
/* lookup parameters */ /* lookup parameters */
mca_base_param_reg_int(c, "debug", mca_base_param_reg_int(c, "debug",
@ -181,11 +178,6 @@ int orte_pls_process_component_open(void)
false, false, 1, &tmp); false, false, 1, &tmp);
mca_pls_process_component.assume_same_shell = OPAL_INT_TO_BOOL(tmp); mca_pls_process_component.assume_same_shell = OPAL_INT_TO_BOOL(tmp);
mca_base_param_reg_string(c, "agent",
"The command used to launch executables on remote nodes (typically either \"ssh\" or \"process\")",
false, false, "ssh : process",
&mca_pls_process_component.agent_param);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -196,56 +188,11 @@ extern char **environ;
orte_pls_base_module_t *orte_pls_process_component_init(int *priority) orte_pls_base_module_t *orte_pls_process_component_init(int *priority)
{ {
char *bname;
size_t i;
/* if we are not an HNP, then don't select us */ /* if we are not an HNP, then don't select us */
if (!orte_process_info.seed) { if (!orte_process_info.seed) {
return NULL; return NULL;
} }
/* Take the string that was given to us by the pla_process_agent MCA
param and search for it */
mca_pls_process_component.agent_argv =
search(mca_pls_process_component.agent_param);
mca_pls_process_component.agent_argc =
opal_argv_count(mca_pls_process_component.agent_argv);
mca_pls_process_component.agent_path = NULL;
if (mca_pls_process_component.agent_argc > 0) {
/* If the agent is ssh, and debug was not selected, then
automatically add "-x" */
bname = opal_basename(mca_pls_process_component.agent_argv[0]);
if (NULL != bname && 0 == strcmp(bname, "ssh") &&
mca_pls_process_component.debug == 0) {
for (i = 1; NULL != mca_pls_process_component.agent_argv[i]; ++i) {
if (0 == strcasecmp("-x",
mca_pls_process_component.agent_argv[i])) {
break;
}
}
if (NULL == mca_pls_process_component.agent_argv[i]) {
opal_argv_append(&mca_pls_process_component.agent_argc,
&mca_pls_process_component.agent_argv, "-x");
}
}
if (NULL != bname) {
free(bname);
}
}
/* If we didn't find the agent in the path, then don't use this
component */
if (NULL == mca_pls_process_component.agent_argv ||
NULL == mca_pls_process_component.agent_argv[0]) {
return NULL;
}
mca_pls_process_component.agent_path =
opal_path_findv(mca_pls_process_component.agent_argv[0], X_OK,
environ, NULL);
if (NULL == mca_pls_process_component.agent_path) {
return NULL;
}
*priority = mca_pls_process_component.priority; *priority = mca_pls_process_component.priority;
return &orte_pls_process_module; return &orte_pls_process_module;
@ -260,15 +207,6 @@ int orte_pls_process_component_close(void)
if (NULL != mca_pls_process_component.orted) { if (NULL != mca_pls_process_component.orted) {
free(mca_pls_process_component.orted); free(mca_pls_process_component.orted);
} }
if (NULL != mca_pls_process_component.agent_param) {
free(mca_pls_process_component.agent_param);
}
if (NULL != mca_pls_process_component.agent_argv) {
opal_argv_free(mca_pls_process_component.agent_argv);
}
if (NULL != mca_pls_process_component.agent_path) {
free(mca_pls_process_component.agent_path);
}
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -93,12 +93,11 @@
#include "orte/mca/pls/base/pls_private.h" #include "orte/mca/pls/base/pls_private.h"
#include "orte/mca/pls/process/pls_process.h" #include "orte/mca/pls/process/pls_process.h"
_CRTIMP extern char **environ; //daniel //_CRTIMP extern char **environ; //daniel
//extern char **environ; //extern char **environ;
#define rindex(a,b) strrchr((a),(b)) #define rindex(a,b) strrchr((a),(b)) //daniel
//daniel
#if OMPI_HAVE_POSIX_THREADS && OMPI_THREADS_HAVE_DIFFERENT_PIDS && OMPI_ENABLE_PROGRESS_THREADS #if OMPI_HAVE_POSIX_THREADS && OMPI_THREADS_HAVE_DIFFERENT_PIDS && OMPI_ENABLE_PROGRESS_THREADS
static int orte_pls_process_launch_threaded(orte_jobid_t jobid); static int orte_pls_process_launch_threaded(orte_jobid_t jobid);
@ -139,7 +138,8 @@ static const char * orte_pls_process_shell_name[] = {
"ksh", "ksh",
"sh", "sh",
"unknown" "unknown"
}; };
/* local global storage of timing variables */ /* local global storage of timing variables */
static unsigned long mintime=999999999, miniter, maxtime=0, maxiter; static unsigned long mintime=999999999, miniter, maxtime=0, maxiter;
@ -158,9 +158,8 @@ static opal_list_t active_daemons;
static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_shell * shell) static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_shell * shell)
{ {
char ** argv; char ** argv;
int argc, rc, nfds, i; int rc, nfds;
int fd[2]; int fd[2];
pid_t pid; //daniel
HANDLE myPipeFd[2]; HANDLE myPipeFd[2];
SECURITY_ATTRIBUTES securityAttr; SECURITY_ATTRIBUTES securityAttr;
@ -179,24 +178,13 @@ static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_sh
/* /*
* Build argv array * Build argv array
*/ */
argv = opal_argv_copy(mca_pls_process_component.agent_argv);
argc = mca_pls_process_component.agent_argc;
opal_argv_append(&argc, &argv, node->nodename);
opal_argv_append(&argc, &argv, "echo $SHELL");
/* daniel *******************
*/
/*
if (pipe(fd)) {
opal_output(0, "pls:process: pipe failed with errno=%d\n", errno);
return ORTE_ERR_IN_ERRNO;
}
*/
securityAttr.nLength = sizeof(SECURITY_ATTRIBUTES); // Size of struct securityAttr.nLength = sizeof(SECURITY_ATTRIBUTES); // Size of struct
securityAttr.lpSecurityDescriptor = NULL; // Default descriptor securityAttr.lpSecurityDescriptor = NULL; // Default descriptor
securityAttr.bInheritHandle = TRUE; // Inheritable securityAttr.bInheritHandle = TRUE; // Inheritable
// Create the pipe // Create the pipe
if (CreatePipe(&myPipeFd[0], &myPipeFd[1], &securityAttr, 0)) { if (CreatePipe(&myPipeFd[0], &myPipeFd[1], &securityAttr, 0)) {
@ -230,7 +218,7 @@ static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_sh
// Start the child process. // Start the child process.
if( !CreateProcess( argv[0], //module name NULL, if( !CreateProcess( argv[0], //module name NULL,
(LPSTR) _tcsdup(TEXT((const char *)argv)), // Command line szCmdline, NULL, //(LPSTR)(const char *) argv,
NULL, // Process handle not inheritable NULL, // Process handle not inheritable
NULL, // Thread handle not inheritable NULL, // Thread handle not inheritable
TRUE, // Set handle inheritance to TRUE; TRUE, // Set handle inheritance to TRUE;
@ -324,7 +312,7 @@ static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_sh
} }
/* Search for the substring of known shell-names */ /* Search for the substring of known shell-names */
for (i = 0; i < (int)(sizeof (orte_pls_process_shell_name)/ /* for (i = 0; i < (int)(sizeof (orte_pls_process_shell_name)/
sizeof(orte_pls_process_shell_name[0])); i++) { sizeof(orte_pls_process_shell_name[0])); i++) {
char *sh_name = NULL; char *sh_name = NULL;
@ -334,7 +322,7 @@ static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_sh
/* We cannot use "echo -n $SHELL" because -n is not portable. Therefore /* We cannot use "echo -n $SHELL" because -n is not portable. Therefore
* we have to remove the "\n" */ * we have to remove the "\n" */
if ( sh_name[strlen(sh_name)-1] == '\n' ) { /* if ( sh_name[strlen(sh_name)-1] == '\n' ) {
sh_name[strlen(sh_name)-1] = '\0'; sh_name[strlen(sh_name)-1] = '\0';
} }
if ( 0 == strcmp(sh_name, orte_pls_process_shell_name[i]) ) { if ( 0 == strcmp(sh_name, orte_pls_process_shell_name[i]) ) {
@ -343,6 +331,7 @@ static int orte_pls_process_probe(orte_mapped_node_t * node, orte_pls_process_sh
} }
} }
} }
*/
if (mca_pls_process_component.debug) { if (mca_pls_process_component.debug) {
opal_output(0, "pls:process: node:%s has SHELL: %s\n", opal_output(0, "pls:process: node:%s has SHELL: %s\n",
node->nodename, orte_pls_process_shell_name[*shell]); node->nodename, orte_pls_process_shell_name[*shell]);
@ -523,7 +512,6 @@ int orte_pls_process_launch(orte_jobid_t jobid)
orte_mapped_node_t *rmaps_node; orte_mapped_node_t *rmaps_node;
orte_std_cntr_t num_nodes; orte_std_cntr_t num_nodes;
orte_vpid_t vpid; orte_vpid_t vpid;
int node_name_index1;
int node_name_index2; int node_name_index2;
int proc_name_index; int proc_name_index;
int local_exec_index, local_exec_index_end; int local_exec_index, local_exec_index_end;
@ -531,7 +519,7 @@ int orte_pls_process_launch(orte_jobid_t jobid)
char *uri, *param; char *uri, *param;
char **argv = NULL; char **argv = NULL;
char *prefix_dir; char *prefix_dir;
int argc; int argc = 0;
int rc; int rc;
char *lib_base = NULL, *bin_base = NULL; char *lib_base = NULL, *bin_base = NULL;
orte_pls_daemon_info_t *dmn; orte_pls_daemon_info_t *dmn;
@ -640,9 +628,6 @@ int orte_pls_process_launch(orte_jobid_t jobid)
/* /*
* Build argv array * Build argv array
*/ */
argv = opal_argv_copy(mca_pls_process_component.agent_argv);
argc = mca_pls_process_component.agent_argc;
node_name_index1 = argc;
opal_argv_append(&argc, &argv, "<template>"); opal_argv_append(&argc, &argv, "<template>");
/* add the daemon command (as specified by user) */ /* add the daemon command (as specified by user) */
@ -775,15 +760,6 @@ int orte_pls_process_launch(orte_jobid_t jobid)
opal_list_append(&active_daemons, &dmn->super); opal_list_append(&active_daemons, &dmn->super);
/* setup node name */ /* setup node name */
free(argv[node_name_index1]);
if (NULL != rmaps_node->username &&
0 != strlen (rmaps_node->username)) {
asprintf (&argv[node_name_index1], "%s@%s",
rmaps_node->username, rmaps_node->nodename);
} else {
argv[node_name_index1] = strdup(rmaps_node->nodename);
}
free(argv[node_name_index2]); free(argv[node_name_index2]);
argv[node_name_index2] = strdup(rmaps_node->nodename); argv[node_name_index2] = strdup(rmaps_node->nodename);
@ -812,18 +788,18 @@ int orte_pls_process_launch(orte_jobid_t jobid)
goto cleanup; goto cleanup;
} }
pid = fork(); /* pid = fork();
if (pid < 0) { if (pid < 0) {
rc = ORTE_ERR_OUT_OF_RESOURCE; rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup; goto cleanup;
} }
*/
/* child */ /* child */
if (pid == 0) { /*if (pid == 0)*/ {
char* name_string; char* name_string;
char** env; char** env;
char* var; char* var;
long fd, fdmax = sysconf(_SC_OPEN_MAX); int fdmax = sysconf(_SC_OPEN_MAX);
if (mca_pls_process_component.debug) { if (mca_pls_process_component.debug) {
opal_output(0, "pls:process: launching on node %s\n", opal_output(0, "pls:process: launching on node %s\n",
@ -902,7 +878,7 @@ int orte_pls_process_launch(orte_jobid_t jobid)
opal_output(0, "pls:process: reset PATH: %s", newenv); opal_output(0, "pls:process: reset PATH: %s", newenv);
} }
free(newenv); free(newenv);
#if 0
/* Reset LD_LIBRARY_PATH */ /* Reset LD_LIBRARY_PATH */
newenv = opal_os_path( false, prefix_dir, lib_base, NULL ); newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
oldenv = getenv("LD_LIBRARY_PATH"); oldenv = getenv("LD_LIBRARY_PATH");
@ -918,6 +894,7 @@ int orte_pls_process_launch(orte_jobid_t jobid)
newenv); newenv);
} }
free(newenv); free(newenv);
#endif
} }
/* Since this is a local execution, we need to /* Since this is a local execution, we need to
@ -958,7 +935,8 @@ int orte_pls_process_launch(orte_jobid_t jobid)
rmaps_node->nodename); rmaps_node->nodename);
} }
exec_argv = argv; exec_argv = argv;
exec_path = strdup(mca_pls_process_component.agent_path); //exec_path = strdup(mca_pls_process_component.agent_path);
} }
/* setup process name */ /* setup process name */
@ -970,17 +948,6 @@ int orte_pls_process_launch(orte_jobid_t jobid)
free(argv[proc_name_index]); free(argv[proc_name_index]);
argv[proc_name_index] = strdup(name_string); argv[proc_name_index] = strdup(name_string);
if (!mca_pls_process_component.debug) {
/* setup stdin */
int fd = open("/dev/null", O_RDWR);
dup2(fd, 0);
close(fd);
}
/* close all file descriptors w/ exception of stdin/stdout/stderr */
for(fd=3; fd<fdmax; fd++)
close(fd);
/* Set signal handlers back to the default. Do this close /* Set signal handlers back to the default. Do this close
to the execve() because the event library may (and likely to the execve() because the event library may (and likely
will) reset them. If we don't do this, the event will) reset them. If we don't do this, the event
@ -1017,11 +984,12 @@ int orte_pls_process_launch(orte_jobid_t jobid)
free(param); free(param);
} }
} }
execve(exec_path, exec_argv, env); //execve(exec_path, exec_argv, env);
opal_output(0, "pls:process: execv failed with errno=%d\n", errno); pid = _spawnve( _P_DETACH, exec_path, exec_argv, env); //daniel
exit(-1);
opal_output(0, "pls:process: execv hopefully started (pid %llx)\n", pid);
} else { /* father */ #if 0
} /*else*/ { /* father */
OPAL_THREAD_LOCK(&mca_pls_process_component.lock); OPAL_THREAD_LOCK(&mca_pls_process_component.lock);
/* JJH Bug: /* JJH Bug:
* If we are in '--debug-daemons' we keep the ssh connection * If we are in '--debug-daemons' we keep the ssh connection
@ -1039,7 +1007,7 @@ int orte_pls_process_launch(orte_jobid_t jobid)
opal_condition_wait(&mca_pls_process_component.cond, &mca_pls_process_component.lock); opal_condition_wait(&mca_pls_process_component.cond, &mca_pls_process_component.lock);
} }
OPAL_THREAD_UNLOCK(&mca_pls_process_component.lock); OPAL_THREAD_UNLOCK(&mca_pls_process_component.lock);
#endif
/* setup callback on sigchild - wait until setup above is complete /* setup callback on sigchild - wait until setup above is complete
* as the callback can occur in the call to orte_wait_cb * as the callback can occur in the call to orte_wait_cb
*/ */