This brings the new session directory system online, with the exception of the session_dir_finalize routine that
removes the tree; that routine will come online in the next couple of days. I'll send out a separate note highlighting the changes. This commit was SVN r1498.
Этот коммит содержится в:
родитель
58bf5bf271
Коммит
5d300e11c0
@ -13,6 +13,8 @@ noinst_LTLIBRARIES = libruntime.la
|
||||
|
||||
headers = \
|
||||
runtime.h \
|
||||
universe_init.h \
|
||||
universe_connect.h \
|
||||
ompi_progress.h
|
||||
|
||||
libruntime_la_SOURCES = \
|
||||
@ -23,7 +25,9 @@ libruntime_la_SOURCES = \
|
||||
ompi_mpi_init.c \
|
||||
ompi_mpi_finalize.c \
|
||||
ompi_progress.c \
|
||||
universe_init.c \
|
||||
ompi_rte_finalize.c \
|
||||
universe_connect.c \
|
||||
ompi_rte_init.c
|
||||
|
||||
# Conditionally install the header files
|
||||
|
@ -25,14 +25,18 @@ openmpi_SOURCES = \
|
||||
openmpi.h \
|
||||
openmpi.c
|
||||
|
||||
old_sources = ompi_init.h ompi_init.c
|
||||
|
||||
openmpi_LDADD = \
|
||||
$(libs) \
|
||||
$(LIBMPI_EXTRA_LIBS) \
|
||||
$(LIBOMPI_EXTRA_LIBS) \
|
||||
$(top_builddir)/src/util/os_path.lo \
|
||||
$(top_builddir)/src/util/os_create_dirpath.lo \
|
||||
$(top_builddir)/src/util/session_dir.lo \
|
||||
$(top_builddir)/src/util/proc_info.lo \
|
||||
$(top_builddir)/src/util/cmd_line.lo \
|
||||
$(top_builddir)/src/util/common_cmd_line.lo \
|
||||
$(top_builddir)/src/runtime/universe_init.lo \
|
||||
$(top_builddir)/src/runtime/universe_connect.lo \
|
||||
$(top_builddir)/src/util/sys_info.lo
|
||||
|
||||
openmpi_DFLAGS = $(LIBMPI_EXTRA_LDFLAGS) $(LIBOMPI_EXTRA_LDFLAGS)
|
||||
|
@ -11,33 +11,24 @@ openmpi.c - main program for spawning persistent universe.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <pwd.h>
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "util/sys_info.h"
|
||||
#include "util/cmd_line.h"
|
||||
#include "util/common_cmd_line.h"
|
||||
|
||||
#include "rte/universe/os_session_dir.h"
|
||||
#include "rte/universe/ompi_init.h"
|
||||
#include "rte/universe/openmpi.h"
|
||||
#include "util/proc_info.h"
|
||||
#include "util/session_dir.h"
|
||||
#include "runtime/universe_init.h"
|
||||
#include "runtime/universe_connect.h"
|
||||
#include "tools/openmpi/openmpi.h"
|
||||
|
||||
/**
|
||||
* Parse command line options and check for validity. Track which
|
||||
* ones have been provided to assess completeness of information.
|
||||
*
|
||||
* @retval OMPI_SUCCESS if all options provided are valid
|
||||
* @retval OMPI_ERROR if any option is not valid. Invalid options
|
||||
* will be reported to user but will not terminate processing.
|
||||
*/
|
||||
|
||||
ompi_universe_t universe = {
|
||||
ompi_universe_t ompi_universe = {
|
||||
/* .name = */ NULL,
|
||||
/* .host = */ NULL,
|
||||
/* .user_name = */ NULL,
|
||||
/* .user_id = */ -1,
|
||||
/* .pid = */ -1,
|
||||
/* .session_file = */ NULL,
|
||||
/* .uid = */ NULL,
|
||||
/* .persistence = */ false,
|
||||
/* .silent_mode = */ false,
|
||||
/* .script_mode = */ false,
|
||||
@ -46,16 +37,22 @@ ompi_universe_t universe = {
|
||||
};
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
ompi_cmd_line_t *cmd_line = NULL;
|
||||
char *tmpdir_option = NULL;
|
||||
char *universe_option = NULL;
|
||||
char *tmpdir = NULL;
|
||||
char *universe = NULL;
|
||||
char *tmp, *universe_name, *remote_host, *remote_uid;
|
||||
struct passwd *pwdent;
|
||||
|
||||
tmp = universe_name = remote_host = remote_uid = NULL;
|
||||
|
||||
/* get info on type of system we are on */
|
||||
ompi_sys_info();
|
||||
|
||||
/* setup to read common command line options that span all Open MPI programs */
|
||||
ompi_common_cmd_line_init(argc, argv);
|
||||
|
||||
/* setup to check non-common command line options - ones specific to this program */
|
||||
cmd_line = ompi_cmd_line_create();
|
||||
if (NULL == cmd_line) {
|
||||
fprintf(stderr,"openmpi: Command line handle could not be created - please report error to bugs@open-mpi.org");
|
||||
@ -63,93 +60,103 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
ompi_cmd_line_make_opt(cmd_line, 'v', "version", 0,
|
||||
"Show version of Open MPI and this program");
|
||||
ompi_cmd_line_make_opt(cmd_line, 'u', "universe", 1,
|
||||
"User specified name for universe");
|
||||
ompi_cmd_line_make_opt(cmd_line, 't', "tmpdir", 1,
|
||||
"Temp directory to be used by universe");
|
||||
"Show version of Open MPI and this program");
|
||||
ompi_cmd_line_make_opt(cmd_line, 'w', "webserver", 1,
|
||||
"Web server available");
|
||||
"Web server available");
|
||||
ompi_cmd_line_make_opt(cmd_line, 's', "silent", 1,
|
||||
"No console prompt - operate silently");
|
||||
"No console prompt - operate silently");
|
||||
ompi_cmd_line_make_opt(cmd_line, 'f', "script", 1,
|
||||
"Read commands from script file");
|
||||
"Read commands from script file");
|
||||
ompi_cmd_line_make_opt(cmd_line, 'h', "help", 0,
|
||||
"Show help for this function");
|
||||
|
||||
if ((OMPI_SUCCESS != ompi_cmd_line_parse(cmd_line, false, argc, argv)) ||
|
||||
ompi_cmd_line_is_taken(cmd_line, "help") ||
|
||||
ompi_cmd_line_is_taken(cmd_line, "h")) {
|
||||
ompi_cmd_line_is_taken(cmd_line, "help")) {
|
||||
fprintf(stderr, "...showing openmpi help message...\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* get universe name and store it, if user specified it */
|
||||
/* otherwise, stick with default name */
|
||||
if (ompi_cmd_line_is_taken(cmd_line, "universe")) {
|
||||
if (NULL == ompi_cmd_line_get_param(cmd_line, "universe", 0, 0)) {
|
||||
if (ompi_cmd_line_is_taken(ompi_common_cmd_line, "universe")) {
|
||||
if (NULL == ompi_cmd_line_get_param(ompi_common_cmd_line, "universe", 0, 0)) {
|
||||
fprintf(stderr, "error retrieving universe name - please report error to bugs@open-mpi.org\n");
|
||||
exit(1);
|
||||
exit(1);
|
||||
}
|
||||
universe_option = strdup(ompi_cmd_line_get_param(cmd_line, "universe", 0, 0));
|
||||
universe = strdup(ompi_cmd_line_get_param(ompi_common_cmd_line, "universe", 0, 0));
|
||||
|
||||
if (NULL != (tmp = strchr(universe_option, ':'))) { /* name contains remote host */
|
||||
if (NULL != (tmp = strchr(universe, ':'))) { /* name contains remote host */
|
||||
/* get the host name, and the universe name separated */
|
||||
/* could be in form remote-uid@remote-host:universe */
|
||||
*tmp = '\0';
|
||||
tmp++;
|
||||
universe_name = strdup(tmp);
|
||||
if (NULL != (tmp = strchr(universe_option, '@'))) { /* remote name includes remote uid */
|
||||
ompi_universe.name = strdup(tmp);
|
||||
if (NULL != (tmp = strchr(universe, '@'))) { /* remote name includes remote uid */
|
||||
*tmp = '\0';
|
||||
tmp++;
|
||||
remote_host = strdup(tmp);
|
||||
remote_uid = strdup(universe_option);
|
||||
ompi_universe.host = strdup(tmp);
|
||||
ompi_universe.uid = strdup(universe);
|
||||
} else { /* no remote id - just remote host */
|
||||
ompi_universe.host = strdup(universe);
|
||||
}
|
||||
} else { /* no remote host - just universe name provided */
|
||||
ompi_universe.name = strdup(universe);
|
||||
}
|
||||
} else {
|
||||
universe_name = strdup("default");
|
||||
ompi_universe.name = strdup("default-universe");
|
||||
}
|
||||
|
||||
/* get the pid and store it for later use */
|
||||
universe.pid = getpid();
|
||||
|
||||
/* get the temporary directory name for the session directory, if provided on command line */
|
||||
if (ompi_cmd_line_is_taken(cmd_line, "tmpdir")) {
|
||||
if (NULL == ompi_cmd_line_get_param(cmd_line, "tmpdir", 0, 0)) {
|
||||
if (ompi_cmd_line_is_taken(ompi_common_cmd_line, "tmpdir")) {
|
||||
if (NULL == ompi_cmd_line_get_param(ompi_common_cmd_line, "tmpdir", 0, 0)) {
|
||||
fprintf(stderr, "error retrieving tmpdir name - please report error to bugs@open-mpi.org\n");
|
||||
exit(1);
|
||||
}
|
||||
tmpdir_option = strdup(ompi_cmd_line_get_param(cmd_line, "tmpdir", 0, 0));
|
||||
tmpdir = strdup(ompi_cmd_line_get_param(ompi_common_cmd_line, "tmpdir", 0, 0));
|
||||
} else {
|
||||
tmpdir_option = NULL;
|
||||
tmpdir = NULL;
|
||||
}
|
||||
|
||||
/* Store all the information in the Universe structure for later use */
|
||||
universe.name = strdup(universe_name);
|
||||
if (NULL != remote_host) {
|
||||
universe.host = strdup(remote_host);
|
||||
} else {
|
||||
universe.host = (char *)malloc(OMPI_RIDICULOUS_NAMELEN);
|
||||
if (NULL == universe.host) {
|
||||
fprintf(stderr, "openmpi(error): unable to get memory allocation - please report error to bugs@open-mpi.org\n");
|
||||
exit(1);
|
||||
/* startup the MCA so we can use OOB */
|
||||
/* if (OMPI_ERROR == ompi_mca_init()) {
|
||||
fprintf(stderr, "MCA could not start - please report error to bugs@open-mpi.org\n");
|
||||
exit (1);
|
||||
}
|
||||
*/
|
||||
/* does universe already exist on specified host? Check session directory to see */
|
||||
/* don't know how to handle remote host yet - only cover localhost */
|
||||
|
||||
if (NULL == ompi_universe.host) { /* localhost specified */
|
||||
if (NULL == (tmp = ompi_session_dir(false, tmpdir, ompi_system_info.user, ompi_universe.name,
|
||||
NULL, NULL))) { /* not found */
|
||||
fprintf(stderr, "session dir not found - creating it - calling univ_init\n");
|
||||
/* setup universe and connections */
|
||||
if (NULL == (tmp = ompi_universe_init(tmpdir, ompi_system_info.user,
|
||||
ompi_universe.name))) { /* couldn't create universe - error */
|
||||
fprintf(stderr, "could not create universe session directory tree - please report error to bugs@open-mpi.org\n");
|
||||
exit(1);
|
||||
}
|
||||
if (OMPI_ERROR == ompi_universe_connect(tmp)) { /* try to connect */
|
||||
/* failed - we're doomed */
|
||||
fprintf(stderr, "could not connect to universe - please report error to bugs@open-mpi.org\n");
|
||||
exit(1);
|
||||
}
|
||||
} else { /* was found! read session info and try to connect */
|
||||
fprintf(stderr, "think i found something\n");
|
||||
if (OMPI_ERROR == ompi_universe_connect(tmp)) { /* try to connect */
|
||||
/* first failure - try to start universe and then try again */
|
||||
if (NULL == (tmp = ompi_universe_init(tmpdir, ompi_system_info.user,
|
||||
ompi_universe.name))) { /* couldn't create universe - error */
|
||||
fprintf(stderr, "could not create universe session directory tree - please report error to bugs@open-mpi.org\n");
|
||||
exit(1);
|
||||
}
|
||||
if (OMPI_ERROR == ompi_universe_connect(tmp)) { /* try to connect */
|
||||
/* second failure - we're doomed */
|
||||
fprintf(stderr, "could not connect to universe - please report error to bugs@open-mpi.org\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
gethostname(universe.host, OMPI_RIDICULOUS_NAMELEN);
|
||||
}
|
||||
|
||||
if (NULL != remote_uid) {
|
||||
universe.user_name = strdup(remote_uid);
|
||||
} else { /* get the name of the user */
|
||||
if ((pwdent = getpwuid(getuid())) != 0) {
|
||||
universe.user_name = strdup(pwdent->pw_name);
|
||||
} else {
|
||||
universe.user_name = strdup("unknown");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* initialize the Open MPI system */
|
||||
if (OMPI_ERROR == ompi_init(universe_name, tmpdir_option)) {
|
||||
fprintf(stderr, "Unable to initialize system - please report error to bugs@open-mpi.org\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
@ -11,10 +11,7 @@
|
||||
struct ompi_universe_t {
|
||||
char *name;
|
||||
char *host;
|
||||
char *user_name;
|
||||
uid_t user_id;
|
||||
pid_t pid;
|
||||
char *session_file;
|
||||
char *uid;
|
||||
bool persistence;
|
||||
bool silent_mode;
|
||||
bool script_mode;
|
||||
@ -23,6 +20,8 @@ struct ompi_universe_t {
|
||||
};
|
||||
typedef struct ompi_universe_t ompi_universe_t;
|
||||
|
||||
extern ompi_universe_t ompi_universe;
|
||||
|
||||
#ifndef MAXHOSTNAMELEN
|
||||
#define MAXHOSTNAMELEN 256
|
||||
#endif
|
||||
|
@ -4,20 +4,10 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <libgen.h>
|
||||
#include <stdlib.h>
|
||||
#include <pwd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "util/sys_info.h"
|
||||
#include "util/os_path.h"
|
||||
#include "util/os_create_dirpath.h"
|
||||
#include "util/os_session_dir.h"
|
||||
#include "util/cmd_line.h"
|
||||
#include "util/common_cmd_line.h"
|
||||
|
||||
#include "util/proc_info.h"
|
||||
|
||||
@ -35,10 +25,6 @@ ompi_proc_info_t ompi_process_info = {
|
||||
|
||||
int ompi_proc_info(void)
|
||||
{
|
||||
char *universe = NULL;
|
||||
char *tmpdir = NULL;
|
||||
char *procname, *procpath, *jobname, *jobpath;
|
||||
mode_t mode = S_IRWXU;
|
||||
|
||||
if (ompi_process_info.init) { /* already done this - don't do it again */
|
||||
return(OMPI_SUCCESS);
|
||||
@ -57,49 +43,6 @@ int ompi_proc_info(void)
|
||||
ompi_process_info.name->jobid,
|
||||
ompi_process_info.name->procid);
|
||||
|
||||
/* see if user specified universe name */
|
||||
if (ompi_cmd_line_is_taken(ompi_common_cmd_line, "universe")) {
|
||||
if (NULL == ompi_cmd_line_get_param(ompi_common_cmd_line, "universe", 0, 0)) {
|
||||
return(OMPI_ERROR);
|
||||
}
|
||||
universe = strdup(ompi_cmd_line_get_param(ompi_common_cmd_line, "universe", 0, 0));
|
||||
}
|
||||
|
||||
/* see if user specified session directory prefix */
|
||||
if (ompi_cmd_line_is_taken(ompi_common_cmd_line, "tmpdir")) {
|
||||
if (NULL == ompi_cmd_line_get_param(ompi_common_cmd_line, "tmpdir", 0, 0)) {
|
||||
return(OMPI_ERROR);
|
||||
}
|
||||
tmpdir = strdup(ompi_cmd_line_get_param(ompi_common_cmd_line, "tmpdir", 0, 0));
|
||||
}
|
||||
|
||||
/* get the universe session directory setup */
|
||||
if (OMPI_ERROR == ompi_session_tree_init(tmpdir, universe)) {
|
||||
/* this is a serious error, so return the error condition */
|
||||
return(OMPI_ERROR);
|
||||
}
|
||||
|
||||
/* get the job session directory setup */
|
||||
sprintf(jobname, "%x", ompi_process_info.name->jobid);
|
||||
jobpath = ompi_os_path(false, ompi_process_info.universe_session_dir, jobname, NULL);
|
||||
if (OMPI_ERROR == ompi_os_create_dirpath(jobpath, mode)) {
|
||||
/* this is a serious error, so return the error condition */
|
||||
return(OMPI_ERROR);
|
||||
}
|
||||
ompi_process_info.job_session_dir = (char *)malloc((strlen(jobpath)+strlen(ompi_system_info.path_sep)+1)*sizeof(char));
|
||||
strcpy(ompi_process_info.job_session_dir, jobpath);
|
||||
ompi_process_info.job_session_dir = strcat(ompi_process_info.job_session_dir, ompi_system_info.path_sep);
|
||||
|
||||
|
||||
/* setup process session directory */
|
||||
sprintf(procname, "%x", ompi_process_info.name->procid);
|
||||
procpath = ompi_os_path(false, ompi_process_info.job_session_dir, procname, NULL);
|
||||
if (OMPI_ERROR == ompi_os_create_dirpath(procpath, mode)) { /* error in setting up the directory - cannot proceed */
|
||||
return(OMPI_ERROR);
|
||||
}
|
||||
ompi_process_info.proc_session_dir = (char *)malloc((strlen(procpath)+strlen(ompi_system_info.path_sep)+1)*sizeof(char));
|
||||
strcpy(ompi_process_info.proc_session_dir, procpath);
|
||||
ompi_process_info.proc_session_dir = strcat(ompi_process_info.proc_session_dir, ompi_system_info.path_sep);
|
||||
|
||||
ompi_process_info.init = true;
|
||||
return(OMPI_SUCCESS);
|
||||
|
@ -17,10 +17,11 @@
|
||||
/**
|
||||
* Process information structure
|
||||
*
|
||||
* The ompi_proc_info() function fills the pid field, obtains the process name, and
|
||||
* creates the process session directory, storing that information in the global
|
||||
* structure. The structure also holds path names to the stdin, stdout, and
|
||||
* stderr temp files - however, these are initialized elsewhere.
|
||||
* The ompi_proc_info() function fills the pid field and obtains the process name,
|
||||
* storing that information in the global
|
||||
* structure. The structure also holds path names to the universe, job, and process
|
||||
* session directories, and to the stdin, stdout, and
|
||||
* stderr temp files - however, these are all initialized elsewhere.
|
||||
*
|
||||
*/
|
||||
struct ompi_proc_info_t {
|
||||
@ -30,15 +31,12 @@ struct ompi_proc_info_t {
|
||||
*/
|
||||
pid_t pid; /**< Local process ID for this process */
|
||||
ompi_process_name_t *name; /**< Process name structure */
|
||||
char *universe_session_dir; /**< Location of user writable temp dir.
|
||||
char *universe_session_dir; /**< Location of universe temp dir.
|
||||
* The session directory has the form
|
||||
* <prefix><openmpi-sessions-user><universe><job><process>, where the prefix
|
||||
* <prefix><openmpi-sessions-user><universe>, where the prefix
|
||||
* can either be provided by the user via the
|
||||
* --tmpdir command-line flag, the use of one of several
|
||||
* environmental variables, or else a default location. The
|
||||
* function ompi_session_dir_init() develops
|
||||
* the name of this directory, creates it, and stores the name
|
||||
* in this location.
|
||||
* environmental variables, or else a default location.
|
||||
*/
|
||||
|
||||
char *job_session_dir; /**< Session directory for job */
|
||||
|
@ -61,10 +61,6 @@
|
||||
* \par \em <process>
|
||||
* A directory for the specific process, will house all information for that process.
|
||||
*
|
||||
* The ompi_session_dir() function searches either a user-specified location, or a
|
||||
* set of standard locations that might contain the specified directory. Once
|
||||
* found, the function returns the pathname of that directory. The function calls the
|
||||
* ompi_check_dir() function.
|
||||
*/
|
||||
|
||||
/** @param create A boolean variable that indicates whether or not to create the specified
|
||||
|
@ -19,9 +19,7 @@
|
||||
* System information structure
|
||||
*
|
||||
* The ompi_sys_info() function fills the sysname, nodename, release, version, machine,
|
||||
* path_sep, and user fields, but does not populate
|
||||
* the session_dir, enviro, suffix, or sock_* fields. These latter fields are populated by other
|
||||
* functions as required.
|
||||
* path_sep, and user fields.
|
||||
*
|
||||
*/
|
||||
struct ompi_sys_info_t {
|
||||
@ -60,7 +58,8 @@ extern ompi_sys_info_t ompi_system_info;
|
||||
* this code is executing. ompi_sys_info populates a global variable with information about the system
|
||||
* upon which the process is executing.
|
||||
*
|
||||
* @retval None
|
||||
* @retval OMPI_SUCCESS If values are successfully determined.
|
||||
* @retval OMPI_ERROR If the system does not provide the requested information.
|
||||
*/
|
||||
|
||||
int ompi_sys_info(void);
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user