From ae2af61ee307e01523638e06a91f67b6f5c312f9 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 15 Aug 2016 22:46:46 -0500 Subject: [PATCH] Update the session dir structure. Restore the creation of a top-level dir based on userid so that everything is contained under the user's top-level dir. Make the next level down (the "job family" level) be either the pid (indicated by a name of "pid.N") or the job family if not launched by mpirun. This allows for proper rendezvous by direct-launched procs. --- orte/mca/oob/usock/oob_usock_component.c | 16 +++-- orte/mca/schizo/ompi/schizo_ompi.c | 1 + orte/orted/pmix/pmix_server.c | 8 ++- orte/runtime/orte_mca_params.c | 17 ++++- orte/util/proc_info.c | 8 ++- orte/util/proc_info.h | 3 +- orte/util/session_dir.c | 92 +++++++++++++----------- 7 files changed, 96 insertions(+), 49 deletions(-) diff --git a/orte/mca/oob/usock/oob_usock_component.c b/orte/mca/oob/usock/oob_usock_component.c index 290810e12f..eb4737abae 100644 --- a/orte/mca/oob/usock/oob_usock_component.c +++ b/orte/mca/oob/usock/oob_usock_component.c @@ -205,6 +205,7 @@ static void connection_event_handler(int incoming_sd, short flags, void* cbdata) static int component_startup(void) { int rc=ORTE_SUCCESS; + char *session; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s USOCK STARTUP", @@ -213,11 +214,18 @@ static int component_startup(void) /* setup the path to the daemon rendezvous point */ memset(&mca_oob_usock_component.address, 0, sizeof(struct sockaddr_un)); mca_oob_usock_component.address.sun_family = AF_UNIX; + session = opal_os_path(false, orte_process_info.tmpdir_base, + orte_process_info.top_session_dir, + orte_process_info.jobfam_session_dir, + "usock", NULL); + if ((strlen(session) + 1) > sizeof(mca_oob_usock_component.address.sun_path)-1) { + opal_output(0, "SESSION DIR TOO LONG"); + return ORTE_ERR_NOT_SUPPORTED; + } snprintf(mca_oob_usock_component.address.sun_path, sizeof(mca_oob_usock_component.address.sun_path)-1, - 
"%s/%s/%s/0/%s", orte_process_info.tmpdir_base, - orte_process_info.top_session_dir, - ORTE_JOB_FAMILY_PRINT(ORTE_PROC_MY_NAME->jobid), "usock"); + "%s", session); + free(session); opal_output_verbose(2, orte_oob_base_framework.framework_output, "SUNPATH: %s", mca_oob_usock_component.address.sun_path); @@ -231,7 +239,7 @@ static int component_startup(void) /* if the rendezvous point isn't there, then that's an error */ /* if the rendezvous file doesn't exist, that's an error */ if (0 != access(mca_oob_usock_component.address.sun_path, R_OK)) { - opal_output_verbose(2, orte_oob_base_framework.framework_output, + opal_output_verbose(2, orte_oob_base_framework.framework_output, "SUNPATH: %s NOT READABLE", mca_oob_usock_component.address.sun_path); return OPAL_ERR_NOT_FOUND; } diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index bc96b741f3..cb0773d545 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -944,6 +944,7 @@ static int setup_fork(orte_job_t *jdata, /* forcibly set the local tmpdir base and top session dir to match ours */ opal_setenv("OMPI_MCA_orte_tmpdir_base", orte_process_info.tmpdir_base, true, &app->env); opal_setenv("OMPI_MCA_orte_top_session_dir", orte_process_info.top_session_dir, true, &app->env); + opal_setenv("OMPI_MCA_orte_jobfam_session_dir", orte_process_info.jobfam_session_dir, true, &app->env); /* MPI-3 requires we provide some further info to the procs, * so we pass them as envars to avoid introducing further diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 67e53af9bf..7073900be7 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -57,6 +57,7 @@ #include "opal/util/show_help.h" #include "opal/util/error.h" #include "opal/util/output.h" +#include "opal/util/os_path.h" #include "opal/util/argv.h" #include "orte/mca/errmgr/errmgr.h" @@ -261,9 +262,12 @@ int pmix_server_init(void) kv = OBJ_NEW(opal_value_t); 
kv->key = strdup(OPAL_PMIX_SERVER_TMPDIR); kv->type = OPAL_STRING; - kv->data.string = strdup(orte_process_info.tmpdir_base); + kv->data.string = opal_os_path(false, orte_process_info.tmpdir_base, + orte_process_info.top_session_dir, + orte_process_info.jobfam_session_dir, NULL); opal_list_append(&info, &kv->super); - /* use the same for the system temp directory */ + /* use the same for the system temp directory - this is + * where the system-level tool connections will go */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_SYSTEM_TMPDIR); kv->type = OPAL_STRING; diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 9e9e800f10..2b6f015df6 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -13,7 +13,7 @@ * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -51,6 +51,7 @@ static char *orte_tmpdir_base = NULL; static char *orte_local_tmpdir_base = NULL; static char *orte_remote_tmpdir_base = NULL; static char *orte_top_session_dir = NULL; +static char *orte_jobfam_session_dir = NULL; int orte_register_params(void) { @@ -165,6 +166,20 @@ int orte_register_params(void) orte_process_info.top_session_dir = strdup(orte_top_session_dir); } + orte_jobfam_session_dir = NULL; + (void) mca_base_var_register ("orte", "orte", NULL, "jobfam_session_dir", + "The jobfamily session directory for applications", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, + &orte_jobfam_session_dir); + + if (NULL != orte_jobfam_session_dir) { + if (NULL != orte_process_info.jobfam_session_dir) { + free(orte_process_info.jobfam_session_dir); + } + orte_process_info.jobfam_session_dir = strdup(orte_jobfam_session_dir); + } + orte_prohibited_session_dirs = NULL; (void) mca_base_var_register ("orte", "orte", NULL, "no_session_dirs", "Prohibited locations for session directories (multiple locations separated by ',', default=NULL)", diff --git a/orte/util/proc_info.c b/orte/util/proc_info.c index 5a73493515..48961ff48a 100644 --- a/orte/util/proc_info.c +++ b/orte/util/proc_info.c @@ -12,7 +12,7 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014-2016 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -80,6 +80,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = { .num_local_peers = 0, .tmpdir_base = NULL, .top_session_dir = NULL, + .jobfam_session_dir = NULL, .job_session_dir = NULL, .proc_session_dir = NULL, .sock_stdin = NULL, @@ -294,6 +295,11 @@ int orte_proc_info_finalize(void) orte_process_info.top_session_dir = NULL; } + if (NULL != orte_process_info.jobfam_session_dir) { + free(orte_process_info.jobfam_session_dir); + orte_process_info.jobfam_session_dir = NULL; + } + if (NULL != orte_process_info.job_session_dir) { free(orte_process_info.job_session_dir); orte_process_info.job_session_dir = NULL; diff --git a/orte/util/proc_info.h b/orte/util/proc_info.h index f5d451d50a..116bab3440 100644 --- a/orte/util/proc_info.h +++ b/orte/util/proc_info.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -119,6 +119,7 @@ struct orte_proc_info_t { */ char *tmpdir_base; /**< Base directory of the session dir tree */ char *top_session_dir; /**< Top-most directory of the session tree */ + char *jobfam_session_dir; /**< Session directory for this family of jobs (i.e., share same mpirun) */ char *job_session_dir; /**< Session directory for job */ char *proc_session_dir; /**< Session directory for the process */ diff --git a/orte/util/session_dir.c b/orte/util/session_dir.c index 77bd37eaad..cebbc700ed 100644 --- a/orte/util/session_dir.c +++ b/orte/util/session_dir.c @@ -114,7 +114,16 @@ static int orte_create_dir(char *directory) /* * Construct the fullpath to the session directory - it - * will consist of "ompi.<hostname>.<pid>" + * will consist of "ompi.<hostname>.<uid>", and + * have subdirs: + * + * pid - the pid of the mpirun that oversees this job.
Note + * that direct-launched processes will have manufactured + * this value + * + * jobid - jobid of the application being executed + * + * vpid - vpid of the process */ int orte_session_dir_get_name(char **fulldirpath, @@ -132,10 +141,14 @@ orte_session_dir_get_name(char **fulldirpath, bool prefix_provided = false; int exit_status = ORTE_SUCCESS; size_t len; + uid_t uid; /* Ensure that system info is set */ orte_proc_info(); + /* get the effective uid */ + uid = geteuid(); + /* * set the 'hostname' */ @@ -156,30 +169,48 @@ orte_session_dir_get_name(char **fulldirpath, /* construct the frontend of the session directory*/ if (NULL != orte_process_info.top_session_dir) { frontend = strdup(orte_process_info.top_session_dir); - } - else { /* If not set then construct it */ - if (0 > asprintf(&frontend, "ompi.%s.%lu", hostname, (unsigned long)orte_process_info.pid)) { + } else { /* If not set then construct it */ + if (0 > asprintf(&frontend, "ompi.%s.%lu", hostname, (unsigned long)uid)) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); exit_status = ORTE_ERR_OUT_OF_RESOURCE; goto cleanup; } } - /* - * Construct the session directory - */ - /* If we were given a valid vpid then we can construct it fully into: - * openmpi-sessions-USERNAME@HOSTNAME_BATCHID/JOB-FAMILY/JOBID/VPID - */ - if( NULL != proc) { - if (ORTE_VPID_INVALID != proc->vpid) { - - if (0 > asprintf(&jobfam, "%d", ORTE_JOB_FAMILY(proc->jobid))) { + /* construct the next level down, which belongs to the + * job family. 
This is related to the mpirun that launched + * the job, or is an arbitrary (agreed upon) value if + * direct launched */ + if (ORTE_PROC_IS_HNP) { + if (0 > asprintf(&jobfam, "pid.%lu", (unsigned long)orte_process_info.pid)) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + exit_status = ORTE_ERR_OUT_OF_RESOURCE; + goto cleanup; + } + orte_process_info.jobfam_session_dir = strdup(jobfam); + } else if (NULL != orte_process_info.jobfam_session_dir) { + /* we had a job family session dir passed down to us by mpirun */ + jobfam = strdup(orte_process_info.jobfam_session_dir); + } else { + /* we were not given one, so define it */ + if (NULL == proc) { + jobfam = strdup("jobfam"); + } else { + if (0 > asprintf(&jobfam, "jf.%d", ORTE_JOB_FAMILY(proc->jobid))) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); exit_status = ORTE_ERR_OUT_OF_RESOURCE; goto cleanup; } + } + orte_process_info.jobfam_session_dir = strdup(jobfam); + } + /* + * Construct the session directory + */ + /* If we were given a valid vpid then we can construct it fully */ + if( NULL != proc) { + if (ORTE_VPID_INVALID != proc->vpid) { if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); exit_status = ORTE_ERR_OUT_OF_RESOURCE; @@ -192,23 +223,13 @@ orte_session_dir_get_name(char **fulldirpath, goto cleanup; } - sessions = opal_os_path( false, frontend, jobfam, job, vpidstr, NULL ); + sessions = opal_os_path(false, frontend, jobfam, job, vpidstr, NULL); if( NULL == sessions ) { ORTE_ERROR_LOG(ORTE_ERROR); exit_status = ORTE_ERROR; goto cleanup; } - } - /* If we were given a valid jobid then we can construct it partially into: - * openmpi-sessions-USERNAME@HOSTNAME_BATCHID/JOB-FAMILY/JOBID - */ - else if (ORTE_JOBID_INVALID != proc->jobid) { - if (0 > asprintf(&jobfam, "%d", ORTE_JOB_FAMILY(proc->jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - exit_status = ORTE_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - + } else if (ORTE_JOBID_INVALID != proc->jobid) { 
if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); exit_status = ORTE_ERR_OUT_OF_RESOURCE; @@ -221,14 +242,12 @@ orte_session_dir_get_name(char **fulldirpath, exit_status = ORTE_ERROR; goto cleanup; } - } /* if both are invalid */ - else { + } else { sessions = strdup(frontend); /* must dup this to avoid double-free later */ } - } /* If we were not given a proc at all, then we just set it to frontend - */ - else { + } else { + /* If we were not given a proc at all, then we just set it to frontend */ sessions = strdup(frontend); /* must dup this to avoid double-free later */ } @@ -666,14 +685,8 @@ static char *orte_build_job_session_dir(char *top_dir, orte_process_name_t *proc, orte_jobid_t jobid) { - char *jobfam = NULL; char *job_session_dir; - if (0 > asprintf(&jobfam, "%d", ORTE_JOB_FAMILY(proc->jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return NULL; - } - if (ORTE_JOBID_WILDCARD != jobid) { char *job = NULL; @@ -682,19 +695,18 @@ static char *orte_build_job_session_dir(char *top_dir, job_session_dir = NULL; goto out; } - job_session_dir = opal_os_path(false, top_dir, jobfam, job, NULL); + job_session_dir = opal_os_path(false, top_dir, orte_process_info.jobfam_session_dir, job, NULL); free(job); if (NULL == job_session_dir) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); } } else { - job_session_dir = opal_os_path(false, top_dir, jobfam, NULL); + job_session_dir = opal_os_path(false, top_dir, orte_process_info.jobfam_session_dir, NULL); if( NULL == job_session_dir) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); } } out: - free(jobfam); return job_session_dir; }