diff --git a/acinclude.m4 b/acinclude.m4 index 054e3e5fbd..3240802c22 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -13,6 +13,7 @@ dnl All rights reserved. dnl Copyright (c) 2006 Cisco Systems, Inc. dnl Copyright (c) 2006 Los Alamos National Security, LLC. All rights dnl reserved. +dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -114,3 +115,8 @@ m4_include(config/mca_no_configure_components.m4) # the list of all component configure.m4 macros. # m4_include(config/mca_m4_config_include.m4) + +# +# Check for ps commands and arguments +# +m4_include(config/ompi_check_ps.m4) diff --git a/config/ompi_check_ps.m4 b/config/ompi_check_ps.m4 new file mode 100644 index 0000000000..f718ad05ea --- /dev/null +++ b/config/ompi_check_ps.m4 @@ -0,0 +1,41 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +# See if there is a ps command that will produce the output we are +# interested in. If so, then save it away. Otherwise, the string is +# set to unknown. + +define([OMPI_PS_FLAVOR_CHECK],[ +AC_MSG_CHECKING([for flavor of ps to use]) +PS_FLAVOR="unknown" +ps -A -o fname > /dev/null 2>&1 + +if test "$?" = "0"; then + PS_FLAVOR="ps -A -o fname,pid,user" +else + ps -A -o command > /dev/null 2>&1 + if test "$?" = "0"; then + PS_FLAVOR="ps -A -o command,pid,user" + fi +fi +AC_MSG_RESULT([$PS_FLAVOR]) +AC_DEFINE_UNQUOTED([ORTE_CLEAN_PS_CMD], ["$PS_FLAVOR"], [Specific ps command to use in orte-clean]) +]) + diff --git a/configure.ac b/configure.ac index c2b73394f1..c5065a008a 100644 --- a/configure.ac +++ b/configure.ac @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright (c) 2006-2007 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2006 Los Alamos National Security, LLC. All rights # reserved. # $COPYRIGHT$ @@ -881,6 +881,11 @@ if test -z "$LEX" -o -n "`echo $LEX | grep missing`" -o \ fi fi +# +# Look for ps command and arguments for orte-clean +# +OMPI_PS_FLAVOR_CHECK + # # File system case sensitivity # diff --git a/orte/runtime/orte_universe_exists.c b/orte/runtime/orte_universe_exists.c index e3a38906ab..6dae3ca30a 100644 --- a/orte/runtime/orte_universe_exists.c +++ b/orte/runtime/orte_universe_exists.c @@ -364,3 +364,203 @@ int orte_universe_exists(orte_universe_t *univ) return orte_universe_check_connect(univ); } + +void +orte_universe_clean_directories(char *my_universe, int verbose) { + char *session_dir = NULL; +#if !defined(__WINDOWS__) + DIR *cur_dirp = NULL; + struct dirent * dir_entry; +#else + HANDLE hFind = INVALID_HANDLE_VALUE; + WIN32_FIND_DATA file_data; +#endif /* __WINDOWS__ */ + char *fulldirpath = NULL; + char *prefix = NULL; + char *frontend = NULL; + + /* + * Compute the full pathname to the session directory. + */ + if (ORTE_SUCCESS != orte_session_dir_get_name(&fulldirpath, + &prefix, + &frontend, + orte_system_info.user, + orte_system_info.nodename, + NULL, /* batch ID -- Not used */ + NULL, /* Universe Name -- NONE */ + NULL, /* jobid */ + NULL /* vpid */ + )) { + goto cleanup; + } + +#if !defined(__WINDOWS__) + session_dir = opal_os_path(false, prefix, frontend, NULL); + + /* + * Free up the various strings as these are allocated within + * the previous function. + */ + if (NULL != fulldirpath) { + free(fulldirpath); + fulldirpath = NULL; + } + if (NULL != prefix) { + free(prefix); + prefix = NULL; + } + if (NULL != frontend) { + free(frontend); + frontend = NULL; + } + + /* + * Check to make sure we have access to this directory + */ + if (ORTE_SUCCESS != opal_os_dirpath_access(session_dir, 0)) { + goto cleanup; + } + + /* + * Open up the base directory so we can get a listing + */ + if (NULL == (cur_dirp = opendir(session_dir))) { + goto cleanup; + } + /* + * For each directory/universe + */ + while (NULL != (dir_entry = readdir(cur_dirp))) { + + /* + * Skip non-universe directories + */ + if (0 == strncmp(dir_entry->d_name, ".", strlen(".")) || + 0 == strncmp(dir_entry->d_name, ".", strlen(".."))) { + continue; + } + + /* + * Skip my own universe. Let normal cleanup take care of that. + */ + if ((0 == strcmp(dir_entry->d_name, my_universe)) && + (strlen(dir_entry->d_name) == strlen(my_universe))) { + if (verbose) { + opal_output(0, "orte-clean: skipping ourselves, name=%s\n", + orte_universe_info.name); + } + continue; + } + + if (ORTE_SUCCESS != orte_session_dir_get_name(&fulldirpath, + &prefix, + &frontend, + orte_system_info.user, + orte_system_info.nodename, + NULL, /* batch ID -- Not used */ + dir_entry->d_name, + NULL, /* jobid */ + NULL /* vpid */ + )) { + continue; + } + + if (verbose) { + opal_output(0, "orte-clean: removing directory %s\n", fulldirpath); + } + opal_os_dirpath_destroy(fulldirpath, true, NULL); + + /* + * The orte_session_dir_get_name handles the freeing of the + * fulldirpath each time it is called. The prefix gets reused. + * So, there is no need to free them on each call. + */ + if (NULL != frontend) { + free(frontend); + } + } +#else + /* + * Open up the base directory so we can get a listing. + * + * On Windows if we want to parse the content of a directory the filename + * should end with the "*". Otherwise we will only open the directory + * structure (and not the content). + */ + frontend_abs = opal_os_path(false, prefix, frontend, "*", NULL); + hFind = FindFirstFile (frontend_abs, &file_data); + if (INVALID_HANDLE_VALUE == hFind) { + goto cleanup; + } + + do { + /* + * Skip non-universe directories + */ + if (0 == strncmp(dir_entry->d_name, ".", strlen(".")) || + 0 == strncmp(dir_entry->d_name, ".", strlen(".."))) { + continue; + } + + if ((0 == strcmp(dir_entry->d_name, my_universe)) && + (strlen(dir_entry->d_name) == strlen(my_universe))) { + if (verbose) { + opal_output(0, "orte-clean: skipping ourseleves, name=%s\n", + orte_universe_info.name); + } + continue; + } + + if (ORTE_SUCCESS != orte_session_dir_get_name(&fulldirpath, + &prefix, + &frontend, + orte_system_info.user, + orte_system_info.nodename, + NULL, /* batch ID -- Not used */ + dir_entry->d_name, + NULL, /* jobid */ + NULL /* vpid */ + )) { + continue; + } + + if (verbose) { + opal_output(0, "orte-clean: removing directory %s\n", fulldirpath); + } + opal_os_dirpath_destroy(fulldirpath, true, NULL); + + /* + * The orte_session_dir_get_name handles the freeing of the + * fulldirpath each time it is called. The prefix gets reused. + * So, there is no need to free them on each call. + */ + if (NULL != frontend) + free(frontend); + + } while (0 != FindNextFile(hFind, &file_data)); +#endif /* !defined(__WINDOWS__) */ + +#if !defined(__WINDOWS__) + if (NULL != cur_dirp) { + closedir(cur_dirp); + } +#else + FindClose(hFind); +#endif /* __WINDOWS__ */ + + if(NULL != fulldirpath) { + free(fulldirpath); + } + if(NULL != prefix) { + free(prefix); + } + + /* + * If the session directory is empty, then remove that too + */ + opal_os_dirpath_destroy(session_dir, false, NULL); + free(session_dir); + cleanup: + return; +} diff --git a/orte/runtime/runtime.h b/orte/runtime/runtime.h index 79630fbe58..86577bfaeb 100644 --- a/orte/runtime/runtime.h +++ b/orte/runtime/runtime.h @@ -184,6 +184,14 @@ ORTE_DECLSPEC int ompi_rte_init_io(void); */ ORTE_DECLSPEC int orte_setup_hnp(char *target_cluster, char *headnode, char *username); + /** + * Clean out all directories in a session directory except for the one + * handed in. + * @param my_universe Name of universe to not remove + * @param verbose Print out information as directories are removed + */ +ORTE_DECLSPEC void orte_universe_clean_directories(char *my_universe, int verbose); + #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/orte/tools/orte-clean/orte-clean.c b/orte/tools/orte-clean/orte-clean.c index d443a60807..f4e2dc35ca 100644 --- a/orte/tools/orte-clean/orte-clean.c +++ b/orte/tools/orte-clean/orte-clean.c @@ -43,6 +43,10 @@ #include #endif /* HAVE_STRING_H */ #include +#ifdef HAVE_DIRENT_H +#include +#endif /* HAVE_DIRENT_H */ +#include #include "orte/orte_constants.h" @@ -72,8 +76,9 @@ ******************/ static int orte_clean_init(void); static int parse_args(int argc, char *argv[]); -static int orte_clean_check_universe(orte_universe_t *universe); -static int orte_clean_universe(orte_universe_t *universe); +#if !defined(__WINDOWS__) +static void kill_procs(void); +#endif /* !defined(__WINDOWS__) */ /***************************************** * Global Vars for Command line Arguments @@ -96,7 +101,7 @@ opal_cmd_line_init_t cmd_line_opts[] = { 'v', NULL, "verbose", 0, &orte_clean_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL, - "Be Verbose" }, + "Generate verbose output" }, /* End of list */ { NULL, NULL, NULL, @@ -106,12 +111,16 @@ opal_cmd_line_init_t cmd_line_opts[] = { NULL } }; +/* + * This utility will do a brute force clean of a node. It will + * attempt to clean up any files in the user's session directory. + * It will also look for any orted and orterun processes that are + * not part of this job, and kill them off. +*/ int main(int argc, char *argv[]) { int ret, exit_status = ORTE_SUCCESS; - opal_list_item_t* item = NULL; - opal_list_t universe_search_result; /*************** * Initialize @@ -119,81 +128,27 @@ main(int argc, char *argv[]) if (ORTE_SUCCESS != (ret = parse_args(argc, argv))) { return ret; } - - OBJ_CONSTRUCT(&universe_search_result, opal_list_t); - if (ORTE_SUCCESS != (ret = orte_clean_init())) { exit_status = ret; goto cleanup; } - /* - * Get the list of universes on this machine + * Clean out all /tmp directories except for our own. */ - if( orte_clean_globals.verbose ) { - printf("orte_clean: Acquiring universe list...\n"); - } - if (ORTE_SUCCESS != (ret = orte_universe_search(&universe_search_result, true, false) ) ) { - exit_status = ret; - goto cleanup; - } - - /* - * For each universe in the listing - */ - for(item = opal_list_get_first(&universe_search_result); - item != opal_list_get_end(&universe_search_result); - item = opal_list_get_next(item) ) { - orte_universe_t *search_result; - search_result = (orte_universe_t *) item; - - /* - * Avoid cleaning our own universe. - */ - if( (0 == strcmp(search_result->name, orte_universe_info.name)) && - (strlen(search_result->name) == strlen(orte_universe_info.name)) ) { - continue; - } - /* - * Try to connect to the universe - */ - if( orte_clean_globals.verbose ) { - printf("orte_clean: Connecting to universe: %s\n", search_result->name); - } - if( ORTE_SUCCESS == (ret = orte_clean_check_universe(search_result)) ) { - /* - * The universe was able to be contacted, so let it be - */ - continue; - } - - /* - * If unable to connect to the universe, - * clean it up! - */ - if( orte_clean_globals.verbose ) { - printf("orte_clean: Cleaning the session directory for universe: %s\n", search_result->name); - } - if( ORTE_SUCCESS != (ret = orte_clean_universe(search_result)) ){ - exit_status = ret; - goto cleanup; - } - } - - /*************** - * Cleanup - ***************/ - cleanup: - while (NULL != (item = opal_list_remove_first(&universe_search_result))) { - OBJ_RELEASE(item); - } + orte_universe_clean_directories(orte_universe_info.name, orte_clean_globals.verbose); +#if !defined(__WINDOWS__) + kill_procs(); +#endif /* !defined(__WINDOWS__) */ orte_finalize(); opal_finalize(); - + cleanup: return exit_status; } - +/* + * Parse the command line arguments using the functions command + * line utility functions. + */ static int parse_args(int argc, char *argv[]) { int i, ret, len; opal_cmd_line_t cmd_line; @@ -207,10 +162,12 @@ static int parse_args(int argc, char *argv[]) { by value. So do a memcpy here instead. */ memcpy(&orte_clean_globals, &tmp, sizeof(tmp)); + /* + * Initialize list of available command line options. + */ opal_cmd_line_create(&cmd_line, cmd_line_opts); mca_base_open(); - mca_base_cmd_line_setup(&cmd_line); ret = opal_cmd_line_parse(&cmd_line, true, argc, argv); /** @@ -238,12 +195,14 @@ static int parse_args(int argc, char *argv[]) { orte_clean_globals.help) { char *args = NULL; args = opal_cmd_line_get_usage_msg(&cmd_line); - opal_show_help("help-orte-ps.txt", "usage", true, + opal_show_help("help-orte-clean.txt", "usage", true, args); free(args); return ORTE_ERROR; } + OBJ_DESTRUCT(&cmd_line); + return ORTE_SUCCESS; } @@ -299,85 +258,143 @@ static int orte_clean_init(void) { return exit_status; } -static int orte_clean_universe(orte_universe_t *universe) { - int ret, exit_status = ORTE_SUCCESS; - char *fulldirpath = NULL; - char *prefix = NULL; - char *frontend = NULL; - char *command = NULL; - char *session_dir = NULL; - - if( ORTE_SUCCESS != (ret = orte_session_dir_get_name(&fulldirpath, - &prefix, - &frontend, - universe->uid, - universe->host, - NULL, /* batch ID -- Not used */ - universe->name, - NULL, /* jobid */ - NULL /* vpid */ - ) ) ) { - exit_status = ret; - goto cleanup; - } - - opal_os_dirpath_destroy( fulldirpath, true, NULL ); - - /******************** - * If the session directory is empty, then remove that too - ********************/ - session_dir = opal_os_path( false, prefix, frontend, NULL ); - opal_os_dirpath_destroy(session_dir, false, NULL ); - - /******************** - * Need to check - * - openmpi-sessions-UID@gethostbyname()_0 - * - openmpi-sessions-UID@localhost_0 - * - remote nodes... - ********************/ - - cleanup: - if( NULL != fulldirpath) - free(fulldirpath); - if( NULL != prefix) - free(prefix); - if( NULL != frontend) - free(frontend); - if( NULL != command) - free(command); - - return exit_status; -} - -static int orte_clean_check_universe(orte_universe_t *universe) -{ - int ret, exit_status = ORTE_SUCCESS; - struct timeval ping_wait = {2, 0}; +#if !defined(__WINDOWS__) +/* + * This function makes a call to "ps" to find out the processes that + * are running on this node. It then attempts to kill off any orteds + * and orteruns that are not related to this job. + */ +static +void kill_procs(void) { + int ortedpid, orunpid; + char procname[MAXPATHLEN]; /* only really need 8, but being safe */ + char pidstr[MAXPATHLEN]; /* only really need 8, but being safe */ + char user[MAXPATHLEN]; + int procpid; + FILE *psfile; + bool kill_orteruns = false; /* - * Make sure session directory still exists + * This is the command that is used to get the information about + * all the processes that are running. The output looks like the + * following: + * COMMAND PID USER + * tcsh 12556 rolfv + * ps 14424 rolfv + * etc. + * Currently, we do not make use of the USER field, but we may later + * on so we grab it also. */ - if (ORTE_SUCCESS != (ret = orte_session_dir(false, - orte_process_info.tmpdir_base, - universe->uid, - universe->host, - NULL, /* Batch ID -- Not used */ - universe->name, - NULL, /* Jobid */ - NULL /* VPID */ - )) ) { - exit_status = ORTE_ERR_NOT_FOUND; - goto cleanup; - } - + /* - * Contact the HNP to see if it is still around + * The configure determines if there is a valid ps command for us to + * use. If it is set to unknown, then we skip this section. */ - if( ORTE_SUCCESS != (ret = orte_rml.ping(universe->seed_uri, &ping_wait)) ) { - exit_status = ORTE_ERR_CONNECTION_FAILED; - goto cleanup; + char command[] = ORTE_CLEAN_PS_CMD; + if (!(strcmp("unknown", command))) { + return; } - - cleanup: - return exit_status; + + /* + * Try to get the pid of our orterun process from our universe name. + * This works in the case where one is using the default universe name + * which appends the pid after the 'default-universe-' string. In + * this way, we avoid killing our own mpirun process. Note that if + * we cannot determine our orterun pid, then we skip killing the + * orterun processes to avoid odd behavior for the user. + */ + if (!(strncmp(ORTE_DEFAULT_UNIVERSE, orte_universe_info.name, + sizeof(ORTE_DEFAULT_UNIVERSE)-1))) { + char *tptr; + + /* + * Set a pointer to the pid part of the name. The pointer + * is adjusted by the name along with one extra to remove the + * dash before the pid. Then convert to a pid. If the strtol() + * returns zero, then we got an error on the conversion and we + * will skip killing the orteruns. + */ + tptr = orte_universe_info.name + sizeof(ORTE_DEFAULT_UNIVERSE); + if (0 != (orunpid = (int)strtol(tptr, (char **)NULL, 10))) { + kill_orteruns = true; + } + } + + /* + * Get our parent pid which is the pid of the orted. + */ + ortedpid = getppid(); + + /* + * There is a race condition here. The problem is that we are looking + * for any processes named orted. However, one may erroneously find more + * orteds then there really are because the orted is doing a series of + * fork/execs. If we run with more than one orte-clean on a node, then + * one of the orte-cleans may catch the other one while it has forked, + * but not exec'ed. It will therefore kill an orte-clean. Now one + * can argue it is silly to run more than one orte-clean on a node, and + * this is true. We will have to figure out how to prevent this. For + * now, we use a big hammer and just sleep a second to decrease the + * probability. + */ + sleep(1); + + psfile = popen(command, "r"); + /* + * Read the first line of the output. We just throw it away + * as it is the header consisting of the words COMMAND, PID and + * USER. + */ + if ((fscanf(psfile, "%s%s%s", procname, pidstr, user)) == EOF) { + return; + } + + while ((fscanf(psfile, "%s%s%s", procname, pidstr, user)) != EOF) { + + procpid = atoi(pidstr); + + /* + * Look for any orteds that are not our parent and attempt to + * kill them. We currently do not worry whether we are the + * owner or not. If we are not, we will just fail to send + * the signal and that is OK. This also allows a root process + * to kill all orteds. + */ + if (!strcmp("orted", procname)) { + if (procpid != ortedpid) { + if (orte_clean_globals.verbose) { + opal_output(0, "orte-clean: found potential rogue orted process" + " (pid=%d,user=%s), sending SIGKILL...\n", + procpid, user); + } + /* + * We ignore the return code here as we do not really + * care whether this worked or not. + */ + (void)kill(procpid, SIGKILL); + } + } + + /* + * Now check for any orteruns. + */ + if (kill_orteruns) { + if (!strcmp("orterun", procname)) { + if (procpid != orunpid) { + if (orte_clean_globals.verbose) { + opal_output(0, "orte-clean: found potential rogue orterun process" + " (pid=%d,user=%s), sending SIGKILL...\n", + procpid, user); + } + /* + * We ignore the return code here as we do not really + * care whether this worked or not. + */ + (void)kill(procpid, SIGKILL); + } + } + } + } + return; } +#endif /* !defined(__WINDOWS__) */