From ea4de16561d0f74d1f7f2dd7cabb2c3bff8a66d6 Mon Sep 17 00:00:00 2001 From: Rainer Keller <rainer.keller@hlrs.de> Date: Wed, 10 Feb 2010 23:18:29 +0000 Subject: [PATCH] - Check whether file is opened on network file-system. If file does not exist, check the directory it lives in... Maybe used by caller, trying to open mmap() on NFS, Lustre or Panasas (thanks Sam). For now, this is used to warn about the usage of mmap on such FS. Please note, that Ralph mentioned the orte_no_session_dir parameter. The help message includes a reference to this. Tested on NFS and Lustre on Linux on smoky: mpirun --mca orte_tmpdir_base $HOME/tmp -np 2 ./mpi_stub jaguar: mpirun ... --mca orte_tmpdir_base /tmp/work/$USER ... Fixes trac:1354 This should cmr:v1.5 once it has soaked and is shown to work on Solaris This commit was SVN r22604. The following Trac tickets were found above: Ticket 1354 --> https://svn.open-mpi.org/trac/ompi/ticket/1354 --- configure.ac | 7 +- ompi/mca/common/sm/common_sm_mmap.c | 6 + ompi/mca/common/sm/help-mpi-common-sm.txt | 11 ++ opal/util/path.c | 170 ++++++++++++++++++++++ opal/util/path.h | 18 +++ test/Makefile.am | 2 +- test/util/Makefile.am | 9 +- test/util/opal_path_nfs.c | 140 ++++++++++++++++++ 8 files changed, 358 insertions(+), 5 deletions(-) create mode 100644 test/util/opal_path_nfs.c diff --git a/configure.ac b/configure.ac index 87df4dde01..ed53485eaf 100644 --- a/configure.ac +++ b/configure.ac @@ -519,10 +519,10 @@ AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \ libutil.h memory.h netdb.h netinet/in.h netinet/tcp.h \ poll.h pthread.h pty.h pwd.h sched.h stdint.h \ stdlib.h string.h strings.h stropts.h sys/fcntl.h sys/ipc.h \ - sys/ioctl.h sys/mman.h sys/param.h sys/queue.h \ + sys/ioctl.h sys/mman.h sys/mount.h sys/param.h sys/queue.h \ sys/resource.h sys/select.h sys/socket.h sys/sockio.h \ - stdarg.h sys/stat.h sys/statvfs.h sys/time.h sys/tree.h \ - sys/types.h sys/uio.h net/uio.h sys/utsname.h sys/wait.h syslog.h \ + stdarg.h 
sys/stat.h sys/statfs.h sys/statvfs.h sys/time.h sys/tree.h \ + sys/types.h sys/uio.h net/uio.h sys/utsname.h sys/vfs.h sys/wait.h syslog.h \ time.h termios.h ulimit.h unistd.h util.h utmp.h malloc.h \ ifaddrs.h sys/sysctl.h crt_externs.h regex.h \ ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h]) @@ -1228,6 +1228,7 @@ AC_CONFIG_FILES([ test/class/Makefile test/support/Makefile test/threads/Makefile + test/util/Makefile ]) OPAL_CONFIG_FILES diff --git a/ompi/mca/common/sm/common_sm_mmap.c b/ompi/mca/common/sm/common_sm_mmap.c index b528c93add..d5a186325f 100644 --- a/ompi/mca/common/sm/common_sm_mmap.c +++ b/ompi/mca/common/sm/common_sm_mmap.c @@ -43,6 +43,7 @@ #endif #include "opal/util/output.h" +#include "opal/util/path.h" #include "opal/align.h" #include "opal/threads/mutex.h" @@ -221,6 +222,11 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(ompi_proc_t **procs, if (0 == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, &(procs[0]->proc_name))) { + /* Check, whether the specified filename is on a network file system */ + if (opal_path_nfs(file_name)) { + orte_show_help("help-mpi-common-sm.txt", "mmap on nfs", 1, + file_name); + } /* process initializing the file */ fd = open(file_name, O_CREAT|O_RDWR, 0600); if (fd < 0) { diff --git a/ompi/mca/common/sm/help-mpi-common-sm.txt b/ompi/mca/common/sm/help-mpi-common-sm.txt index 395f5b53d5..c1550dff09 100644 --- a/ompi/mca/common/sm/help-mpi-common-sm.txt +++ b/ompi/mca/common/sm/help-mpi-common-sm.txt @@ -29,3 +29,14 @@ Open MPI, and if not, contact the Open MPI developers. Requested size: %ul Control seg size: %ul Data seg aligment: %ul + +[mmap on nfs] +Creating the shared memory backup file on a network file system, such +as NFS or Lustre is not recommended. +This may be due to using the wrong tmpdir variable. 
+Please ask Your system administrator to provide a system-wide MCA +parameter file, in which parameter orte_no_session_dir is specified: +It states paths of directories, not allowed as session directories. + + File Name: %s + diff --git a/opal/util/path.c b/opal/util/path.c index d10a367a12..973d390668 100644 --- a/opal/util/path.c +++ b/opal/util/path.c @@ -20,16 +20,33 @@ #include "opal_config.h" #include <stdlib.h> #include <string.h> +#include <errno.h> #ifdef HAVE_UNISTD_H #include <unistd.h> #endif #ifdef HAVE_SHLWAPI_H #include <shlwapi.h> #endif +#ifdef HAVE_SYS_MOUNT_H +#include <sys/mount.h> +#endif +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif #ifdef HAVE_SYS_STAT_H #include <sys/stat.h> #endif +#ifdef HAVE_SYS_VFS_H +#include <sys/vfs.h> +#endif +#ifdef HAVE_SYS_STATFS_H +#include <sys/statfs.h> +#endif +#ifdef HAVE_SYS_MOUNT_H +#include <sys/mount.h> +#endif +#include "opal/util/output.h" #include "opal/util/path.h" #include "opal/util/os_path.h" #include "opal/util/argv.h" @@ -358,3 +375,156 @@ char* opal_find_absolute_path( char* app_name ) } return NULL; } + + +/** + * @brief Figure out, whether fname is on network file system + * + * Try to figure out, whether the file name specified through fname is + * on any network file system (currently NFS, Lustre and Panasas). + * + * If the file is not created, the parent directory is checked. + * This allows checking for NFS prior to opening the file. + * + * @param[in] fname File name to check + * + * @retval true If fname is on NFS, Lustre or Panasas + * @retval false otherwise + * + * + * Linux: + * statfs(const char *path, struct statfs *buf); + * with fsid_t f_fsid; (in kernel struct{ int val[2] };) + * return 0 success, -1 on failure with errno set. + * statvfs (const char *path, struct statvfs *buf); + * with unsigned long f_fsid; -- returns wrong info + * return 0 success, -1 on failure with errno set. 
+ *     Solaris:
+ *          statvfs (const char *path, struct statvfs *buf);
+ *          with f_basetype, contains a string of length FSTYPSZ
+ *          return 0 success, -1 on failure with errno set.
+ *     FreeBSD:
+ *          statfs(const char *path, struct statfs *buf);
+ *          with f_fstypename, contains a string of length MFSNAMELEN
+ *          return 0 success, -1 on failure with errno set.
+ *          compliant with: 4.4BSD.
+ *     Mac OSX (10.6.2):
+ *          statvfs(const char * restrict path, struct statvfs * restrict buf);
+ *          with fsid    Not meaningful in this implementation.
+ *          is just a wrapper around statfs()
+ *          statfs(const char *path, struct statfs *buf);
+ *          with f_fstypename, contains a string of length MFSTYPENAMELEN
+ *          return 0 success, -1 on failure with errno set.
+ *     Windows (interix):
+ *          statvfs(const char *path, struct statvfs *buf);
+ *          with unsigned long f_fsid
+ *          return 0 success, -1 on failure with errno set.
+ *
+ * Implementation notes:
+ *   - A NULL fname, a failed strdup() or a platform for which none of
+ *     the stat(v)fs flavours below is selected yields false.
+ *   - When stat(v)fs fails on the path, its last component is stripped
+ *     and the parent is probed instead.  The walk ends after the root
+ *     directory (absolute names) or the current directory (names
+ *     without any directory part) has been probed.
+ *   - fname itself is never modified; a private copy is truncated.
+ */
+#ifndef LL_SUPER_MAGIC
+#define LL_SUPER_MAGIC                    0x0BD00BD0     /* Lustre magic number */
+#endif
+#ifndef NFS_SUPER_MAGIC
+#define NFS_SUPER_MAGIC                   0x6969
+#endif
+#ifndef PAN_KERNEL_FS_CLIENT_SUPER_MAGIC
+#define PAN_KERNEL_FS_CLIENT_SUPER_MAGIC  0xAAD7AAEA     /* Panasas FS */
+#endif
+
+#define MASK1          0xff
+#define MASK2        0xffff
+#define MASK3      0xffffff
+#define MASK4    0xffffffff
+
+bool opal_path_nfs(char *fname)
+{
+#if !defined(__WINDOWS__)
+    int i;
+    int rc = -1;
+    int trials;
+    char * file;
+#if defined (__sun__)
+    struct statvfs buf;
+#elif defined(__linux__) || defined(linux) || defined (__BSD) || defined(__MACOSX__)
+    struct statfs buf;
+#endif
+    /* Known network file systems: magic number (compared on Linux after
+     * masking f_type) and type name (compared on Solaris, BSD, Mac OSX) */
+    static const struct fs_types_t {
+        unsigned long long f_fsid;
+        unsigned long long f_mask;
+        const char * f_fsname;
+    } fs_types[] = {
+        {LL_SUPER_MAGIC,                   MASK4, "lustre"},
+        {NFS_SUPER_MAGIC,                  MASK2, "nfs"},
+        {PAN_KERNEL_FS_CLIENT_SUPER_MAGIC, MASK4, "panfs"},
+    };
+#define FS_TYPES_NUM (int)(sizeof (fs_types)/sizeof (fs_types[0]))
+
+    if (NULL == fname) {
+        return false;
+    }
+
+    /* Work on a private copy, it is truncated in place further down */
+    file = strdup (fname);
+    if (NULL == file) {
+        return false;
+    }
+
+    for (;;) {
+        char * last_sep;
+        bool at_top = false;
+
+        /*
+         * First, get the OS-dependent struct stat(v)fs buf
+         * This may return the ESTALE error on NFS, if the underlying
+         * file/path has changed
+         */
+        trials = 5;
+        do {
+#if defined (__sun__)
+            rc = statvfs (file, &buf);
+#elif defined(__linux__) || defined(linux) || defined (__BSD) || defined(__MACOSX__)
+            rc = statfs (file, &buf);
+#else
+            /* No stat(v)fs flavour selected for this platform: instead
+             * of evaluating an uninitialized rc, report "not on NFS" */
+            free (file);
+            return false;
+#endif
+        } while (-1 == rc && ESTALE == errno && (0 < --trials));
+
+        if (-1 != rc) {
+            break;
+        }
+
+        /* In case some error with the current filename, try the directory */
+        last_sep = strrchr (file, OPAL_PATH_SEP[0]);
+        if (NULL == last_sep) {
+            /* No directory part at all: the parent is the current
+             * directory, unless that is what has just failed */
+            if ('\0' == file[0] || 0 == strcmp (file, ".")) {
+                at_top = true;
+            } else {
+                /* file holds at least one character, so "." fits */
+                file[0] = '.';
+                file[1] = '\0';
+            }
+        } else if (file == last_sep) {
+            /* The parent is the root directory, unless the root itself
+             * is what has just failed */
+            if ('\0' == file[1]) {
+                at_top = true;
+            } else {
+                file[1] = '\0';
+            }
+        } else {
+            *last_sep = '\0';
+        }
+
+        /* Stop the search, when we have searched past root '/' */
+        if (at_top) {
+            free (file);
+            return false;
+        }
+
+        OPAL_OUTPUT_VERBOSE((10, 0, "opal_path_nfs: stat(v)fs on file:%s failed errno:%d directory:%s\n",
+                             fname, errno, file));
+    }
+
+    /* Next, extract the magic value */
+    for (i = 0; i < FS_TYPES_NUM; i++) {
+#if defined (__sun__)
+        if (0 == strncasecmp (fs_types[i].f_fsname, buf.f_basetype, FSTYPSZ)) {
+            break;
+        }
+#elif defined(__MACOSX__)
+        if (0 == strncasecmp (fs_types[i].f_fsname, buf.f_fstypename, MFSTYPENAMELEN)) {
+            break;
+        }
+#elif defined(__BSD)
+        if (0 == strncasecmp (fs_types[i].f_fsname, buf.f_fstypename, MFSNAMELEN)) {
+            break;
+        }
+#elif defined(__linux__) || defined(linux)
+        if (fs_types[i].f_fsid == ((unsigned long long) buf.f_type & fs_types[i].f_mask)) {
+            break;
+        }
+#endif
+    }
+
+    free (file);
+
+    if (FS_TYPES_NUM == i) {
+        return false;
+    }
+
+    OPAL_OUTPUT_VERBOSE((10, 0, "opal_path_nfs: file:%s on fs:%s\n",
+                         fname, fs_types[i].f_fsname));
+    return true;
+
+#undef FS_TYPES_NUM
+
+#else
+    return false;
+#endif /* __WINDOWS__ */
+}
+
diff --git a/opal/util/path.h b/opal/util/path.h
index 9373feb8d3..9420b35286 100644
--- a/opal/util/path.h
+++ b/opal/util/path.h
@@ -119,5 +119,23 @@ OPAL_DECLSPEC char* opal_find_absolute_path( char* app_name ) __opal_attribute_w
  */
 OPAL_DECLSPEC char *opal_path_access(char *fname, char *path, int mode) __opal_attribute_malloc__
__opal_attribute_warn_unused_result__; + +/** + * @brief Figure out, whether fname is on network file system + * + * Try to figure out, whether the file name specified through fname is + * on any network file system (currently NFS, Lustre and Panasas). + * + * If the file is not created, the parent directory is checked. + * This allows checking for NFS prior to opening the file. + * + * @param[in] fname File name to check + * + * @retval true If fname is on NFS, Lustre or Panasas + * @retval false otherwise + */ +OPAL_DECLSPEC bool opal_path_nfs(char *fname) __opal_attribute_warn_unused_result__; + END_C_DECLS #endif /* OPAL_PATH_H */ + diff --git a/test/Makefile.am b/test/Makefile.am index dd88364b52..76f360a2be 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -24,7 +24,7 @@ if PROJECT_OMPI endif # support needs to be first for dependencies -SUBDIRS = support asm class threads datatype $(REQUIRES_OMPI) +SUBDIRS = support asm class threads datatype util $(REQUIRES_OMPI) DIST_SUBDIRS = event $(SUBDIRS) TESTS = diff --git a/test/util/Makefile.am b/test/util/Makefile.am index 4c97769b64..e3bdfeacb6 100644 --- a/test/util/Makefile.am +++ b/test/util/Makefile.am @@ -26,7 +26,8 @@ check_PROGRAMS = \ opal_timer \ opal_os_create_dirpath \ opal_argv \ - opal_basename + opal_basename \ + opal_path_nfs TESTS = \ $(check_PROGRAMS) @@ -61,6 +62,12 @@ opal_basename_LDADD = \ $(top_builddir)/test/support/libsupport.a opal_basename_DEPENDENCIES = $(opal_basename_LDADD) +opal_path_nfs_SOURCES = opal_path_nfs.c +opal_path_nfs_LDADD = \ + $(top_builddir)/opal/libopen-pal.la \ + $(top_builddir)/test/support/libsupport.a +opal_path_nfs_DEPENDENCIES = $(opal_path_nfs_LDADD) + opal_os_path_SOURCES = opal_os_path.c opal_os_path_LDADD = \ $(top_builddir)/opal/libopen-pal.la \ diff --git a/test/util/opal_path_nfs.c b/test/util/opal_path_nfs.c new file mode 100644 index 0000000000..29f67fd558 --- /dev/null +++ b/test/util/opal_path_nfs.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 
2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2010      Oak Ridge National Laboratory.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <dirent.h>
+
+#include "support.h"
+#include "opal/util/path.h"
+#include "opal/util/output.h"
+
+/*
+#define DEBUG
+*/
+
+static void test(char* file, bool expect);
+static void get_mounts (int * num_dirs, char ** dirs[], bool ** nfs);
+
+
+/**
+ * Test driver for opal_path_nfs().
+ *
+ * Every directory given on the command line is passed to opal_path_nfs()
+ * and the answer is printed.  On Linux, additionally every mount point
+ * reported by mount(8) is checked against the file system type that
+ * mount(8) printed for it.
+ */
+int main(int argc, char* argv[])
+{
+    test_init("opal_path_nfs()");
+#ifdef DEBUG
+    printf ("Test usage: ./opal_path_nfs [DIR]\n");
+    printf ("On Linux interprets output from mount(8) to check for nfs and verify opal_path_nfs()\n");
+    printf ("Additionally, you may specify multiple DIR on the cmd-line, of which you the output\n");
+#endif
+
+    if (1 < argc) {
+        int i;
+        for (i = 1; i < argc; i++) {
+            printf ("Is dir[%d]:%s one of the detected network file systems? %s\n",
+                    i, argv[i], opal_path_nfs (argv[i]) ? "Yes": "No");
+        }
+    }
+
+#ifdef __linux__
+    {
+        int num_dirs = 0;
+        char ** dirs = NULL;
+        bool * nfs = NULL;
+
+        get_mounts (&num_dirs, &dirs, &nfs);
+        while (num_dirs--) {
+            test (dirs[num_dirs], nfs[num_dirs]);
+            free (dirs[num_dirs]);
+        }
+        /* free(NULL) is a no-op, so this is fine if get_mounts() failed */
+        free (dirs);
+        free (nfs);
+    }
+#endif
+
+#ifdef __WINDOWS__
+#endif
+
+    /* All done */
+    return test_finalize();
+}
+
+
+/**
+ * Compare opal_path_nfs(file) against the expected answer and record
+ * the outcome with test_success() / test_failure().
+ *
+ * @param[in] file    Path handed to opal_path_nfs()
+ * @param[in] expect  Answer opal_path_nfs() is expected to give
+ */
+static void test(char* file, bool expect)
+{
+    bool got;
+
+#ifdef DEBUG
+    printf ("test(): file:%s bool:%d\n",
+            file, expect);
+#endif
+    got = opal_path_nfs (file);
+    if (expect == got) {
+        test_success();
+    } else {
+        /* Fixed-size buffer: snprintf() truncates an overlong path
+         * instead of relying on an unchecked allocation */
+        char msg[1024];
+        snprintf(msg, sizeof(msg), "Mismatch: input \"%s\", expected:%d got:%d\n",
+                 file, expect, got);
+        test_failure(msg);
+    }
+}
+
+/**
+ * Collect the mount points reported by mount(8) together with a flag
+ * telling whether mount(8) lists them as nfs, nfs4, lustre or panfs.
+ *
+ * The output of mount(8) is written to the file opal_path_nfs.out in the
+ * current directory and parsed from there; at most MAX_DIR entries are
+ * returned and parsing stops at the first line that does not consist of
+ * at least two whitespace-separated fields.
+ *
+ * @param[out] num_dirs  Number of valid entries in *dirs and *nfs
+ * @param[out] dirs      malloc'ed array of malloc'ed mount point names;
+ *                       NULL on failure.  The caller frees the entries
+ *                       and the array.
+ * @param[out] nfs       malloc'ed array of flags, parallel to *dirs;
+ *                       NULL on failure.  The caller frees it.
+ */
+static void get_mounts (int * num_dirs, char ** dirs[], bool * nfs[])
+{
+#define MAX_DIR 256
+#define SIZE 1024
+    char * cmd = "mount | cut -f3,5 -d' ' > opal_path_nfs.out";
+    int i = 0;
+    FILE * file;
+    char ** dirs_tmp;
+    bool * nfs_tmp;
+    char buffer[SIZE];
+    /* A field parsed out of buffer never exceeds SIZE-1 characters, so
+     * SIZE bytes per field make the unbounded %s conversions safe */
+    char fs[SIZE];
+
+    /* Defined results on every error path */
+    *num_dirs = 0;
+    *dirs = NULL;
+    *nfs = NULL;
+
+    if (-1 == system (cmd)) {
+        return;
+    }
+
+    file = fopen("opal_path_nfs.out", "r");
+    if (NULL == file) {
+        return;
+    }
+
+    dirs_tmp = (char**) malloc (MAX_DIR * sizeof(*dirs_tmp));
+    nfs_tmp = (bool*) malloc (MAX_DIR * sizeof(*nfs_tmp));
+    if (NULL == dirs_tmp || NULL == nfs_tmp) {
+        free (dirs_tmp);
+        free (nfs_tmp);
+        fclose (file);
+        return;
+    }
+
+    while (i < MAX_DIR && NULL != fgets (buffer, SIZE, file)) {
+        dirs_tmp[i] = malloc (SIZE);
+        if (NULL == dirs_tmp[i]) {
+            break;
+        }
+        if (2 != sscanf (buffer, "%s %s\n", dirs_tmp[i], fs)) {
+            /* Entry i is not counted, so release it right here */
+            free (dirs_tmp[i]);
+            break;
+        }
+        /* NFSv4 mounts are listed as "nfs4" but share NFS_SUPER_MAGIC */
+        nfs_tmp[i] = (0 == strcasecmp (fs, "nfs") ||
+                      0 == strcasecmp (fs, "nfs4") ||
+                      0 == strcasecmp (fs, "lustre") ||
+                      0 == strcasecmp (fs, "panfs"));
+#ifdef DEBUG
+        printf ("get_mounts: dirs[%d]:%s fs:%s nfs:%s\n",
+                i, dirs_tmp[i], fs, nfs_tmp[i] ? "Yes" : "No");
+#endif
+        i++;
+    }
+
+    fclose (file);
+
+    *num_dirs = i;
+    *dirs = dirs_tmp;
+    *nfs = nfs_tmp;
+}
+