From 2d33b0a74535b773c2feb6e8c3a701ce52414492 Mon Sep 17 00:00:00 2001 From: mjbhaskar Date: Wed, 7 Jan 2015 14:41:44 -0600 Subject: [PATCH 1/8] A fix for memory corruption seen on 32 bit machines --- ompi/mca/io/ompio/io_ompio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/io/ompio/io_ompio.c b/ompi/mca/io/ompio/io_ompio.c index a6516eeddc..6258820f29 100644 --- a/ompi/mca/io/ompio/io_ompio.c +++ b/ompi/mca/io/ompio/io_ompio.c @@ -2228,7 +2228,7 @@ int mca_io_ompio_merge_initial_groups(mca_io_ompio_file_t *fh, } end = i; } - merge_aggrs = (int *)malloc((end - start) * sizeof(int)); + merge_aggrs = (int *)malloc((end - start + 1) * sizeof(int)); if (NULL == merge_aggrs) { opal_output (1, "OUT OF MEMORY\n"); return OMPI_ERR_OUT_OF_RESOURCE; From 7d206ae7693f3bafca7fc0805a26a005db1c8ed8 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 7 Jan 2015 13:48:17 -0700 Subject: [PATCH 2/8] btl/ugni: fix a couple of bugs Two fixes: - Do not try to return a mailbox to the free list if one wasn't allocated. - Do not try to tear down IRQ CQs if they were not created. --- opal/mca/btl/ugni/btl_ugni_endpoint.c | 6 ++++-- opal/mca/btl/ugni/btl_ugni_module.c | 16 +++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/opal/mca/btl/ugni/btl_ugni_endpoint.c b/opal/mca/btl/ugni/btl_ugni_endpoint.c index dc7e9dfd55..7496601944 100644 --- a/opal/mca/btl/ugni/btl_ugni_endpoint.c +++ b/opal/mca/btl/ugni/btl_ugni_endpoint.c @@ -77,8 +77,10 @@ int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnec (void) opal_common_ugni_ep_destroy (&ep->rdma_ep_handle); OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock); - OMPI_FREE_LIST_RETURN_MT(&ep->btl->smsg_mboxes, ((ompi_free_list_item_t *) ep->mailbox)); - ep->mailbox = NULL; + if (ep->mailbox) { + OMPI_FREE_LIST_RETURN_MT(&ep->btl->smsg_mboxes, ((ompi_free_list_item_t *) ep->mailbox)); + ep->mailbox = NULL; + } ep->state = MCA_BTL_UGNI_EP_STATE_INIT; diff --git a/opal/mca/btl/ugni/btl_ugni_module.c b/opal/mca/btl/ugni/btl_ugni_module.c index ddfa6eb6e1..0bc4e59a27 100644 --- a/opal/mca/btl/ugni/btl_ugni_module.c +++ b/opal/mca/btl/ugni/btl_ugni_module.c @@ -166,14 +166,16 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl) BTL_ERROR(("error tearing down RX SMSG CQ - %s",gni_err_str[rc])); } - rc = GNI_CqDestroy (ugni_module->rdma_local_irq_cq); - if (GNI_RC_SUCCESS != rc) { - BTL_ERROR(("error tearing down local BTE/FMA CQ - %s",gni_err_str[rc])); - } + if (mca_btl_ugni_component.progress_thread_enabled) { + rc = GNI_CqDestroy (ugni_module->rdma_local_irq_cq); + if (GNI_RC_SUCCESS != rc) { + BTL_ERROR(("error tearing down local BTE/FMA CQ - %s",gni_err_str[rc])); + } - rc = GNI_CqDestroy (ugni_module->smsg_remote_irq_cq); - if (GNI_RC_SUCCESS != rc) { - BTL_ERROR(("error tearing down remote SMSG CQ - %s",gni_err_str[rc])); + rc = GNI_CqDestroy (ugni_module->smsg_remote_irq_cq); + if (GNI_RC_SUCCESS != rc) { + BTL_ERROR(("error tearing down remote SMSG CQ - %s",gni_err_str[rc])); + } } /* cancel wildcard post */ From 4e592ac434ef10940097538439ad8a650494a868 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 7 Jan 2015 18:37:26 -0800 Subject: [PATCH 3/8] Fix the tarball by providing the correct list of headers in the Makefile.am --- ompi/mca/osc/pt2pt/Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ompi/mca/osc/pt2pt/Makefile.am b/ompi/mca/osc/pt2pt/Makefile.am index 95e695e949..83bdb33e6c 100644 --- a/ompi/mca/osc/pt2pt/Makefile.am +++ b/ompi/mca/osc/pt2pt/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved # $COPYRIGHT$ # # Additional copyrights may follow @@ -27,7 +28,7 @@ pt2pt_sources = \ osc_pt2pt_frag.h \ osc_pt2pt_frag.c \ osc_pt2pt_header.h \ - osc_pt2pt_obj_convert.h \ + osc_pt2pt_pending_frag.h \ osc_pt2pt_request.h \ osc_pt2pt_request.c \ osc_pt2pt_active_target.c \ From b746a8f584bf3a195b56d4baff21a564550aa400 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 8 Jan 2015 14:08:46 +0900 Subject: [PATCH 4/8] romio: compile openmpi mpi-io glue --- .../io/romio/romio/mpi-io/glue/Makefile.mk | 1 + ompi/mca/io/romio/romio/ompi.patch | 104 +++++++++++------- 2 files changed, 66 insertions(+), 39 deletions(-) diff --git a/ompi/mca/io/romio/romio/mpi-io/glue/Makefile.mk b/ompi/mca/io/romio/romio/mpi-io/glue/Makefile.mk index 05954a167c..66f7f9efab 100644 --- a/ompi/mca/io/romio/romio/mpi-io/glue/Makefile.mk +++ b/ompi/mca/io/romio/romio/mpi-io/glue/Makefile.mk @@ -7,6 +7,7 @@ include $(top_srcdir)/mpi-io/glue/default/Makefile.mk include $(top_srcdir)/mpi-io/glue/mpich/Makefile.mk +include $(top_srcdir)/mpi-io/glue/openmpi/Makefile.mk if !BUILD_ROMIO_EMBEDDED romio_other_sources += \ diff --git a/ompi/mca/io/romio/romio/ompi.patch b/ompi/mca/io/romio/romio/ompi.patch index 4223e864c7..eae59bd086 100644 --- a/ompi/mca/io/romio/romio/ompi.patch +++ b/ompi/mca/io/romio/romio/ompi.patch @@ -1,6 +1,6 @@ -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/adio/common/ad_end.c ompi/ompi/mca/io/romio/romio/adio/common/ad_end.c +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/adio/common/ad_end.c ompi/ompi/mca/io/romio/romio/adio/common/ad_end.c --- mpich/src/mpi/romio/adio/common/ad_end.c 2014-12-25 16:45:58.224965512 +0900 -+++ ompi/ompi/mca/io/romio/romio/adio/common/ad_end.c 2015-01-06 19:41:59.287738412 +0900 ++++ ompi/ompi/mca/io/romio/romio/adio/common/ad_end.c 2015-01-06 19:43:31.734793345 +0900 @@ -16,7 +16,12 @@ /* if a default errhandler was set on MPI_FILE_NULL then we need to ensure @@ -14,9 +14,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/adio/common/ad_end.c ompi/ /* delete the flattened datatype list */ curr = ADIOI_Flatlist; -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/adio/common/ad_iread_coll.c ompi/ompi/mca/io/romio/romio/adio/common/ad_iread_coll.c +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/adio/common/ad_iread_coll.c ompi/ompi/mca/io/romio/romio/adio/common/ad_iread_coll.c --- mpich/src/mpi/romio/adio/common/ad_iread_coll.c 2014-12-25 16:45:58.225965642 +0900 -+++ ompi/ompi/mca/io/romio/romio/adio/common/ad_iread_coll.c 2015-01-06 19:41:59.287738412 +0900 ++++ ompi/ompi/mca/io/romio/romio/adio/common/ad_iread_coll.c 2015-01-07 11:01:28.116415010 +0900 @@ -15,6 +15,8 @@ #include "mpe.h" #endif @@ -31,9 +31,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/adio/common/ad_iread_coll. } +#endif /* HAVE_MPI_GREQUEST_EXTENSIONS */ -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/adio/common/ad_iwrite_coll.c ompi/ompi/mca/io/romio/romio/adio/common/ad_iwrite_coll.c +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/adio/common/ad_iwrite_coll.c ompi/ompi/mca/io/romio/romio/adio/common/ad_iwrite_coll.c --- mpich/src/mpi/romio/adio/common/ad_iwrite_coll.c 2014-12-25 16:45:58.225965642 +0900 -+++ ompi/ompi/mca/io/romio/romio/adio/common/ad_iwrite_coll.c 2015-01-06 19:41:59.287738412 +0900 ++++ ompi/ompi/mca/io/romio/romio/adio/common/ad_iwrite_coll.c 2015-01-07 11:01:28.179454706 +0900 @@ -12,6 +12,8 @@ #include "mpe.h" #endif @@ -48,9 +48,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/adio/common/ad_iwrite_coll } +#endif /* HAVE_MPI_GREQUEST_EXTENSIONS */ -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/adio/include/adioi.h ompi/ompi/mca/io/romio/romio/adio/include/adioi.h +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/adio/include/adioi.h ompi/ompi/mca/io/romio/romio/adio/include/adioi.h --- mpich/src/mpi/romio/adio/include/adioi.h 2014-12-25 16:45:58.228966032 +0900 -+++ ompi/ompi/mca/io/romio/romio/adio/include/adioi.h 2015-01-06 19:41:59.291738936 +0900 ++++ ompi/ompi/mca/io/romio/romio/adio/include/adioi.h 2015-01-07 11:01:28.353564349 +0900 @@ -429,18 +429,26 @@ MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int @@ -78,9 +78,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/adio/include/adioi.h ompi/ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Offset **offset_list_ptr, ADIO_Offset -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/adio/include/romioconf-undefs.h ompi/ompi/mca/io/romio/romio/adio/include/romioconf-undefs.h +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/adio/include/romioconf-undefs.h ompi/ompi/mca/io/romio/romio/adio/include/romioconf-undefs.h --- mpich/src/mpi/romio/adio/include/romioconf-undefs.h 1970-01-01 09:00:00.000000000 +0900 -+++ ompi/ompi/mca/io/romio/romio/adio/include/romioconf-undefs.h 2015-01-06 19:41:59.291738936 +0900 ++++ ompi/ompi/mca/io/romio/romio/adio/include/romioconf-undefs.h 2015-01-07 11:01:28.438617911 +0900 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana @@ -127,17 +127,32 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/adio/include/romioconf-und +#endif + +#endif /* ROMIOCONF_UNDEFS_H */ -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/.config_params ompi/ompi/mca/io/romio/romio/.config_params +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/adio/Makefile.mk ompi/ompi/mca/io/romio/romio/adio/Makefile.mk +--- mpich/src/mpi/romio/adio/Makefile.mk 2014-12-25 16:45:58.214964210 +0900 ++++ ompi/ompi/mca/io/romio/romio/adio/Makefile.mk 2015-01-07 14:46:19.504816749 +0900 +@@ -19,7 +19,10 @@ + adio/include/mpio_error.h \ + adio/include/mpipr.h \ + adio/include/mpiu_greq.h \ +- adio/include/nopackage.h ++ adio/include/nopackage.h \ ++ adio/include/romioconf-undefs.h \ ++ adio/include/mpiu_external32.h \ ++ adio/include/hint_fns.h + + include $(top_srcdir)/adio/ad_gpfs/Makefile.mk + include $(top_srcdir)/adio/ad_gpfs/bg/Makefile.mk +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/.config_params ompi/ompi/mca/io/romio/romio/.config_params --- mpich/src/mpi/romio/.config_params 2014-12-25 16:45:58.214964210 +0900 -+++ ompi/ompi/mca/io/romio/romio/.config_params 2015-01-06 19:41:59.277737109 +0900 ++++ ompi/ompi/mca/io/romio/romio/.config_params 2015-01-06 19:43:31.738793869 +0900 @@ -36,3 +36,4 @@ __hp_mpi __cray_mpi __lam_mpi +__open_mpi -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/configure.ac ompi/ompi/mca/io/romio/romio/configure.ac +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/configure.ac ompi/ompi/mca/io/romio/romio/configure.ac --- mpich/src/mpi/romio/configure.ac 2014-12-25 16:45:58.229966162 +0900 -+++ ompi/ompi/mca/io/romio/romio/configure.ac 2015-01-06 19:41:59.293739196 +0900 ++++ ompi/ompi/mca/io/romio/romio/configure.ac 2015-01-07 11:01:28.602721250 +0900 @@ -3,12 +3,21 @@ # autoconf --localdir=../confdb configure.ac # (or wherever the confdb is) @@ -483,7 +498,7 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/configure.ac ompi/ompi/mca AC_OUTPUT dnl PAC_SUBDIR_CACHE_CLEANUP -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/doc/Makefile ompi/ompi/mca/io/romio/romio/doc/Makefile +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/doc/Makefile ompi/ompi/mca/io/romio/romio/doc/Makefile --- mpich/src/mpi/romio/doc/Makefile 2014-12-25 16:45:58.229966162 +0900 +++ ompi/ompi/mca/io/romio/romio/doc/Makefile 1970-01-01 09:00:00.000000000 +0900 @@ -1,11 +0,0 @@ @@ -498,9 +513,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/doc/Makefile ompi/ompi/mca - -rm -f users-guide.toc users-guide.log users-guide.dvi \ - users-guide.aux users-guide.bbl users-guide.ps \ - users-guide.blg -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/doc/users-guide.tex ompi/ompi/mca/io/romio/romio/doc/users-guide.tex +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/doc/users-guide.tex ompi/ompi/mca/io/romio/romio/doc/users-guide.tex --- mpich/src/mpi/romio/doc/users-guide.tex 2014-12-25 16:45:58.230966293 +0900 -+++ ompi/ompi/mca/io/romio/romio/doc/users-guide.tex 2015-01-06 19:41:59.294739326 +0900 ++++ ompi/ompi/mca/io/romio/romio/doc/users-guide.tex 2015-01-07 11:01:28.619731962 +0900 @@ -807,13 +807,19 @@ your MPI-IO program. @@ -532,9 +547,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/doc/users-guide.tex ompi/o \item The file-open mode {\tt MPI\_MODE\_EXCL} does not work on Intel PFS file system, due to a bug in PFS. -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/.gitignore ompi/ompi/mca/io/romio/romio/.gitignore +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/.gitignore ompi/ompi/mca/io/romio/romio/.gitignore --- mpich/src/mpi/romio/.gitignore 1970-01-01 09:00:00.000000000 +0900 -+++ ompi/ompi/mca/io/romio/romio/.gitignore 2015-01-06 19:41:59.277737109 +0900 ++++ ompi/ompi/mca/io/romio/romio/.gitignore 2015-01-07 11:01:26.982701076 +0900 @@ -0,0 +1,13 @@ +/Makefile +/.deps @@ -549,9 +564,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/.gitignore ompi/ompi/mca/i +.state-cache +version.m4 +confdb/config.rpath -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/include/io_romio_conv.h ompi/ompi/mca/io/romio/romio/include/io_romio_conv.h +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/include/io_romio_conv.h ompi/ompi/mca/io/romio/romio/include/io_romio_conv.h --- mpich/src/mpi/romio/include/io_romio_conv.h 1970-01-01 09:00:00.000000000 +0900 -+++ ompi/ompi/mca/io/romio/romio/include/io_romio_conv.h 2015-01-06 19:41:59.294739326 +0900 ++++ ompi/ompi/mca/io/romio/romio/include/io_romio_conv.h 2015-01-07 11:01:28.638743935 +0900 @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana @@ -677,9 +692,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/include/io_romio_conv.h om +#endif + +#endif /* MCA_IO_ROMIO_CONV_H */ -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/include/mpio.h.in ompi/ompi/mca/io/romio/romio/include/mpio.h.in +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/include/mpio.h.in ompi/ompi/mca/io/romio/romio/include/mpio.h.in --- mpich/src/mpi/romio/include/mpio.h.in 2014-12-25 16:45:58.230966293 +0900 -+++ ompi/ompi/mca/io/romio/romio/include/mpio.h.in 2015-01-06 19:41:59.294739326 +0900 ++++ ompi/ompi/mca/io/romio/romio/include/mpio.h.in 2015-01-07 11:01:28.655754646 +0900 @@ -11,6 +11,16 @@ #define MPIO_INCLUDE @@ -742,9 +757,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/include/mpio.h.in ompi/omp #if defined(__cplusplus) } #endif -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/localdefs.in ompi/ompi/mca/io/romio/romio/localdefs.in +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/localdefs.in ompi/ompi/mca/io/romio/romio/localdefs.in --- mpich/src/mpi/romio/localdefs.in 2014-12-25 16:45:58.230966293 +0900 -+++ ompi/ompi/mca/io/romio/romio/localdefs.in 2015-01-06 19:41:59.294739326 +0900 ++++ ompi/ompi/mca/io/romio/romio/localdefs.in 2015-01-06 19:43:31.743794524 +0900 @@ -1,7 +1,4 @@ #! /bin/sh - @@ -754,9 +769,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/localdefs.in ompi/ompi/mca +LIBS="@LIBS@" MPI_OFFSET_TYPE="@MPI_OFFSET_TYPE@" FORTRAN_MPI_OFFSET="@FORTRAN_MPI_OFFSET@" -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/Makefile.am ompi/ompi/mca/io/romio/romio/Makefile.am +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/Makefile.am ompi/ompi/mca/io/romio/romio/Makefile.am --- mpich/src/mpi/romio/Makefile.am 2014-12-25 16:45:58.214964210 +0900 -+++ ompi/ompi/mca/io/romio/romio/Makefile.am 2015-01-06 19:41:59.277737109 +0900 ++++ ompi/ompi/mca/io/romio/romio/Makefile.am 2015-01-07 11:01:27.001713046 +0900 @@ -1,9 +1,28 @@ # -*- Mode: Makefile; -*- +# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana @@ -816,9 +831,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/Makefile.am ompi/ompi/mca/ ## NOTE: ROMIO's old build system builds a bunch of _foo.o objects that contain ## PMPI_ implementations as well as calls to only other PMPI routines. In -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/Makefile.options ompi/ompi/mca/io/romio/romio/Makefile.options +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/Makefile.options ompi/ompi/mca/io/romio/romio/Makefile.options --- mpich/src/mpi/romio/Makefile.options 1970-01-01 09:00:00.000000000 +0900 -+++ ompi/ompi/mca/io/romio/romio/Makefile.options 2015-01-06 19:41:59.277737109 +0900 ++++ ompi/ompi/mca/io/romio/romio/Makefile.options 2015-01-07 11:01:27.015721870 +0900 @@ -0,0 +1,36 @@ +# -*- makefile -*- +# @@ -856,9 +871,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/Makefile.options ompi/ompi + -I$(OMPI_TOP_BUILDDIR)/ompi/include \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/adio/include -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/mpi-io/close.c ompi/ompi/mca/io/romio/romio/mpi-io/close.c +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/mpi-io/close.c ompi/ompi/mca/io/romio/romio/mpi-io/close.c --- mpich/src/mpi/romio/mpi-io/close.c 2014-12-25 16:45:58.230966293 +0900 -+++ ompi/ompi/mca/io/romio/romio/mpi-io/close.c 2015-01-06 19:41:59.294739326 +0900 ++++ ompi/ompi/mca/io/romio/romio/mpi-io/close.c 2015-01-07 11:01:28.670764098 +0900 @@ -76,8 +76,13 @@ * somehow inform the MPI library that we no longer hold a reference to any * user defined error handler. We do this by setting the errhandler at this @@ -873,9 +888,20 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/mpi-io/close.c ompi/ompi/m ADIO_Close(adio_fh, &error_code); MPIO_File_free(fh); -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/mpi-io/iread_all.c ompi/ompi/mca/io/romio/romio/mpi-io/iread_all.c +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/mpi-io/glue/Makefile.mk ompi/ompi/mca/io/romio/romio/mpi-io/glue/Makefile.mk +--- mpich/src/mpi/romio/mpi-io/glue/Makefile.mk 2014-12-25 16:45:58.232966555 +0900 ++++ ompi/ompi/mca/io/romio/romio/mpi-io/glue/Makefile.mk 2015-01-08 14:02:47.060155366 +0900 +@@ -7,6 +7,7 @@ + + include $(top_srcdir)/mpi-io/glue/default/Makefile.mk + include $(top_srcdir)/mpi-io/glue/mpich/Makefile.mk ++include $(top_srcdir)/mpi-io/glue/openmpi/Makefile.mk + + if !BUILD_ROMIO_EMBEDDED + romio_other_sources += \ +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/mpi-io/iread_all.c ompi/ompi/mca/io/romio/romio/mpi-io/iread_all.c --- mpich/src/mpi/romio/mpi-io/iread_all.c 2014-12-25 16:45:58.233966685 +0900 -+++ ompi/ompi/mca/io/romio/romio/mpi-io/iread_all.c 2015-01-06 19:41:59.298739847 +0900 ++++ ompi/ompi/mca/io/romio/romio/mpi-io/iread_all.c 2015-01-07 11:01:28.885899578 +0900 @@ -26,7 +26,7 @@ #include "mpioprof.h" #endif @@ -885,9 +911,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/mpi-io/iread_all.c ompi/om #include "mpiu_greq.h" #endif -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/mpi-io/Makefile.mk ompi/ompi/mca/io/romio/romio/mpi-io/Makefile.mk +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/mpi-io/Makefile.mk ompi/ompi/mca/io/romio/romio/mpi-io/Makefile.mk --- mpich/src/mpi/romio/mpi-io/Makefile.mk 2014-12-25 16:45:58.230966293 +0900 -+++ ompi/ompi/mca/io/romio/romio/mpi-io/Makefile.mk 2015-01-06 19:41:59.294739326 +0900 ++++ ompi/ompi/mca/io/romio/romio/mpi-io/Makefile.mk 2015-01-07 11:01:28.669763468 +0900 @@ -14,8 +14,6 @@ romio_mpi_sources += \ mpi-io/close.c \ @@ -897,9 +923,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/mpi-io/Makefile.mk ompi/om mpi-io/fsync.c \ mpi-io/get_amode.c \ mpi-io/get_atom.c \ -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/mpi-io/mpioprof.h ompi/ompi/mca/io/romio/romio/mpi-io/mpioprof.h +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/mpi-io/mpioprof.h ompi/ompi/mca/io/romio/romio/mpi-io/mpioprof.h --- mpich/src/mpi/romio/mpi-io/mpioprof.h 2014-12-25 16:45:58.233966685 +0900 -+++ ompi/ompi/mca/io/romio/romio/mpi-io/mpioprof.h 2015-01-06 19:41:59.298739847 +0900 ++++ ompi/ompi/mca/io/romio/romio/mpi-io/mpioprof.h 2015-01-07 11:01:29.005975192 +0900 @@ -10,6 +10,15 @@ building the profiling interface */ @@ -921,9 +947,9 @@ diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/mpi-io/mpioprof.h ompi/omp #endif +#endif -diff -x ompi.patch -x confdb -ruN mpich/src/mpi/romio/README_OMPI ompi/ompi/mca/io/romio/romio/README_OMPI +diff -x ompi.patch -x confdb -x autom4te.cache -x version.m4 -x aclocal.m4 -ruN mpich/src/mpi/romio/README_OMPI ompi/ompi/mca/io/romio/romio/README_OMPI --- mpich/src/mpi/romio/README_OMPI 1970-01-01 09:00:00.000000000 +0900 -+++ ompi/ompi/mca/io/romio/romio/README_OMPI 2015-01-06 19:41:59.277737109 +0900 ++++ ompi/ompi/mca/io/romio/romio/README_OMPI 2015-01-07 11:01:27.034733843 +0900 @@ -0,0 +1,11 @@ +Please note that this is *NOT* a vanilla MPICH v3.2a2-84-gef1cf14 +distribution of the ROMIO package from Argonne National Labs. From 8ab605d9c5bc82ef2baf424a5d5f970132d89ca1 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 8 Jan 2015 15:40:16 +0900 Subject: [PATCH 5/8] btl/tcp: fix overflow in mca_btl_tcp_endpoint_dump() --- opal/mca/btl/tcp/btl_tcp_endpoint.c | 20 +++++++++++++++++++- opal/mca/btl/tcp/btl_tcp_frag.c | 10 +++++++--- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp_endpoint.c b/opal/mca/btl/tcp/btl_tcp_endpoint.c index 7b6e514f1c..f2e2b1e38e 100644 --- a/opal/mca/btl/tcp/btl_tcp_endpoint.c +++ b/opal/mca/btl/tcp/btl_tcp_endpoint.c @@ -12,6 +12,8 @@ * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -180,26 +182,33 @@ mca_btl_tcp_endpoint_dump(int level, sprintf(dst, "%s", inet_ntoa(inaddr.sin_addr)); #endif - used += snprintf(&outmsg[used], 1024 - used, "%s: %s - %s [%d", + used = snprintf(outmsg, 1024, "%s: %s - %s [%d", msg, src, dst, btl_endpoint->endpoint_sd); + if (used >= 1024) goto out; switch(btl_endpoint->endpoint_state) { case MCA_BTL_TCP_CONNECTING: used += snprintf(&outmsg[used], 1024 - used, ":%s]", "connecting"); + if (used >= 1024) goto out; break; case MCA_BTL_TCP_CONNECT_ACK: used += snprintf(&outmsg[used], 1024 - used, ":%s]", "ack"); + if (used >= 1024) goto out; break; case MCA_BTL_TCP_CLOSED: used += snprintf(&outmsg[used], 1024 - used, ":%s]", "close"); + if (used >= 1024) goto out; break; case MCA_BTL_TCP_FAILED: used += snprintf(&outmsg[used], 1024 - used, ":%s]", "failed"); + if (used >= 1024) goto out; break; case MCA_BTL_TCP_CONNECTED: used += snprintf(&outmsg[used], 1024 - used, ":%s]", "connected"); + if (used >= 1024) goto out; break; default: used += snprintf(&outmsg[used], 1024 - used, ":%s]", "unknown"); + if (used >= 1024) goto out; break; } @@ -238,26 +247,35 @@ mca_btl_tcp_endpoint_dump(int level, #endif used += snprintf(&outmsg[used], 1024 - used, " nodelay %d sndbuf %d rcvbuf %d flags %08x", nodelay, sndbuf, rcvbuf, flags); + if (used >= 1024) goto out; #if MCA_BTL_TCP_ENDPOINT_CACHE used += snprintf(&outmsg[used], 1024 - used, "\n\t[cache %p used %lu/%lu]", btl_endpoint->endpoint_cache, btl_endpoint->endpoint_cache_pos - btl_endpoint->endpoint_cache, btl_endpoint->endpoint_cache_length); + if (used >= 1024) goto out; #endif /* MCA_BTL_TCP_ENDPOINT_CACHE */ used += snprintf(&outmsg[used], 1024 - used, "{%s - retries %d}", (btl_endpoint->endpoint_nbo ? "NBO" : ""), (int)btl_endpoint->endpoint_retries); + if (used >= 1024) goto out; } used += snprintf(&outmsg[used], 1024 - used, "\n"); + if (used >= 1024) goto out; if( NULL != btl_endpoint->endpoint_recv_frag ) used += mca_btl_tcp_frag_dump(btl_endpoint->endpoint_recv_frag, "active recv", &outmsg[used], 1024 - used); + if (used >= 1024) goto out; if( NULL != btl_endpoint->endpoint_send_frag ) used += mca_btl_tcp_frag_dump(btl_endpoint->endpoint_send_frag, "active send (inaccurate iov)", &outmsg[used], 1024 - used); + if (used >= 1024) goto out; OPAL_LIST_FOREACH(item, &btl_endpoint->endpoint_frags, mca_btl_tcp_frag_t) { used += mca_btl_tcp_frag_dump(item, "pending send", &outmsg[used], 1024 - used); + if (used >= 1024) goto out; } +out: + outmsg[1023] = '\0'; opal_output_verbose(level, opal_btl_base_framework.framework_output, "[%s:%d:%s][%s -> %s] %s", fname, lineno, funcname, diff --git a/opal/mca/btl/tcp/btl_tcp_frag.c b/opal/mca/btl/tcp/btl_tcp_frag.c index 214f839216..d9f04b5caa 100644 --- a/opal/mca/btl/tcp/btl_tcp_frag.c +++ b/opal/mca/btl/tcp/btl_tcp_frag.c @@ -13,6 +13,8 @@ * Copyright (c) 2008-2012 Oracle and/or all its affiliates. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -90,14 +92,16 @@ OBJ_CLASS_INSTANCE( size_t mca_btl_tcp_frag_dump(mca_btl_tcp_frag_t* frag, char* msg, char* buf, size_t length) { - int i, used = 0; + int i, used; - used += snprintf(&buf[used], length - used, "%s frag %p iov_cnt %d iov_idx %d size %lu\n", - msg, (void*)frag, (int)frag->iov_cnt, (int)frag->iov_idx, frag->size); + used = snprintf(buf, length, "%s frag %p iov_cnt %d iov_idx %d size %lu\n", + msg, (void*)frag, (int)frag->iov_cnt, (int)frag->iov_idx, frag->size); + if (used >= length) return length; for( i = 0; i < (int)frag->iov_cnt; i++ ) { used += snprintf(&buf[used], length - used, "[%s%p:%lu] ", (i < (int)frag->iov_idx ? "*" : ""), frag->iov[i].iov_base, frag->iov[i].iov_len); + if (used >= length) return length; } return used; } From 4c29d8e2475e2ddf40d386f751156d76bceb9bdd Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 8 Jan 2015 17:18:07 +0900 Subject: [PATCH 6/8] btl/openib: silence warning (unused code) --- opal/mca/btl/openib/btl_openib_async.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/opal/mca/btl/openib/btl_openib_async.c b/opal/mca/btl/openib/btl_openib_async.c index e7fe7e8625..9f95152608 100644 --- a/opal/mca/btl/openib/btl_openib_async.c +++ b/opal/mca/btl/openib/btl_openib_async.c @@ -123,7 +123,7 @@ static mca_btl_openib_endpoint_t * qp2endpoint(struct ibv_qp *qp, mca_btl_openib return NULL; } -#if HAVE_XRC +#if HAVE_XRC && !OPAL_HAVE_CONNECTX_XRC_DOMAINS /* XRC recive QP to endpoint */ static mca_btl_openib_endpoint_t * xrc_qp2endpoint(uint32_t qp_num, mca_btl_openib_device_t *device) { @@ -131,11 +131,7 @@ static mca_btl_openib_endpoint_t * xrc_qp2endpoint(uint32_t qp_num, mca_btl_open int ep_i; for(ep_i = 0; ep_i < opal_pointer_array_get_size(device->endpoints); ep_i++) { ep = opal_pointer_array_get_item(device->endpoints, ep_i); -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS - if (qp_num == ep->xrc_recv_qp->qp_num) -#else if (qp_num == ep->xrc_recv_qp_num) -#endif return ep; } return NULL; @@ -376,13 +372,10 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po if (!xrc_event) mca_btl_openib_load_apm(event.element.qp, qp2endpoint(event.element.qp, device)); -#if HAVE_XRC -#if OPAL_HAVE_CONNECTX_XRC_DOMAINS -#else +#if HAVE_XRC && !OPAL_HAVE_CONNECTX_XRC_DOMAINS else mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num, xrc_qp2endpoint(event.element.xrc_qp_num, device)); -#endif #endif } break; From 9f6faadd91a0c874b8541753bbd19d158cb19e11 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 8 Jan 2015 09:14:56 -0700 Subject: [PATCH 7/8] opal_fifo: add missing memory barrier in pop Thanks to Adrian Reber for reporting this. Signed-off-by: Nathan Hjelm --- opal/class/opal_fifo.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/opal/class/opal_fifo.h b/opal/class/opal_fifo.h index 0c87a5a277..23b1ab4d04 100644 --- a/opal/class/opal_fifo.h +++ b/opal/class/opal_fifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reseved. * $COPYRIGHT$ * @@ -219,6 +219,8 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) opal_atomic_wmb (); } while (1); + opal_atomic_wmb(); + item = opal_fifo_head (fifo); if (&fifo->opal_fifo_ghost == item) { fifo->opal_fifo_head.data.counter = 0; From c65f026feec5a65f5c927034deb3f963aa19ffa7 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 8 Jan 2015 12:52:38 -0700 Subject: [PATCH 8/8] btl/vader: fix typo in xpmem setup --- opal/mca/btl/vader/btl_vader_xpmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/btl/vader/btl_vader_xpmem.c b/opal/mca/btl/vader/btl_vader_xpmem.c index 4bb9a3b643..8ad57bb44a 100644 --- a/opal/mca/btl/vader/btl_vader_xpmem.c +++ b/opal/mca/btl/vader/btl_vader_xpmem.c @@ -27,7 +27,7 @@ int mca_btl_vader_xpmem_init (void) } mca_btl_vader.super.btl_get = mca_btl_vader_get_xpmem; - mca_btl_vader.super.btl_put = mca_btl_vader_get_xpmem; + mca_btl_vader.super.btl_put = mca_btl_vader_put_xpmem; return OPAL_SUCCESS; }